[Xen-changelog] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 5cc367720223d06e5ec420da387218f46c9ed8e9 # Parent 5719550652a19bdfb593987113f802b34ce7d415 # Parent edefe24d78bd5e6e51bdd002f3ee7721d6dc2ce5 merge with xen-unstable.hg diff -r 5719550652a1 -r 5cc367720223 .hgtags --- a/.hgtags Tue Apr 11 13:55:47 2006 -0600 +++ b/.hgtags Tue Apr 11 18:54:18 2006 -0600 @@ -14,3 +14,4 @@ 3d330e41f41ce1bc118c02346e18949ad5d67f6b 3d330e41f41ce1bc118c02346e18949ad5d67f6b split-1.1 c8fdb0caa77b429cf47f9707926e83947778cb48 RELEASE-3.0.0 af0573e9e5258db0a9d28aa954dd302ddd2c2d23 3.0.2-rc +d0d3fef37685be264a7f52201f8ef44c030daad3 3.0.2-branched diff -r 5719550652a1 -r 5cc367720223 Config.mk --- a/Config.mk Tue Apr 11 13:55:47 2006 -0600 +++ b/Config.mk Tue Apr 11 18:54:18 2006 -0600 @@ -24,6 +24,7 @@ OBJDUMP = $(CROSS_COMPILE)objdump OBJDUMP = $(CROSS_COMPILE)objdump DISTDIR ?= $(XEN_ROOT)/dist +DESTDIR ?= / INSTALL = install INSTALL_DIR = $(INSTALL) -d -m0755 diff -r 5719550652a1 -r 5cc367720223 Makefile --- a/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -14,6 +14,7 @@ endif endif # Default target must appear before any include lines +.PHONY: all all: dist XEN_ROOT=$(CURDIR) @@ -24,12 +25,11 @@ export pae=y export pae=y endif -.PHONY: all dist install xen kernels tools dev-docs docs world clean -.PHONY: mkpatches mrproper kbuild kdelete kclean - # build and install everything into the standard system directories +.PHONY: install install: install-xen install-kernels install-tools install-docs +.PHONY: build build: kernels $(MAKE) -C xen build $(MAKE) -C tools build @@ -38,10 +38,12 @@ build: kernels # The test target is for unit tests that can run without an installation. Of # course, many tests require a machine running Xen itself, and these are # handled elsewhere. +.PHONY: test test: $(MAKE) -C tools/python test # build and install everything into local dist directory +.PHONY: dist dist: DESTDIR=$(DISTDIR)/install dist: dist-xen dist-kernels dist-tools dist-docs $(INSTALL_DIR) $(DISTDIR)/check @@ -54,79 +56,83 @@ dist-%: install-% @: # do nothing # Legacy dist targets +.PHONY: xen tools kernels docs xen: dist-xen tools: dist-tools kernels: dist-kernels docs: dist-docs +.PHONY: prep-kernels prep-kernels: for i in $(XKERNELS) ; do $(MAKE) $$i-prep || exit 1; done +.PHONY: install-xen install-xen: $(MAKE) -C xen install +.PHONY: install-tools install-tools: $(MAKE) -C tools install +.PHONY: install-kernels install-kernels: for i in $(XKERNELS) ; do $(MAKE) $$i-install || exit 1; done +.PHONY: install-docs install-docs: sh ./docs/check_pkgs && $(MAKE) -C docs install || true +.PHONY: dev-docs dev-docs: $(MAKE) -C docs dev-docs # Build all the various kernels and modules +.PHONY: kbuild kbuild: kernels # Delete the kernel build trees entirely +.PHONY: kdelete kdelete: for i in $(XKERNELS) ; do $(MAKE) $$i-delete ; done # Clean the kernel build trees +.PHONY: kclean kclean: for i in $(XKERNELS) ; do $(MAKE) $$i-clean ; done # Make patches from kernel sparse trees +.PHONY: mkpatches mkpatches: for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-xen.patch; done # build xen, the tools, and a domain 0 plus unprivileged linux-xen images, # and place them in the install directory. 
'make install' should then # copy them to the normal system directories +.PHONY: world world: $(MAKE) clean $(MAKE) kdelete $(MAKE) dist # clean doesn't do a kclean +.PHONY: clean clean:: $(MAKE) -C xen clean $(MAKE) -C tools clean $(MAKE) -C docs clean # clean, but blow away kernel build tree plus tarballs +.PHONY: distclean distclean: clean rm -rf dist patches/tmp for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done # Linux name for GNU distclean +.PHONY: mrproper mrproper: distclean -install-logging: LOGGING=logging-0.4.9.2 -install-logging: - [ -f $(LOGGING).tar.gz ] || wget http://www.red-dove.com/$(LOGGING).tar.gz - tar -zxf $(LOGGING).tar.gz - cd $(LOGGING) && python setup.py install - -# handy target to upgrade iptables (use rpm or apt-get in preference) -install-iptables: - wget http://www.netfilter.org/files/iptables-1.2.11.tar.bz2 - tar -jxf iptables-1.2.11.tar.bz2 - $(MAKE) -C iptables-1.2.11 PREFIX= KERNEL_DIR=../linux-$(LINUX_VER)-xen0 install - +.PHONY: help help: @echo 'Installation targets:' @echo ' install - build and install everything' @@ -147,25 +153,28 @@ help: @echo ' dev-docs - build developer-only documentation' @echo '' @echo 'Cleaning targets:' - @echo ' clean - clean the Xen, tools and docs (but not' - @echo ' guest kernel) trees' - @echo ' distclean - clean plus delete kernel tarballs and kernel' - @echo ' build trees' + @echo ' clean - clean the Xen, tools and docs (but not guest kernel trees)' + @echo ' distclean - clean plus delete kernel build trees and' + @echo ' local downloaded files' @echo ' kdelete - delete guest kernel build trees' @echo ' kclean - clean guest kernel build trees' - @echo '' - @echo 'Dependency installation targets:' - @echo ' install-logging - install the Python Logging package' - @echo ' install-iptables - install iptables tools' @echo '' @echo 'Miscellaneous targets:' @echo ' prep-kernels - prepares kernel directories, does not build' @echo ' mkpatches - make patches against vanilla kernels from' @echo ' sparse trees' - @echo ' uninstall - attempt to remove installed Xen tools (use' - @echo ' with extreme care!)' + @echo ' uninstall - attempt to remove installed Xen tools' + @echo ' (use with extreme care!)' + @echo + @echo 'Environment:' + @echo ' XEN_PYTHON_NATIVE_INSTALL=y' + @echo ' - native python install or dist' + @echo ' install into prefix/lib/python<VERSION>' + @echo ' instead of <PREFIX>/lib/python' + @echo ' true if set to non-empty value, false otherwise' # Use this target with extreme care! +.PHONY: uninstall uninstall: D=$(DESTDIR) uninstall: [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s` || true @@ -199,5 +208,6 @@ uninstall: rm -rf $(D)/usr/share/man/man8/xen* # Legacy targets for compatibility +.PHONY: linux26 linux26: $(MAKE) 'KERNELS=linux-2.6*' kernels diff -r 5719550652a1 -r 5cc367720223 README --- a/README Tue Apr 11 13:55:47 2006 -0600 +++ b/README Tue Apr 11 18:54:18 2006 -0600 @@ -1,158 +1,176 @@ -################################ - __ __ _____ ___ - \ \/ /___ _ __ |___ / / _ \ - \ // _ \ '_ \ |_ \| | | | - / \ __/ | | | ___) | |_| | - /_/\_\___|_| |_| |____(_)___/ - -################################ - -http://www.xensource.com/xen/about.html - -What is Xen? -============ - -Xen is a Virtual Machine Monitor (VMM) originally developed by the -Systems Research Group of the University of Cambridge Computer -Laboratory, as part of the UK-EPSRC funded XenoServers project. 
Xen -is freely-distributable Open Source software, released under the GNU -GPL. Since its initial public release, Xen has grown a large -development community, spearheaded by XenSource Inc, a company created -by the original Xen development team to build enterprise products -around Xen. - -The 3.0 release offers excellent performance, hardware support and -enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and -live relocation of VMs. This install tree contains source for a Linux -2.6 guest; ports to Linux 2.4, NetBSD, FreeBSD and Solaris will follow -later (and are already available for previous Xen releases). - -This file contains some quick-start instructions to install Xen on -your system. For full documentation, see the Xen User Manual. If this -is a pre-built release then you can find the manual at: - dist/install/usr/share/doc/xen/pdf/user.pdf -If you have a source release, then 'make -C docs' will build the -manual at docs/pdf/user.pdf. - -Quick-Start Guide - Pre-Built Binary Release -============================================ - -[NB. Unless noted otherwise, all the following steps should be -performed with root privileges.] - -1. Install the binary distribution onto your filesystem: - - # sh ./install.sh - - Among other things, this will install Xen and Xen-ready Linux - kernel files in /boot, kernel modules and Python packages in /lib, - and various control tools in standard 'bin' directories. - -2. Configure your bootloader to boot Xen and an initial Linux virtual - machine. Note that Xen currently only works with GRUB and pxelinux - derived boot loaders: less common alternatives such as LILO are - *not* supported. You can most likely find your GRUB menu file at - /boot/grub/menu.lst: edit this file to include an entry like the - following: - - title Xen 3.0 / XenLinux 2.6 - kernel /boot/xen-3.0.gz console=vga - module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0 - module /boot/initrd-2.6-xen.img - - NB: Not all kernel configs need an initial ram disk (initrd), but - if you do specify one you'll need to use the 'module' grub directive - rather than 'initrd'. - - The linux command line takes all the usual options, such as - root=<root-dev> to specify your usual root partition (e.g., - /dev/hda1). - - The Xen command line takes a number of optional arguments described - in the manual. The most common is 'dom0_mem=xxxM' which sets the - amount of memory to allocate for use by your initial virtual - machine (known as domain 0). Note that Xen itself reserves about - 32MB memory for internal use, which is not available for allocation - to virtual machines. - -3. Reboot your system and select the "Xen 3.0 / XenLinux 2.6" menu - option. After booting Xen, Linux will start and your initialisation - scripts should execute in the usual way. - -Quick-Start Guide - Source Release -================================== - -First, there are a number of prerequisites for building a Xen source -release. Make sure you have all the following installed, either by -visiting the project webpage or installing a pre-built package -provided by your Linux distributor: - * GCC (preferably v3.2.x or v3.3.x; older versions are unsupported) - * GNU Make - * GNU Binutils - * Development install of zlib (e.g., zlib-dev) - * Development install of Python v2.3 or later (e.g., python-dev) - * bridge-utils package (/sbin/brctl) - * iproute package (/sbin/ip) - * hotplug or udev - -[NB. Unless noted otherwise, all the following steps should be -performed with root privileges.] - -1. 
Download and untar the source tarball file. This will be a - file named xen-unstable-src.tgz, or xen-$version-src.tgz. - You can also pull the current version from the SCMS - that is being used (Bitkeeper, scheduled to change shortly). - - # tar xzf xen-unstable-src.tgz - - Assuming you are using the unstable tree, this will - untar into xen-unstable. The rest of the instructions - use the unstable tree as an example, substitute the - version for unstable. - -2. cd to xen-unstable (or whatever you sensibly rename it to). - The Linux, netbsd and freebsd kernel source trees are in - the $os-$version-xen-sparse directories. - -On Linux: - -3. For the very first build, or if you want to destroy existing - .configs and build trees, perform the following steps: - - # make KERNELS=linux-2.6-xen world - # make install - - It will create and install into the dist/ directory which is the - default install location. It will build the xen binary (xen.gz), - and a linux kernel and modules that can be used in both dom0 and an - unprivileged guest kernel (vmlinuz-2.6.x-xen), the tools and the - documentation. - - If you don't specify KERNELS= on the make command line it will - default to building two kernels, vmlinuz-2.6.x-xen0 and - vmlinuz-2.6.x-xenU. These are smaller builds with just selected - modules, intended primarilly for developers that don't like to wait - for a -xen kernel to build. The -xenU kernel is particularly small - as it does not contain any physical device drivers, and hence is - only useful for guest domains. - - If you want to build an x86_32 PAE capable xen and kernel to work - on machines with >= 4GB of memory, use XEN_TARGET_X86_PAE=y on the - make command line. - -4. To rebuild an existing tree without modifying the config: - # make dist - - This will build and install xen, kernels, tools, and - docs into the local dist/ directory. - -5. To rebuild a kernel with a modified config: - - # make linux-2.6-xen-config CONFIGMODE=menuconfig (or xconfig) - # make linux-2.6-xen-build - # make linux-2.6-xen-install - - Depending on your config, you may need to use 'mkinitrd' to create - an initial ram disk, just like a native system e.g. - # depmod 2.6.12.6-xen - # mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod initrd-2.6.12.6-xen.img 2.6.12.6-xen +################################ + __ __ _____ ___ + \ \/ /___ _ __ |___ / / _ \ + \ // _ \ '_ \ |_ \| | | | + / \ __/ | | | ___) | |_| | + /_/\_\___|_| |_| |____(_)___/ + +################################ + +http://www.xensource.com/xen/about.html + +What is Xen? +============ + +Xen is a Virtual Machine Monitor (VMM) originally developed by the +Systems Research Group of the University of Cambridge Computer +Laboratory, as part of the UK-EPSRC funded XenoServers project. Xen +is freely-distributable Open Source software, released under the GNU +GPL. Since its initial public release, Xen has grown a large +development community, spearheaded by XenSource Inc, a company created +by the original Xen development team to build enterprise products +around Xen. + +The 3.0 release offers excellent performance, hardware support and +enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and +live relocation of VMs. This install tree contains source for a Linux +2.6 guest; ports to Linux 2.4, NetBSD, FreeBSD and Solaris will follow +later (and are already available for previous Xen releases). + +This file contains some quick-start instructions to install Xen on +your system. For full documentation, see the Xen User Manual. 
If this +is a pre-built release then you can find the manual at: + dist/install/usr/share/doc/xen/pdf/user.pdf +If you have a source release, then 'make -C docs' will build the +manual at docs/pdf/user.pdf. + +Quick-Start Guide - Pre-Built Binary Release +============================================ + +[NB. Unless noted otherwise, all the following steps should be +performed with root privileges.] + +1. Install the binary distribution onto your filesystem: + + # sh ./install.sh + + Among other things, this will install Xen and Xen-ready Linux + kernel files in /boot, kernel modules and Python packages in /lib, + and various control tools in standard 'bin' directories. + +2. Configure your bootloader to boot Xen and an initial Linux virtual + machine. Note that Xen currently only works with GRUB and pxelinux + derived boot loaders: less common alternatives such as LILO are + *not* supported. You can most likely find your GRUB menu file at + /boot/grub/menu.lst: edit this file to include an entry like the + following: + + title Xen 3.0 / XenLinux 2.6 + kernel /boot/xen-3.0.gz console=vga + module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0 + module /boot/initrd-2.6-xen.img + + NB: Not all kernel configs need an initial ram disk (initrd), but + if you do specify one you'll need to use the 'module' grub directive + rather than 'initrd'. + + The linux command line takes all the usual options, such as + root=<root-dev> to specify your usual root partition (e.g., + /dev/hda1). + + The Xen command line takes a number of optional arguments described + in the manual. The most common is 'dom0_mem=xxxM' which sets the + amount of memory to allocate for use by your initial virtual + machine (known as domain 0). Note that Xen itself reserves about + 32MB memory for internal use, which is not available for allocation + to virtual machines. + +3. Reboot your system and select the "Xen 3.0 / XenLinux 2.6" menu + option. After booting Xen, Linux will start and your initialisation + scripts should execute in the usual way. + +Quick-Start Guide - Source Release +================================== + +First, there are a number of prerequisites for building a Xen source +release. Make sure you have all the following installed, either by +visiting the project webpage or installing a pre-built package +provided by your Linux distributor: + * GCC (preferably v3.2.x or v3.3.x; older versions are unsupported) + * GNU Make + * GNU Binutils + * Development install of zlib (e.g., zlib-dev) + * Development install of Python v2.3 or later (e.g., python-dev) + * bridge-utils package (/sbin/brctl) + * iproute package (/sbin/ip) + * hotplug or udev + +[NB. Unless noted otherwise, all the following steps should be +performed with root privileges.] + +1. Download and untar the source tarball file. This will be a + file named xen-unstable-src.tgz, or xen-$version-src.tgz. + You can also pull the current version from the SCMS + that is being used (Bitkeeper, scheduled to change shortly). + + # tar xzf xen-unstable-src.tgz + + Assuming you are using the unstable tree, this will + untar into xen-unstable. The rest of the instructions + use the unstable tree as an example, substitute the + version for unstable. + +2. cd to xen-unstable (or whatever you sensibly rename it to). + The Linux, netbsd and freebsd kernel source trees are in + the $os-$version-xen-sparse directories. + +On Linux: + +3. 
For the very first build, or if you want to destroy existing + .configs and build trees, perform the following steps: + + # make world + # make install + + This will create and install onto the local machine. It will build + the xen binary (xen.gz), and a linux kernel and modules that can be + used in both dom0 and an unprivileged guest kernel (vmlinuz-2.6.x-xen), + the tools and the documentation. + + You can override the destination for make install by setting DESTDIR + to some value. + + The make command line defaults to building the kernel vmlinuz-2.6.x-xen. + You can override this default by specifying KERNELS=kernelname. For + example, you can make two kernels - linux-2.6-xen0 + and linux-2.6-xenU - which are smaller builds containing only selected + modules, intended primarily for developers that don't like to wait + for a full -xen kernel to build. The -xenU kernel is particularly small, + as it does not contain any physical device drivers, and hence is + only useful for guest domains. + + To make these two kernels, simply specify + + KERNELS="linux-2.6-xen0 linux-2.6-xenU" + + in the make command line. + + If you want to build an x86_32 PAE capable xen and kernel to work + on machines with >= 4GB of memory, use XEN_TARGET_X86_PAE=y on the + make command line. + +4. To rebuild an existing tree without modifying the config: + # make dist + + This will build and install xen, kernels, tools, and + docs into the local dist/ directory. + + You can override the destination for make install by setting DISTDIR + to some value. + + make install and make dist differ in that make install does the + right things for your local machine (installing the appropriate + version of hotplug or udev scripts, for example), but make dist + includes all versions of those scripts, so that you can copy the dist + directory to another machine and install from that distribution. + +5. To rebuild a kernel with a modified config: + + # make linux-2.6-xen-config CONFIGMODE=menuconfig (or xconfig) + # make linux-2.6-xen-build + # make linux-2.6-xen-install + + Depending on your config, you may need to use 'mkinitrd' to create + an initial ram disk, just like a native system e.g. + # depmod 2.6.16-xen + # mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod initrd-2.6.16-xen.img 2.6.16-xen diff -r 5719550652a1 -r 5cc367720223 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/Rules.mk Tue Apr 11 18:54:18 2006 -0600 @@ -5,8 +5,6 @@ export DESTDIR ALLKERNELS = $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.*)) ALLSPARSETREES = $(patsubst %-xen-sparse,%,$(wildcard *-xen-sparse)) - -.PHONY: mkpatches mrproper # Setup pristine search path PRISTINE_SRC_PATH ?= .:.. @@ -40,29 +38,6 @@ patch-%.bz2: @echo "Cannot find $(@F) in path $(LINUX_SRC_PATH)" wget $(KERNEL_REPO)/pub/linux/kernel/v$(_LINUX_VDIR)/$(_LINUX_XDIR)/$(@F) -O./$@ -# Expand NetBSD release to NetBSD version -NETBSD_RELEASE ?= 2.0 -NETBSD_VER ?= $(patsubst netbsd-%-xen-sparse,%,$(wildcard netbsd-$(NETBSD_RELEASE)*-xen-sparse)) -NETBSD_CVSSNAP ?= 20050309 - -# Setup NetBSD search path -NETBSD_SRC_PATH ?= .:.. 
-vpath netbsd-%.tar.bz2 $(NETBSD_SRC_PATH) - -# download a pristine NetBSD tarball if there isn't one in NETBSD_SRC_PATH -netbsd-%-xen-kernel-$(NETBSD_CVSSNAP).tar.bz2: - @echo "Cannot find $@ in path $(NETBSD_SRC_PATH)" - wget http://www.cl.cam.ac.uk/Research/SRG/netos/xen/downloads/$@ -O./$@ - -netbsd-%.tar.bz2: netbsd-%-xen-kernel-$(NETBSD_CVSSNAP).tar.bz2 - ln -fs $< $@ - -ifeq ($(OS),linux) -OS_VER = $(LINUX_VER) -else -OS_VER = $(NETBSD_VER) -endif - pristine-%: pristine-%/.valid-pristine @true @@ -84,6 +59,7 @@ ifneq ($(PATCHDIRS),) $(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep: @echo 'ref-$*/.valid-ref: $$(wildcard patches/$*/*.patch)' >$@ +.PHONY: clean clean:: rm -f patches/*/.makedep @@ -124,27 +100,21 @@ linux-2.6-xen.patch: ref-linux-$(LINUX_V rm -rf tmp-$@ cp -al $(<D) tmp-$@ ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ ) - diff -Nurp $(<D) tmp-$@ > $@ || true + diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ %-xen.patch: ref-%/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ ( cd $*-xen-sparse && ./mkbuildtree ../tmp-$@ ) - diff -Nurp $(<D) tmp-$@ > $@ || true + diff -Nurp $(patsubst ref%,pristine%,$(<D)) tmp-$@ > $@ || true rm -rf tmp-$@ -%-mrproper: %-mrproper-extra +%-mrproper: rm -rf pristine-$(*)* ref-$(*)* $*.tar.bz2 rm -rf $*-xen.patch -netbsd-%-mrproper-extra: - rm -rf netbsd-$*-tools netbsd-$*-tools.tar.bz2 - rm -f netbsd-$*-xen-kernel-$(NETBSD_CVSSNAP).tar.bz2 - -%-mrproper-extra: - @: # do nothing - +.PHONY: config-update-pae config-update-pae: ifeq ($(XEN_TARGET_X86_PAE),y) sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\# CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) > $(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE) diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Tue Apr 11 18:54:18 2006 -0600 @@ -95,6 +95,7 @@ CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_SYSFS=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Thu Feb 16 22:52:42 2006 +# Linux kernel version: 2.6.16-xen0 +# Sat Apr 8 11:34:07 2006 # CONFIG_X86_32=y CONFIG_SEMAPHORE_SLEEPERS=y @@ -208,7 +208,6 @@ CONFIG_ACPI_EC=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y -# CONFIG_X86_PM_TIMER is not set # CONFIG_ACPI_CONTAINER is not set # @@ -392,7 +391,13 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # # Plug and Play support # -# CONFIG_PNP is not set +CONFIG_PNP=y +CONFIG_PNP_DEBUG=y + +# +# Protocols +# +CONFIG_PNPACPI=y # # Block devices @@ -440,6 +445,7 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_IDE_GENERIC=y # CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_IDEPNP is not set CONFIG_BLK_DEV_IDEPCI=y # CONFIG_IDEPCI_SHARE_IRQ is not set # CONFIG_BLK_DEV_OFFBOARD is not set @@ -623,6 +629,7 @@ CONFIG_NETDEVICES=y # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set CONFIG_TUN=y +# CONFIG_NET_SB1000 is not set # # ARCnet devices @@ -1064,11 +1071,7 @@ CONFIG_USB_MON=y # CONFIG_INFINIBAND is 
not set # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1231,6 +1234,7 @@ CONFIG_NLS_ISO8859_1=y # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # @@ -1305,13 +1309,14 @@ CONFIG_CRYPTO_CRC32C=m # # CONFIG_CRYPTO_DEV_PADLOCK is not set CONFIG_XEN=y -CONFIG_NO_IDLE_HZ=y +CONFIG_XEN_INTERFACE_VERSION=0x00030101 # # XEN # CONFIG_XEN_PRIVILEGED_GUEST=y # CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y @@ -1331,6 +1336,7 @@ CONFIG_XEN_SYSFS=y CONFIG_XEN_SYSFS=y CONFIG_HAVE_ARCH_ALLOC_SKB=y CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y +CONFIG_NO_IDLE_HZ=y # # Library routines @@ -1343,4 +1349,6 @@ CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_NO_TSS=y +CONFIG_X86_NO_IDT=y CONFIG_KTIME_SCALAR=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Tue Apr 11 18:54:18 2006 -0600 @@ -327,7 +327,13 @@ CONFIG_STANDALONE=y # # Plug and Play support # -# CONFIG_PNP is not set +CONFIG_PNP=y +CONFIG_PNP_DEBUG=y + +# +# Protocols +# +CONFIG_PNPACPI=y # # Block devices @@ -375,6 +381,7 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_IDE_GENERIC=y # CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_IDEPNP is not set CONFIG_BLK_DEV_IDEPCI=y # CONFIG_IDEPCI_SHARE_IRQ is not set # CONFIG_BLK_DEV_OFFBOARD is not set @@ -559,6 +566,7 @@ CONFIG_NETDEVICES=y # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set CONFIG_TUN=y +# CONFIG_NET_SB1000 is not set # # ARCnet devices @@ -1183,6 +1191,7 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set CONFIG_FRAME_POINTER=y @@ -1237,6 +1246,7 @@ CONFIG_NO_IDLE_HZ=y # CONFIG_XEN_PRIVILEGED_GUEST=y # CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Tue Apr 11 18:54:18 2006 -0600 @@ -92,6 +92,7 @@ CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_SYSFS=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Tue Apr 11 18:54:18 2006 -0600 @@ -779,6 +779,7 @@ CONFIG_NLS_ISO8859_1=y # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # @@ -858,13 +859,9 @@ CONFIG_NO_IDLE_HZ=y # # CONFIG_XEN_PRIVILEGED_GUEST is not set CONFIG_XEN_UNPRIVILEGED_GUEST=y -# CONFIG_XEN_PCIDEV_BACKEND is not set -# CONFIG_XEN_BLKDEV_BACKEND is not set -# CONFIG_XEN_NETDEV_BACKEND is not set -# CONFIG_XEN_TPMDEV_BACKEND is not set +# CONFIG_XEN_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y 
CONFIG_XEN_NETDEV_FRONTEND=y -# CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_TPMDEV_FRONTEND is not set CONFIG_XEN_SCRUB_PAGES=y CONFIG_XEN_DISABLE_SERIAL=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Tue Apr 11 18:54:18 2006 -0600 @@ -1080,6 +1080,7 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set CONFIG_FRAME_POINTER=y @@ -1134,13 +1135,9 @@ CONFIG_NO_IDLE_HZ=y # # CONFIG_XEN_PRIVILEGED_GUEST is not set CONFIG_XEN_UNPRIVILEGED_GUEST=y -# CONFIG_XEN_PCIDEV_BACKEND is not set -# CONFIG_XEN_BLKDEV_BACKEND is not set -# CONFIG_XEN_NETDEV_BACKEND is not set -# CONFIG_XEN_TPMDEV_BACKEND is not set +# CONFIG_XEN_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -# CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_TPMDEV_FRONTEND is not set CONFIG_XEN_SCRUB_PAGES=y CONFIG_XEN_DISABLE_SERIAL=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen_ia64 Tue Apr 11 18:54:18 2006 -0600 @@ -95,6 +95,7 @@ CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_PRIVILEGED_GUEST=y CONFIG_XEN_BLKDEV_GRANT=y CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_SYSFS=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Tue Apr 11 18:54:18 2006 -0600 @@ -292,11 +292,11 @@ CONFIG_NET=y # Networking options # # CONFIG_NETDEBUG is not set -CONFIG_PACKET=m +CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -CONFIG_UNIX=m +CONFIG_UNIX=y CONFIG_XFRM=y -CONFIG_XFRM_USER=m +CONFIG_XFRM_USER=y CONFIG_NET_KEY=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -912,12 +912,12 @@ CONFIG_PARPORT_1284=y # Plug and Play support # CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +CONFIG_PNP_DEBUG=y # # Protocols # -# CONFIG_PNPACPI is not set +CONFIG_PNPACPI=y # # Block devices @@ -2892,6 +2892,7 @@ CONFIG_NLS_UTF8=m # # Instrumentation Support # +# CONFIG_PROFILING is not set # CONFIG_KPROBES is not set # @@ -2978,6 +2979,7 @@ CONFIG_NO_IDLE_HZ=y # CONFIG_XEN_PRIVILEGED_GUEST=y # CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Tue Apr 11 18:54:18 2006 -0600 @@ -776,7 +776,13 @@ CONFIG_PARPORT_1284=y # # Plug and Play support # -# CONFIG_PNP is not set +CONFIG_PNP=y +CONFIG_PNP_DEBUG=y + +# +# Protocols +# +CONFIG_PNPACPI=y # # Block devices @@ -857,6 +863,7 @@ CONFIG_IDE_GENERIC=y CONFIG_IDE_GENERIC=y CONFIG_BLK_DEV_CMD640=y CONFIG_BLK_DEV_CMD640_ENHANCED=y +CONFIG_BLK_DEV_IDEPNP=y CONFIG_BLK_DEV_IDEPCI=y CONFIG_IDEPCI_SHARE_IRQ=y # CONFIG_BLK_DEV_OFFBOARD is not set @@ -1088,6 +1095,7 @@ CONFIG_BONDING=m CONFIG_BONDING=m CONFIG_EQUALIZER=m CONFIG_TUN=m +CONFIG_NET_SB1000=m # # ARCnet devices @@ -2587,6 +2595,7 @@ CONFIG_DETECT_SOFTLOCKUP=y 
# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set # CONFIG_FRAME_POINTER is not set @@ -2647,6 +2656,7 @@ CONFIG_NO_IDLE_HZ=y # CONFIG_XEN_PRIVILEGED_GUEST=y # CONFIG_XEN_UNPRIVILEGED_GUEST is not set +CONFIG_XEN_BACKEND=y CONFIG_XEN_PCIDEV_BACKEND=y # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y diff -r 5719550652a1 -r 5cc367720223 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Tue Apr 11 13:55:47 2006 -0600 +++ b/buildconfigs/mk.linux-2.6-xen Tue Apr 11 18:54:18 2006 -0600 @@ -1,19 +1,15 @@ - -OS = linux - LINUX_SERIES = 2.6 LINUX_VER = 2.6.16 LINUX_SRCS = linux-2.6.16.tar.bz2 EXTRAVERSION ?= xen -LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) +LINUX_DIR = linux-$(LINUX_VER)-$(EXTRAVERSION) include buildconfigs/Rules.mk -.PHONY: build clean delete - # The real action starts here! +.PHONY: build build: $(LINUX_DIR)/include/linux/autoconf.h if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \ $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) modules ; \ @@ -22,7 +18,7 @@ build: $(LINUX_DIR)/include/linux/autoco $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) vmlinuz $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) install -$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref +$(LINUX_DIR)/include/linux/autoconf.h: ref-linux-$(LINUX_VER)/.valid-ref rm -rf $(LINUX_DIR) cp -al $(<D) $(LINUX_DIR) # Apply arch-xen patches @@ -42,14 +38,18 @@ build: $(LINUX_DIR)/include/linux/autoco rm -f Makefile ; mv Mk.tmp Makefile ) $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig +.PHONY: prep prep: $(LINUX_DIR)/include/linux/autoconf.h +.PHONY: config config: CONFIGMODE = menuconfig config: $(LINUX_DIR)/include/linux/autoconf.h $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) $(CONFIGMODE) +.PHONY: clean clean:: $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean +.PHONY: delete delete: - rm -rf tmp-$(OS)-$(LINUX_VER) $(LINUX_DIR) + rm -rf tmp-linux-$(LINUX_VER) $(LINUX_DIR) diff -r 5719550652a1 -r 5cc367720223 docs/Makefile --- a/docs/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/docs/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -26,22 +26,28 @@ DOC_HTML := $(patsubst src/%.tex,html/%/ GFX = $(patsubst %.fig, %.eps, $(wildcard figs/*.fig)) -.PHONY: all build dev-docs python-dev-docs ps pdf html clean install +.PHONY: all +all: build -all: build +.PHONY: build build: ps pdf html man-pages rm -f *.aux *.dvi *.bbl *.blg *.glo *.idx *.ilg *.log *.ind *.toc +.PHONY: dev-docs dev-docs: python-dev-docs +.PHONY: ps ps: $(DOC_PS) +.PHONY: pdf pdf: $(DOC_PDF) +.PHONY: html html: @if which $(LATEX2HTML) 1>/dev/null 2>/dev/null; then \ $(MAKE) $(DOC_HTML); fi +.PHONY: python-dev-docs python-dev-docs: @mkdir -v -p api/tools/python @if which $(DOXYGEN) 1>/dev/null 2>/dev/null; then \ @@ -50,6 +56,7 @@ python-dev-docs: $(MAKE) -C api/tools/python/latex ; else \ echo "Doxygen not installed; skipping python-dev-docs."; fi +.PHONY: man-pages man-pages: @if which $(POD2MAN) 1>/dev/null 2>/dev/null; then \ $(MAKE) $(DOC_MAN1) $(DOC_MAN5); fi @@ -64,6 +71,7 @@ man5/%.5: man/%.pod.5 Makefile $(POD2MAN) --release=$(VERSION) --name=`echo $@ | sed 's/^man5.//'| \ sed 's/.5//'` -s 5 -c "Xen" $< $@ +.PHONY: clean clean: rm -rf .word_count *.aux *.dvi *.bbl *.blg *.glo *.idx *~ rm -rf *.ilg *.log *.ind *.toc *.bak core @@ -72,6 +80,7 @@ clean: rm -rf man5 rm -rf man1 +.PHONY: install 
install: all rm -rf $(DESTDIR)$(pkgdocdir) $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir) diff -r 5719550652a1 -r 5cc367720223 docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Tue Apr 11 13:55:47 2006 -0600 +++ b/docs/man/xm.pod.1 Tue Apr 11 18:54:18 2006 -0600 @@ -360,7 +360,7 @@ Moves a domain out of the paused state. Moves a domain out of the paused state. This will allow a previously paused domain to now be eligible for scheduling by the Xen hypervisor. -=item B<set-vcpus> I<domain-id> I<vcpu-count> +=item B<vcpu-set> I<domain-id> I<vcpu-count> Enables the I<vcpu-count> virtual CPUs for the domain in question. Like mem-set, this command can only allocate up to the maximum virtual @@ -370,8 +370,8 @@ VCPUs, the highest number VCPUs will be VCPUs, the highest number VCPUs will be hotplug removed. This may be important for pinning purposes. -Attempting to set-vcpus to a number larger than the initially -configured VCPU count is an error. Trying to set-vcpus to < 1 will be +Attempting to set the VCPUs to a number larger than the initially +configured VCPU count is an error. Trying to set VCPUs to < 1 will be quietly ignored. =item B<vcpu-list> I<[domain-id]> diff -r 5719550652a1 -r 5cc367720223 docs/src/user.tex --- a/docs/src/user.tex Tue Apr 11 13:55:47 2006 -0600 +++ b/docs/src/user.tex Tue Apr 11 18:54:18 2006 -0600 @@ -2052,7 +2052,7 @@ dev86 & The dev86 package provides an as If the dev86 package is not available on the x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\ -LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse are virtualized using the vncserver library provided by this package. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. The 0.8pre version of libvncserver is currently working well with Xen.\\ +LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse can be virtualized by the vncserver library. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. There is a significant performance degradation in 0.8 version. The current sources in the CVS tree have fixed this degradation. So it is highly recommended to download the latest CVS sources and install them.\\ SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console. @@ -2076,6 +2076,8 @@ acpi & Enable VMX guest ACPI, default=0 acpi & Enable VMX guest ACPI, default=0 (disabled)\\ apic & Enable VMX guest APIC, default=0 (disabled)\\ + +pae & Enable VMX guest PAE, default=0 (disabled)\\ vif & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\ @@ -2229,6 +2231,30 @@ Simply follow the usual method of creati In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. 
You can also turn off VNC by setting {\small {\tt vnc=0}}. +\subsection{Use mouse in VNC window} +The default PS/2 mouse will not work properly in VMX by a VNC window. Summagraphics mouse emulation does work in this environment. A Summagraphics mouse can be enabled by reconfiguring 2 services: + +{\small {\tt 1. General Purpose Mouse (GPM). The GPM daemon is configured in different ways in different Linux distributions. On a Redhat distribution, this is accomplished by changing the file `/etc/sysconfig/mouse' to have the following:\\ +MOUSETYPE="summa"\\ +XMOUSETYPE="SUMMA"\\ +DEVICE=/dev/ttyS0\\ +\\ +2. X11. For all Linux distributions, change the Mouse0 stanza in `/etc/X11/xorg.conf' to:\\ +Section "InputDevice"\\ +Identifier "Mouse0"\\ +Driver "summa"\\ +Option "Device" "/dev/ttyS0"\\ +Option "InputFashion" "Tablet"\\ +Option "Mode" "Absolute"\\ +Option "Name" "EasyPen"\\ +Option "Compatible" "True"\\ +Option "Protocol" "Auto"\\ +Option "SendCoreEvents" "on"\\ +Option "Vendor" "GENIUS"\\ +EndSection}} + +If the Summagraphics mouse isn't the default mouse, you can manually kill 'gpm' and restart it with the command "gpm -m /dev/ttyS0 -t summa". Note that Summagraphics mouse makes no sense in an SDL window and is therefore not available in this environment. + \subsection{Destroy VMX guests} VMX guests can be destroyed in the same way as can paravirtualized guests. We recommend that you type the command diff -r 5719550652a1 -r 5cc367720223 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/extras/mini-os/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -37,8 +37,10 @@ HDRS := $(wildcard include/*.h) HDRS := $(wildcard include/*.h) HDRS += $(wildcard include/xen/*.h) +.PHONY: default default: $(TARGET) +.PHONY: links links: [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen @@ -46,6 +48,7 @@ links: $(LD) -N -T minios-$(TARGET_ARCH).lds $(OBJS) -o $@.elf gzip -f -9 -c $@.elf >$@.gz +.PHONY: clean clean: find . -type f -name '*.o' | xargs rm -f rm -f *.o *~ core $(TARGET).elf $(TARGET).raw $(TARGET) $(TARGET).gz diff -r 5719550652a1 -r 5cc367720223 extras/mini-os/traps.c --- a/extras/mini-os/traps.c Tue Apr 11 13:55:47 2006 -0600 +++ b/extras/mini-os/traps.c Tue Apr 11 18:54:18 2006 -0600 @@ -4,6 +4,7 @@ #include <hypervisor.h> #include <mm.h> #include <lib.h> +#include <sched.h> /* * These are assembler stubs in entry.S. 
@@ -31,6 +32,8 @@ void machine_check(void); void dump_regs(struct pt_regs *regs) { + printk("Thread: %s\n", current->name); +#ifdef __i386__ printk("EIP: %x, EFLAGS %x.\n", regs->eip, regs->eflags); printk("EBX: %08x ECX: %08x EDX: %08x\n", regs->ebx, regs->ecx, regs->edx); @@ -40,9 +43,22 @@ void dump_regs(struct pt_regs *regs) regs->xds, regs->xes, regs->orig_eax, regs->eip); printk("CS: %04x EFLAGS: %08x esp: %08x ss: %04x\n", regs->xcs, regs->eflags, regs->esp, regs->xss); - -} - +#else + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", + regs->ss, regs->rsp, regs->eflags); + printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP: %016lx R08: %016lx R09: %016lx\n", + regs->rbp, regs->r8, regs->r9); + printk("R10: %016lx R11: %016lx R12: %016lx\n", + regs->r10, regs->r11, regs->r12); + printk("R13: %016lx R14: %016lx R15: %016lx\n", + regs->r13, regs->r14, regs->r15); +#endif +} static void do_trap(int trapnr, char *str, struct pt_regs * regs, unsigned long error_code) { @@ -110,36 +126,17 @@ void do_page_fault(struct pt_regs *regs, printk("Page fault at linear address %p, regs %p, code %lx\n", addr, regs, error_code); dump_regs(regs); -#ifdef __x86_64__ - /* FIXME: _PAGE_PSE */ - { - unsigned long *tab = (unsigned long *)start_info.pt_base; - unsigned long page; - - printk("Pagetable walk from %p:\n", tab); - - page = tab[l4_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L4 = %p (%p)\n", page, tab); - - page = tab[l3_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L3 = %p (%p)\n", page, tab); - - page = tab[l2_table_offset(addr)]; - tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L2 = %p (%p)\n", page, tab); - - page = tab[l1_table_offset(addr)]; - printk(" L1 = %p\n", page); - } -#endif + page_walk(addr); do_exit(); } void do_general_protection(struct pt_regs *regs, long error_code) { - printk("GPF %p, error_code=%lx\n", regs, error_code); +#ifdef __i386__ + printk("GPF eip: %p, error_code=%lx\n", regs->eip, error_code); +#else + printk("GPF rip: %p, error_code=%lx\n", regs->rip, error_code); +#endif dump_regs(regs); do_exit(); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Tue Apr 11 18:54:18 2006 -0600 @@ -1116,9 +1116,7 @@ menu "Instrumentation Support" menu "Instrumentation Support" depends on EXPERIMENTAL -if !X86_XEN source "arch/i386/oprofile/Kconfig" -endif config KPROBES bool "Kprobes (EXPERIMENTAL)" diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/Makefile --- a/linux-2.6-xen-sparse/arch/i386/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -44,6 +44,11 @@ CFLAGS += $(shell if [ $(call cc-vers CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) CFLAGS += $(cflags-y) + +cppflags-$(CONFIG_XEN) += \ + -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) + +CPPFLAGS += $(cppflags-y) # Default subarch .c files mcore-y := mach-default @@ -107,7 +112,7 @@ boot := arch/i386/boot zdisk bzdisk fdimage fdimage144 fdimage288 install ifdef CONFIG_XEN -CPPFLAGS := -D__KERNEL__ 
-Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(LINUXINCLUDE) +CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o boot := arch/i386/boot-xen .PHONY: vmlinuz @@ -157,3 +162,4 @@ endef endef CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += vmlinuz vmlinux-stripped diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Tue Apr 11 18:54:18 2006 -0600 @@ -65,7 +65,6 @@ ORIG_EAX = 0x24 ORIG_EAX = 0x24 EIP = 0x28 CS = 0x2C -EVENT_MASK = 0x2E EFLAGS = 0x30 OLDESP = 0x34 OLDSS = 0x38 @@ -290,14 +289,14 @@ restore_nocheck: restore_nocheck: #else restore_nocheck: - testl $(VM_MASK|NMI_MASK), EFLAGS(%esp) + movl EFLAGS(%esp), %eax + testl $(VM_MASK|NMI_MASK), %eax jnz hypervisor_iret - movb EVENT_MASK(%esp), %al - notb %al # %al == ~saved_mask + shr $9, %eax # EAX[0] == IRET_EFLAGS.IF GET_VCPU_INFO andb evtchn_upcall_mask(%esi),%al - andb $1,%al # %al == mask & ~saved_mask - jnz restore_all_enable_events # != 0 => reenable event delivery + andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask + jnz restore_all_enable_events # != 0 => enable event delivery #endif RESTORE_REGS addl $4, %esp @@ -462,7 +461,7 @@ ENTRY(irq_entries_start) ENTRY(irq_entries_start) .rept NR_IRQS ALIGN -1: pushl 0x80000000+$vector +1: pushl $~(vector) jmp common_interrupt .data .long 1b @@ -479,7 +478,7 @@ common_interrupt: #define BUILD_INTERRUPT(name, nr) \ ENTRY(name) \ - pushl 0x80000000+$nr; \ + pushl $~(nr); \ SAVE_ALL \ movl %esp,%eax; \ call smp_/**/name; \ @@ -555,14 +554,9 @@ scrit: /**** START OF CRITICAL REGION ** RESTORE_REGS addl $4, %esp 1: iret -.section .fixup,"ax" -2: pushl $0 - pushl $do_iret_error - jmp error_code -.previous .section __ex_table,"a" .align 4 - .long 1b,2b + .long 1b,iret_exc .previous 14: __DISABLE_INTERRUPTS jmp 11b @@ -614,30 +608,51 @@ critical_fixup_table: .byte 0x00,0x00 # jmp 11b # Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we fix up by reattempting the load, and zeroing the segment +# register if the load fails. +# Category 2 we fix up by jumping to do_iret_error. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by maintaining a status value in EAX. 
ENTRY(failsafe_callback) -1: popl %ds -2: popl %es -3: popl %fs -4: popl %gs - subl $4,%esp - SAVE_ALL - jmp ret_from_exception -.section .fixup,"ax"; \ -6: movl $0,(%esp); \ - jmp 1b; \ -7: movl $0,(%esp); \ - jmp 2b; \ -8: movl $0,(%esp); \ - jmp 3b; \ -9: movl $0,(%esp); \ - jmp 4b; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,6b; \ - .long 2b,7b; \ - .long 3b,8b; \ - .long 4b,9b; \ + pushl %eax + movl $1,%eax +1: mov 4(%esp),%ds +2: mov 8(%esp),%es +3: mov 12(%esp),%fs +4: mov 16(%esp),%gs + testl %eax,%eax + popl %eax + jz 5f + addl $16,%esp # EAX != 0 => Category 2 (Bad IRET) + jmp iret_exc +5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment) + pushl $0 + SAVE_ALL + jmp ret_from_exception +.section .fixup,"ax"; \ +6: xorl %eax,%eax; \ + movl %eax,4(%esp); \ + jmp 1b; \ +7: xorl %eax,%eax; \ + movl %eax,8(%esp); \ + jmp 2b; \ +8: xorl %eax,%eax; \ + movl %eax,12(%esp); \ + jmp 3b; \ +9: xorl %eax,%eax; \ + movl %eax,16(%esp); \ + jmp 4b; \ +.previous; \ +.section __ex_table,"a"; \ + .align 4; \ + .long 1b,6b; \ + .long 2b,7b; \ + .long 3b,8b; \ + .long 4b,9b; \ .previous #endif diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/irq-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -54,7 +54,7 @@ fastcall unsigned int do_IRQ(struct pt_r fastcall unsigned int do_IRQ(struct pt_regs *regs) { /* high bit used in ret_from_ code */ - int irq = regs->orig_eax & __IRQ_MASK(BITS_PER_LONG - 1); + int irq = ~regs->orig_eax; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; u32 *isp; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -69,7 +69,7 @@ dma_map_sg(struct device *hwdev, struct } else { for (i = 0; i < nents; i++ ) { sg[i].dma_address = - page_to_phys(sg[i].page) + sg[i].offset; + page_to_bus(sg[i].page) + sg[i].offset; sg[i].dma_length = sg[i].length; BUG_ON(!sg[i].page); IOMMU_BUG_ON(address_needs_mapping( @@ -105,7 +105,7 @@ dma_map_page(struct device *dev, struct dma_addr = swiotlb_map_page( dev, page, offset, size, direction); } else { - dma_addr = page_to_phys(page) + offset; + dma_addr = page_to_bus(page) + offset; IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -114,7 +114,7 @@ void xen_idle(void) smp_mb__after_clear_bit(); stop_hz_timer(); /* Blocking includes an implicit local_irq_enable(). 
*/ - HYPERVISOR_sched_op(SCHEDOP_block, 0); + HYPERVISOR_block(); start_hz_timer(); set_thread_flag(TIF_POLLING_NRFLAG); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/quirks-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/quirks-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/quirks-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -5,7 +5,7 @@ #include <linux/pci.h> #include <linux/irq.h> -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) +#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) { diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -1317,6 +1317,11 @@ void __init setup_bootmem_allocator(void } } #endif +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1); +#endif if (!xen_feature(XENFEAT_auto_translated_physmap)) phys_to_machine_mapping = @@ -1435,11 +1440,6 @@ legacy_init_iomem_resources(struct resou #endif } } -#endif -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1); #endif } @@ -1633,9 +1633,9 @@ void __init setup_arch(char **cmdline_p) physdev_op_t op; unsigned long max_low_pfn; - /* Force a quick death if the kernel panics. */ + /* Force a quick death if the kernel panics (not domain 0). */ extern int panic_timeout; - if (panic_timeout == 0) + if (!panic_timeout && !(xen_start_info->flags & SIF_INITDOMAIN)) panic_timeout = 1; /* Register a call for panic conditions. */ @@ -1848,10 +1848,6 @@ void __init setup_arch(char **cmdline_p) get_smp_config(); #endif - /* XXX Disable irqdebug until we have a way to avoid interrupt - * conflicts. */ - noirqdebug_setup(""); - register_memory(); if (xen_start_info->flags & SIF_INITDOMAIN) { @@ -1877,7 +1873,7 @@ static int static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); + HYPERVISOR_shutdown(SHUTDOWN_crash); /* we're never actually going to get here... 
*/ return NOTIFY_DONE; } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue Apr 11 18:54:18 2006 -0600 @@ -32,7 +32,7 @@ EXPORT_SYMBOL(swiotlb); #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) -#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset) +#define SG_ENT_PHYS_ADDRESS(sg) (page_to_bus((sg)->page) + (sg)->offset) /* * Maximum allowable number of contiguous slabs to map, @@ -607,7 +607,7 @@ swiotlb_map_page(struct device *hwdev, s dma_addr_t dev_addr; char *map; - dev_addr = page_to_phys(page) + offset; + dev_addr = page_to_bus(page) + offset; if (address_needs_mapping(hwdev, dev_addr)) { buffer.page = page; buffer.offset = offset; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -177,6 +177,32 @@ int touch_pte_range(struct mm_struct *mm EXPORT_SYMBOL(touch_pte_range); +void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot) +{ + int error; + + struct vm_struct *vma; + vma = get_vm_area (vm_size, VM_IOREMAP); + + if (vma == NULL) { + printk ("ioremap.c,vm_map_xen_pages(): " + "Failed to get VMA area\n"); + return NULL; + } + + error = direct_kernel_remap_pfn_range((unsigned long) vma->addr, + maddr >> PAGE_SHIFT, vm_size, + prot, DOMID_SELF ); + if (error == 0) { + return vma->addr; + } else { + printk ("ioremap.c,vm_map_xen_pages(): " + "Failed to map xen shared pages into kernel space\n"); + return NULL; + } +} +EXPORT_SYMBOL(vm_map_xen_pages); + /* * Does @address reside within a non-highmem page that is local to this virtual * machine (i.e., not an I/O page, nor a memory page belonging to another VM). diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue Apr 11 18:54:18 2006 -0600 @@ -79,8 +79,13 @@ config XEN_BLKDEV_FRONTEND bool default y +config XEN_BACKEND + depends on XEN + bool + default y + config XEN_BLKDEV_BACKEND - depends on XEN + depends on XEN && XEN_BACKEND bool default y @@ -90,6 +95,11 @@ config XEN_SYSFS default y help Xen hypervisor attributes will show up under /sys/hypervisor/. 
+ +config XEN_INTERFACE_VERSION + hex + depends on XEN + default 0x00030101 config SCHED_NO_NO_OMIT_FRAME_POINTER bool diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/ia64/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/ia64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -42,6 +42,12 @@ endif endif CFLAGS += $(cflags-y) + +cppflags-$(CONFIG_XEN) += \ + -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) + +CPPFLAGS += $(cppflags-y) + head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o libs-y += arch/ia64/lib/ diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue Apr 11 18:54:18 2006 -0600 @@ -10,7 +10,7 @@ CPPFLAGS_vmlinux.lds += -U$(XENARCH) @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@ -obj-y := gnttab.o +obj-y := gnttab.o features.o obj-$(CONFIG_PROC_FS) += xen_proc.o ifeq ($(ARCH),ia64) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/Makefile --- a/linux-2.6-xen-sparse/arch/x86_64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -30,6 +30,10 @@ cflags-$(CONFIG_MK8) += $(call cc-option cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) CFLAGS += $(cflags-y) + +cppflags-$(CONFIG_XEN) += \ + -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) +CPPFLAGS += $(cppflags-y) CFLAGS += -m64 CFLAGS += -mno-red-zone @@ -71,7 +75,7 @@ boot := arch/x86_64/boot fdimage fdimage144 fdimage288 archclean ifdef CONFIG_XEN -CPPFLAGS := -D__KERNEL__ -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(LINUXINCLUDE) +CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) head-y := arch/x86_64/kernel/head-xen.o arch/x86_64/kernel/head64-xen.o arch/x86_64/kernel/init_task.o LDFLAGS_vmlinux := -e _start boot := arch/i386/boot-xen diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Tue Apr 11 18:54:18 2006 -0600 @@ -520,13 +520,13 @@ retint_check: CFI_REMEMBER_STATE jnz retint_careful retint_restore_args: - movb EVENT_MASK-REST_SKIP(%rsp), %al - notb %al # %al == ~saved_mask - XEN_GET_VCPU_INFO(%rsi) - andb evtchn_upcall_mask(%rsi),%al - andb $1,%al # %al == mask & ~saved_mask - jnz restore_all_enable_events # != 0 => reenable event delivery - XEN_PUT_VCPU_INFO(%rsi) + movl EFLAGS-REST_SKIP(%rsp), %eax + shr $9, %eax # EAX[0] == IRET_EFLAGS.IF + XEN_GET_VCPU_INFO(%rsi) + andb evtchn_upcall_mask(%rsi),%al + andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask + jnz restore_all_enable_events # != 0 => enable event delivery + XEN_PUT_VCPU_INFO(%rsi) RESTORE_ARGS 0,8,0 HYPERVISOR_IRET 0 @@ -584,7 +584,7 @@ retint_kernel: */ .macro apicinterrupt num,func INTR_FRAME - pushq 0x8000000000000000+$\num + pushq $~(\num) CFI_ADJUST_CFA_OFFSET 8 interrupt \func jmp error_entry @@ -803,12 +803,11 @@ ENTRY(hypervisor_callback) # So, on entry to the handler we detect whether we interrupted an # existing activation in its critical region -- if so, we pop the current # activation and restart the handler using the previous one. 
-ENTRY(do_hypervisor_callback) # do_hyperviosr_callback(struct *pt_regs) +ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) # Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will # see the correct pointer to the pt_regs movq %rdi, %rsp # we don't return, adjust the stack frame -11: movb $0, EVENT_MASK(%rdi) - movq %gs:pda_irqstackptr,%rax +11: movq %gs:pda_irqstackptr,%rax incl %gs:pda_irqcount cmovzq %rax,%rsp pushq %rdi @@ -853,15 +852,44 @@ ecrit: /**** END OF CRITICAL REGION *** # When the kernel is interrupted in the critical section, the kernel # will do IRET in that case, and everything will be restored at that point, # i.e. it just resumes from the next instruction interrupted with the same context. - + # Hypervisor uses this for application faults while it executes. -# Unlike i386 there is no need to reload the saved segment selectors: -# Xen already reloaded all valid ones and zeroed the others. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we do not need to fix up as Xen has already reloaded all segment +# registers that could be reloaded and zeroed the others. +# Category 2 we fix up by killing the current process. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by comparing each saved segment register +# with its current contents: any discrepancy means we in category 1. ENTRY(failsafe_callback) - addq $0x30,%rsp /* skip %rcx,%r11,%ds,%es,%fs,%gs */ + movw %ds,%cx + cmpw %cx,0x10(%rsp) + jne 1f + movw %es,%cx + cmpw %cx,0x18(%rsp) + jne 1f + movw %fs,%cx + cmpw %cx,0x20(%rsp) + jne 1f + movw %gs,%cx + cmpw %cx,0x28(%rsp) + jne 1f + /* All segments match their saved values => Category 2 (Bad IRET). */ + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x30,%rsp + movq $-9999,%rdi /* better code? */ + jmp do_exit +1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x30,%rsp + pushq $0 SAVE_ALL - jmp error_exit - + jmp error_exit #if 0 .section __ex_table,"a" .align 8 diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/irq-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -97,7 +97,7 @@ asmlinkage unsigned int do_IRQ(struct pt asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { /* high bit used in ret_from_ code */ - int irq = regs->orig_rax & __IRQ_MASK(BITS_PER_LONG - 1); + unsigned irq = ~regs->orig_rax; exit_idle(); irq_enter(); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -131,7 +131,7 @@ void xen_idle(void) smp_mb__after_clear_bit(); stop_hz_timer(); /* Blocking includes an implicit local_irq_enable(). 
*/ - HYPERVISOR_sched_op(SCHEDOP_block, 0); + HYPERVISOR_block(); start_hz_timer(); set_thread_flag(TIF_POLLING_NRFLAG); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Apr 11 18:54:18 2006 -0600 @@ -999,7 +999,7 @@ static int static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); + HYPERVISOR_shutdown(SHUTDOWN_crash); /* we're never actually going to get here... */ return NOTIFY_DONE; } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/xen_entry.S Tue Apr 11 18:54:18 2006 -0600 @@ -2,7 +2,7 @@ * Copied from arch/xen/i386/kernel/entry.S */ /* Offsets into shared_info_t. */ -#define evtchn_upcall_pending 0 +#define evtchn_upcall_pending /* 0 */ #define evtchn_upcall_mask 1 #define sizeof_vcpu_shift 6 @@ -35,7 +35,6 @@ XEN_PUT_VCPU_INFO(reg) #define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) -EVENT_MASK = (CS+4) VGCF_IN_SYSCALL = (1<<8) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Apr 11 18:54:18 2006 -0600 @@ -10,12 +10,11 @@ config XEN help This is the Linux Xen port. -config NO_IDLE_HZ - bool - depends on XEN - default y - if XEN +config XEN_INTERFACE_VERSION + hex + default 0x00030101 + menu "XEN" config XEN_PRIVILEGED_GUEST @@ -29,9 +28,16 @@ config XEN_UNPRIVILEGED_GUEST bool default !XEN_PRIVILEGED_GUEST +config XEN_BACKEND + tristate "Backend driver support" + default y + help + Support for backend device drivers that provide I/O services + to other virtual machines. 
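A note on the scheduler hypercall changes above (xen_idle() in process-xen.c and xen_panic_event() in setup-xen.c): open-coded HYPERVISOR_sched_op(SCHEDOP_block, ...) and HYPERVISOR_sched_op(SCHEDOP_shutdown, ...) calls are replaced by HYPERVISOR_block() and HYPERVISOR_shutdown(reason) wrappers. This fits with the __XEN_INTERFACE_VERSION__=0x00030101 definitions added to the ia64 and x86_64 Makefiles, since the newer interface passes shutdown parameters through a structure rather than a bare reason code, and a wrapper keeps the call sites version-independent. The sketch below shows one assumed shape for such wrappers; it is illustrative only, the actual header definitions are not part of this changeset.

/* Assumed wrapper shapes -- illustrative sketch, not the real headers. */
static inline int HYPERVISOR_block(void)
{
        /* SCHEDOP_block ignores its argument. */
        return HYPERVISOR_sched_op(SCHEDOP_block, NULL);
}

static inline int HYPERVISOR_shutdown(unsigned int reason)
{
        struct sched_shutdown shutdown = { .reason = reason };

        return HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
}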
+ config XEN_PCIDEV_BACKEND tristate "PCI device backend driver" - depends PCI + depends on PCI && XEN_BACKEND default XEN_PRIVILEGED_GUEST help The PCI device backend driver allows the kernel to export arbitrary @@ -71,6 +77,7 @@ config XEN_PCIDEV_BE_DEBUG config XEN_BLKDEV_BACKEND tristate "Block-device backend driver" + depends on XEN_BACKEND default y help The block-device backend driver allows the kernel to export its @@ -92,6 +99,7 @@ config XEN_BLKDEV_TAP_BE config XEN_NETDEV_BACKEND tristate "Network-device backend driver" + depends on XEN_BACKEND default y help The network-device backend driver allows the kernel to export its @@ -121,6 +129,7 @@ config XEN_NETDEV_LOOPBACK config XEN_TPMDEV_BACKEND tristate "TPM-device backend driver" + depends on XEN_BACKEND default n help The TPM-device backend driver @@ -156,6 +165,7 @@ config XEN_NETDEV_FRONTEND config XEN_BLKDEV_TAP tristate "Block device tap driver" + depends on XEN_BACKEND default n help This driver allows a VM to interact on block device channels @@ -191,7 +201,6 @@ config XEN_DISABLE_SERIAL config XEN_SYSFS tristate "Export Xen attributes in sysfs" - depends on XEN depends on SYSFS default y help @@ -207,4 +216,8 @@ config HAVE_ARCH_DEV_ALLOC_SKB bool default y +config NO_IDLE_HZ + bool + default y + endif diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Apr 11 18:54:18 2006 -0600 @@ -215,52 +215,26 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { - blkif_t *blkif = arg; + blkif_t *blkif = arg; blkif_get(blkif); + if (debug_lvl) printk(KERN_DEBUG "%s: started\n", current->comm); - for (;;) { - if (kthread_should_stop()) { - /* asked to quit? */ - if (!atomic_read(&blkif->io_pending)) - break; - if (debug_lvl) - printk(KERN_DEBUG "%s: I/O pending, " - "delaying exit\n", current->comm); - } - - if (!atomic_read(&blkif->io_pending)) { - /* Wait for work to do. */ - wait_event_interruptible( - blkif->wq, - (atomic_read(&blkif->io_pending) || - kthread_should_stop())); - } else if (list_empty(&pending_free)) { - /* Wait for pending_req becoming available. */ - wait_event_interruptible( - pending_free_wq, - !list_empty(&pending_free)); - } - - if (blkif->status != CONNECTED) { - /* make sure we are connected */ - if (debug_lvl) - printk(KERN_DEBUG "%s: not connected " - "(%d pending)\n", - current->comm, - atomic_read(&blkif->io_pending)); - wait_event_interruptible( - blkif->wq, - (blkif->status == CONNECTED || - kthread_should_stop())); - continue; - } - - /* Schedule I/O */ - atomic_set(&blkif->io_pending, 0); + + while (!kthread_should_stop()) { + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + if (do_block_io_op(blkif)) - atomic_inc(&blkif->io_pending); + blkif->waiting_reqs = 1; unplug_queue(blkif); if (log_stats && time_after(jiffies, blkif->st_print)) @@ -271,8 +245,10 @@ int blkif_schedule(void *arg) print_stats(blkif); if (debug_lvl) printk(KERN_DEBUG "%s: exiting\n", current->comm); + blkif->xenblkd = NULL; blkif_put(blkif); + return 0; } @@ -311,12 +287,15 @@ static int end_block_io_op(struct bio *b * NOTIFICATION FROM GUEST OS. 
*/ +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) { - blkif_t *blkif = dev_id; - - atomic_inc(&blkif->io_pending); - wake_up(&blkif->wq); + blkif_notify_work(dev_id); return IRQ_HANDLED; } @@ -536,10 +515,8 @@ static void make_response(blkif_t *blkif } spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); - if (more_to_do) { - atomic_inc(&blkif->io_pending); - wake_up(&blkif->wq); - } + if (more_to_do) + blkif_notify_work(blkif); if (notify) notify_remote_via_irq(blkif->irq); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Apr 11 18:54:18 2006 -0600 @@ -72,7 +72,6 @@ typedef struct blkif_st { /* Back pointer to the backend_info. */ struct backend_info *be; /* Private fields. */ - enum { DISCONNECTED, CONNECTED } status; #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* Is this a blktap frontend */ unsigned int is_blktap; @@ -82,7 +81,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; - atomic_t io_pending; + unsigned int waiting_reqs; request_queue_t *plug; /* statistics */ @@ -133,8 +132,6 @@ irqreturn_t blkif_be_int(int irq, void * irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); int blkif_schedule(void *arg); -void update_blkif_status(blkif_t *blkif); - #endif /* __BLKIF__BACKEND__COMMON_H__ */ /* diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Apr 11 18:54:18 2006 -0600 @@ -45,7 +45,6 @@ blkif_t *alloc_blkif(domid_t domid) memset(blkif, 0, sizeof(*blkif)); blkif->domid = domid; - blkif->status = DISCONNECTED; spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); init_waitqueue_head(&blkif->wq); @@ -138,9 +137,6 @@ int blkif_map(blkif_t *blkif, unsigned l blkif->irq = bind_evtchn_to_irqhandler( blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif); - /* We're potentially connected now */ - update_blkif_status(blkif); - return 0; } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Apr 11 18:54:18 2006 -0600 @@ -16,7 +16,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - #include <stdarg.h> #include <linux/module.h> @@ -25,36 +24,52 @@ #include "common.h" #undef DPRINTK -#define DPRINTK(fmt, args...) \ - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) - +#define DPRINTK(fmt, args...) 
\ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) struct backend_info { struct xenbus_device *dev; blkif_t *blkif; struct xenbus_watch backend_watch; - unsigned major; unsigned minor; char *mode; }; - -static void maybe_connect(struct backend_info *); static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); -void update_blkif_status(blkif_t *blkif) +static void update_blkif_status(blkif_t *blkif) { - if(blkif->irq && blkif->vbd.bdev) { - blkif->status = CONNECTED; - (void)blkif_be_int(0, blkif, NULL); - } - maybe_connect(blkif->be); + int err; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, + "xvd %d %02x:%02x", + blkif->domid, + blkif->be->major, blkif->be->minor); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } } @@ -91,7 +106,6 @@ static int blkback_remove(struct xenbus_ be->backend_watch.node = NULL; } if (be->blkif) { - be->blkif->status = DISCONNECTED; if (be->blkif->xenblkd) kthread_stop(be->blkif->xenblkd); blkif_put(be->blkif); @@ -142,7 +156,7 @@ static int blkback_probe(struct xenbus_d if (err) goto fail; - err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -185,8 +199,8 @@ static void backend_changed(struct xenbu return; } - if (be->major && be->minor && - (be->major != major || be->minor != minor)) { + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { printk(KERN_WARNING "blkback: changing physical device (from %x:%x to " "%x:%x) not supported.\n", be->major, be->minor, @@ -220,17 +234,6 @@ static void backend_changed(struct xenbu return; } - be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif, - "xvd %d %02x:%02x", - be->blkif->domid, - be->major, be->minor); - if (IS_ERR(be->blkif->xenblkd)) { - err = PTR_ERR(be->blkif->xenblkd); - be->blkif->xenblkd = NULL; - xenbus_dev_error(dev, err, "start xenblkd"); - return; - } - device_create_file(&dev->dev, &dev_attr_physical_device); device_create_file(&dev->dev, &dev_attr_mode); @@ -253,19 +256,24 @@ static void frontend_changed(struct xenb switch (frontend_state) { case XenbusStateInitialising: + break; + + case XenbusStateInitialised: case XenbusStateConnected: - break; - - case XenbusStateInitialised: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. 
*/ + if (dev->state == XenbusStateConnected) + break; + err = connect_ring(be); - if (err) { - return; - } - update_blkif_status(be->blkif); + if (err) + break; + update_blkif_status(be->blkif); break; case XenbusStateClosing: - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing); + xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: @@ -285,14 +293,6 @@ static void frontend_changed(struct xenb /* ** Connection ** */ -static void maybe_connect(struct backend_info *be) -{ - if ((be->major != 0 || be->minor != 0) && - be->blkif->status == CONNECTED) - connect(be); -} - - /** * Write the physical details regarding the block device to the store, and * switch to Connected state. @@ -337,16 +337,18 @@ again: dev->nodename); goto abort; } - - err = xenbus_switch_state(dev, xbt, XenbusStateConnected); - if (err) - goto abort; err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) goto again; if (err) xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + return; abort: xenbus_transaction_end(xbt, 1); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Apr 11 18:54:18 2006 -0600 @@ -176,10 +176,6 @@ again: goto abort_transaction; } - err = xenbus_switch_state(dev, xbt, XenbusStateInitialised); - if (err) - goto abort_transaction; - err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) @@ -187,6 +183,8 @@ again: xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_blkring; } + + xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -324,7 +322,7 @@ static void connect(struct blkfront_info return; } - (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); /* Kick pending requests. 
*/ spin_lock_irq(&blkif_io_lock); @@ -349,7 +347,7 @@ static void blkfront_closing(struct xenb xlvbd_del(info); - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); + xenbus_switch_state(dev, XenbusStateClosed); } @@ -755,7 +753,7 @@ static void blkif_recover(struct blkfron kfree(copy); - (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); /* Now safe for us to use the shared ring */ spin_lock_irq(&blkif_io_lock); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue Apr 11 18:54:18 2006 -0600 @@ -170,9 +170,9 @@ static inline void exit_idle(void) {} #include <asm/idle.h> #define IRQ_REG orig_rax #endif -#define do_IRQ(irq, regs) do { \ - (regs)->IRQ_REG = (irq) | (1UL << (BITS_PER_LONG - 1)); \ - do_IRQ((regs)); \ +#define do_IRQ(irq, regs) do { \ + (regs)->IRQ_REG = ~(irq); \ + do_IRQ((regs)); \ } while (0) #endif diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue Apr 11 18:54:18 2006 -0600 @@ -395,6 +395,9 @@ gnttab_resume(void) setup.frame_list = frames; rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) + return -ENOSYS; + BUG_ON(rc || setup.status); #ifndef __ia64__ @@ -436,7 +439,8 @@ gnttab_init(void) if (xen_init() < 0) return -ENODEV; - BUG_ON(gnttab_resume()); + if (gnttab_resume() < 0) + return -ENODEV; for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) gnttab_list[i] = i + 1; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Apr 11 18:54:18 2006 -0600 @@ -15,6 +15,7 @@ #include <xen/xenbus.h> #include <linux/cpu.h> #include <linux/kthread.h> +#include <xen/gnttab.h> #include <xen/xencons.h> #if defined(__i386__) || defined(__x86_64__) @@ -41,7 +42,7 @@ void machine_emergency_restart(void) { /* We really want to get pending console data out before we die. */ xencons_force_flush(); - HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_reboot); + HYPERVISOR_shutdown(SHUTDOWN_reboot); } void machine_restart(char * __unused) @@ -58,7 +59,11 @@ void machine_power_off(void) { /* We really want to get pending console data out before we die. */ xencons_force_flush(); - HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_poweroff); +#if defined(__i386__) || defined(__x86_64__) + if (pm_power_off) + pm_power_off(); +#endif + HYPERVISOR_shutdown(SHUTDOWN_poweroff); } int reboot_thru_bios = 0; /* for dmi_scan.c */ @@ -76,30 +81,40 @@ static void __shutdown_handler(void *unu static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); -#ifndef CONFIG_HOTPLUG_CPU -#define cpu_down(x) (-EOPNOTSUPP) -#define cpu_up(x) (-EOPNOTSUPP) -#endif - +#ifdef CONFIG_SMP +int smp_suspend(void); +void smp_resume(void); +#else +#define smp_suspend() (0) +#define smp_resume() ((void)0) +#endif + +/* Ensure we run on the idle task page tables so that we will + switch page tables before running user space. 
This is needed + on architectures with separate kernel and user page tables + because the user page table pointer is not saved/restored. */ +static void switch_idle_mm(void) +{ + struct mm_struct *mm = current->active_mm; + + if (mm == &init_mm) + return; + + atomic_inc(&init_mm.mm_count); + switch_mm(mm, &init_mm, current); + current->active_mm = &init_mm; + mmdrop(mm); +} static int __do_suspend(void *ignore) { - int i, j, k, fpp; + int i, j, k, fpp, err; extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; extern unsigned long *pfn_to_mfn_frame_list[]; - extern int gnttab_suspend(void); - extern int gnttab_resume(void); extern void time_resume(void); - -#ifdef CONFIG_SMP - cpumask_t prev_online_cpus; - int vcpu_prepare(int vcpu); -#endif - - int err = 0; BUG_ON(smp_processor_id() != 0); BUG_ON(in_interrupt()); @@ -110,50 +125,21 @@ static int __do_suspend(void *ignore) return -EOPNOTSUPP; } -#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests " - "without CONFIG_HOTPLUG_CPU\n"); - return -EOPNOTSUPP; - } -#endif + err = smp_suspend(); + if (err) + return err; xenbus_suspend(); - - lock_cpu_hotplug(); -#ifdef CONFIG_SMP - /* - * Take all other CPUs offline. We hold the hotplug mutex to - * avoid other processes bringing up CPUs under our feet. - */ - cpus_clear(prev_online_cpus); - while (num_online_cpus() > 1) { - for_each_online_cpu(i) { - if (i == 0) - continue; - unlock_cpu_hotplug(); - err = cpu_down(i); - lock_cpu_hotplug(); - if (err != 0) { - printk(KERN_CRIT "Failed to take all CPUs " - "down: %d.\n", err); - goto out_reenable_cpus; - } - cpu_set(i, prev_online_cpus); - } - } -#endif preempt_disable(); #ifdef __i386__ kmem_cache_shrink(pgd_cache); +#endif mm_pin_all(); -#endif __cli(); preempt_enable(); - unlock_cpu_hotplug(); gnttab_suspend(); @@ -199,34 +185,15 @@ static int __do_suspend(void *ignore) time_resume(); + switch_idle_mm(); + __sti(); xencons_resume(); -#ifdef CONFIG_SMP - for_each_cpu(i) - vcpu_prepare(i); - -#endif - - /* - * Only resume xenbus /after/ we've prepared our VCPUs; otherwise - * the VCPU hotplug callback can race with our vcpu_prepare - */ xenbus_resume(); -#ifdef CONFIG_SMP - out_reenable_cpus: - for_each_cpu_mask(i, prev_online_cpus) { - j = cpu_up(i); - if ((j != 0) && !cpu_online(i)) { - printk(KERN_CRIT "Failed to bring cpu " - "%d back up (%d).\n", - i, j); - err = j; - } - } -#endif + smp_resume(); return err; } @@ -334,7 +301,6 @@ static void shutdown_handler(struct xenb kfree(str); } -#ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -360,45 +326,35 @@ static void sysrq_handler(struct xenbus_ if (err == -EAGAIN) goto again; - if (sysrq_key != '\0') { +#ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_key != '\0') handle_sysrq(sysrq_key, NULL, NULL); - } -} -#endif +#endif +} static struct xenbus_watch shutdown_watch = { .node = "control/shutdown", .callback = shutdown_handler }; -#ifdef CONFIG_MAGIC_SYSRQ static struct xenbus_watch sysrq_watch = { .node ="control/sysrq", .callback = sysrq_handler }; -#endif static int setup_shutdown_watcher(struct notifier_block *notifier, unsigned long event, void *data) { - int err1 = 0; -#ifdef CONFIG_MAGIC_SYSRQ - int err2 = 0; -#endif - - err1 = register_xenbus_watch(&shutdown_watch); -#ifdef CONFIG_MAGIC_SYSRQ - err2 = register_xenbus_watch(&sysrq_watch); -#endif - - if (err1) + int err; + + err = register_xenbus_watch(&shutdown_watch); 
+ if (err) printk(KERN_ERR "Failed to set shutdown watcher\n"); -#ifdef CONFIG_MAGIC_SYSRQ - if (err2) + err = register_xenbus_watch(&sysrq_watch); + if (err) printk(KERN_ERR "Failed to set sysrq watcher\n"); -#endif return NOTIFY_DONE; } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue Apr 11 18:54:18 2006 -0600 @@ -79,6 +79,15 @@ unsigned int maxcpus = NR_CPUS; unsigned int maxcpus = NR_CPUS; #endif +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; + void __init prefill_possible_map(void) { int i, rc; @@ -146,7 +155,7 @@ static void cpu_bringup(void) cpu_idle(); } -void vcpu_prepare(int vcpu) +static void vcpu_prepare(int vcpu) { vcpu_guest_context_t ctxt; struct task_struct *idle = idle_task(vcpu); @@ -278,6 +287,8 @@ void __init smp_prepare_cpus(unsigned in vcpu_prepare(cpu); } + xenbus_allowed_cpumask = cpu_present_map; + /* Currently, Xen gives no dynamic NUMA/HT info. */ for (cpu = 1; cpu < NR_CPUS; cpu++) { cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); @@ -301,6 +312,15 @@ void __devinit smp_prepare_boot_cpu(void cpu_online_map = cpumask_of_cpu(0); } +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). + */ + return (current->mm != NULL); +} + #ifdef CONFIG_HOTPLUG_CPU /* @@ -331,8 +351,10 @@ static void vcpu_hotplug(unsigned int cp } if (strcmp(state, "online") == 0) { + cpu_set(cpu, xenbus_allowed_cpumask); (void)cpu_up(cpu); } else if (strcmp(state, "offline") == 0) { + cpu_clear(cpu, xenbus_allowed_cpumask); (void)cpu_down(cpu); } else { printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", @@ -353,6 +375,22 @@ static void handle_vcpu_hotplug_event( } } +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. + */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpu_clear(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { @@ -360,7 +398,8 @@ static int setup_cpu_watcher(struct noti static struct xenbus_watch cpu_watch = { .node = "cpu", - .callback = handle_vcpu_hotplug_event }; + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; (void)register_xenbus_watch(&cpu_watch); if (!(xen_start_info->flags & SIF_INITDOMAIN)) { @@ -375,13 +414,61 @@ static int setup_cpu_watcher(struct noti static int __init setup_vcpu_hotplug_event(void) { + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; + + register_cpu_notifier(&hotplug_cpu); register_xenstore_notifier(&xsn_cpu); + return 0; } arch_initcall(setup_vcpu_hotplug_event); + +int smp_suspend(void) +{ + int i, err; + + lock_cpu_hotplug(); + + /* + * Take all other CPUs offline. 
We hold the hotplug mutex to + * avoid other processes bringing up CPUs under our feet. + */ + while (num_online_cpus() > 1) { + unlock_cpu_hotplug(); + for_each_online_cpu(i) { + if (i == 0) + continue; + err = cpu_down(i); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_cpu(i) + vcpu_hotplug(i); + return err; + } + } + lock_cpu_hotplug(); + } + + return 0; +} + +void smp_resume(void) +{ + int i; + + for_each_cpu(i) + vcpu_prepare(i); + + unlock_cpu_hotplug(); + + for_each_cpu(i) + vcpu_hotplug(i); +} int __cpu_disable(void) { @@ -415,6 +502,20 @@ void __cpu_die(unsigned int cpu) #else /* !CONFIG_HOTPLUG_CPU */ +int smp_suspend(void) +{ + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Can't suspend SMP guests " + "without CONFIG_HOTPLUG_CPU\n"); + return -EOPNOTSUPP; + } + return 0; +} + +void smp_resume(void) +{ +} + int __cpu_disable(void) { return -ENOSYS; @@ -429,6 +530,20 @@ void __cpu_die(unsigned int cpu) int __devinit __cpu_up(unsigned int cpu) { + int rc; + + if (local_cpu_hotplug_request()) { + cpu_set(cpu, local_allowed_cpumask); + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + return -EBUSY; + } + } else if (!cpu_isset(cpu, local_allowed_cpumask) || + !cpu_isset(cpu, xenbus_allowed_cpumask)) { + return -EBUSY; + } + #ifdef CONFIG_SMP_ALTERNATIVES if (num_online_cpus() == 1) prepare_for_smp(); @@ -436,7 +551,9 @@ int __devinit __cpu_up(unsigned int cpu) xen_smp_intr_init(cpu); cpu_set(cpu, cpu_online_map); - if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0) + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + if (rc != 0) BUG(); return 0; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Tue Apr 11 18:54:18 2006 -0600 @@ -97,7 +97,6 @@ typedef struct netif_st { #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) -void netif_creditlimit(netif_t *netif); void netif_disconnect(netif_t *netif); netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Tue Apr 11 18:54:18 2006 -0600 @@ -31,6 +31,7 @@ */ #include "common.h" +#include <linux/ethtool.h> #include <linux/rtnetlink.h> static void __netif_up(netif_t *netif) @@ -70,6 +71,12 @@ static int net_close(struct net_device * __netif_down(netif); return 0; } + +static struct ethtool_ops network_ethtool_ops = +{ + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, +}; netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]) { @@ -101,7 +108,9 @@ netif_t *alloc_netif(domid_t domid, unsi dev->get_stats = netif_be_get_stats; dev->open = net_open; dev->stop = net_close; - dev->features = NETIF_F_NO_CSUM; + dev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); /* Disable queuing. 
*/ dev->tx_queue_len = 0; @@ -291,25 +300,6 @@ void free_netif(netif_t *netif) { INIT_WORK(&netif->free_work, free_netif_callback, (void *)netif); schedule_work(&netif->free_work); -} - -void netif_creditlimit(netif_t *netif) -{ -#if 0 - /* Set the credit limit (reset remaining credit to new limit). */ - netif->credit_bytes = creditlimit->credit_bytes; - netif->remaining_credit = creditlimit->credit_bytes; - netif->credit_usec = creditlimit->period_usec; - - if (netif->status == CONNECTED) { - /* - * Schedule work so that any packets waiting under previous - * credit limit are dealt with (acts as a replenishment point). - */ - netif->credit_timeout.expires = jiffies; - netif_schedule_work(netif); - } -#endif } void netif_disconnect(netif_t *netif) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Tue Apr 11 18:54:18 2006 -0600 @@ -100,10 +100,10 @@ static int loopback_start_xmit(struct sk /* Defer checksum calculation. */ skb->proto_csum_blank = 1; /* Must be a local packet: assert its integrity. */ - skb->proto_csum_valid = 1; - } - - skb->ip_summed = skb->proto_csum_valid ? + skb->proto_data_valid = 1; + } + + skb->ip_summed = skb->proto_data_valid ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */ @@ -121,6 +121,12 @@ static struct net_device_stats *loopback return &np->stats; } +static struct ethtool_ops network_ethtool_ops = +{ + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, +}; + static void loopback_construct(struct net_device *dev, struct net_device *lo) { struct net_private *np = netdev_priv(dev); @@ -134,7 +140,11 @@ static void loopback_construct(struct ne dev->tx_queue_len = 0; - dev->features = NETIF_F_HIGHDMA | NETIF_F_LLTX; + dev->features = (NETIF_F_HIGHDMA | + NETIF_F_LLTX | + NETIF_F_IP_CSUM); + + SET_ETHTOOL_OPS(dev, &network_ethtool_ops); /* * We do not set a jumbo MTU on the interface. Otherwise the network @@ -147,12 +157,6 @@ static void loopback_construct(struct ne /*dev->mtu = 16*1024;*/ } -static struct ethtool_ops network_ethtool_ops = -{ - .get_tx_csum = ethtool_op_get_tx_csum, - .set_tx_csum = ethtool_op_set_tx_csum, -}; - static int __init make_loopback(int i) { struct net_device *dev1, *dev2; @@ -171,11 +175,6 @@ static int __init make_loopback(int i) loopback_construct(dev1, dev2); loopback_construct(dev2, dev1); - - dev1->features |= NETIF_F_NO_CSUM; - dev2->features |= NETIF_F_IP_CSUM; - - SET_ETHTOOL_OPS(dev2, &network_ethtool_ops); /* * Initialise a dummy MAC address for the 'dummy backend' interface. 
We diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Apr 11 18:54:18 2006 -0600 @@ -171,7 +171,7 @@ int netif_be_start_xmit(struct sk_buff * skb->len + hlen); BUG_ON(ret); nskb->dev = skb->dev; - nskb->proto_csum_valid = skb->proto_csum_valid; + nskb->proto_data_valid = skb->proto_data_valid; dev_kfree_skb(skb); skb = nskb; } @@ -213,7 +213,7 @@ static void net_rx_action(unsigned long { netif_t *netif = NULL; s8 status; - u16 size, id, irq; + u16 size, id, irq, flags; multicall_entry_t *mcl; mmu_update_t *mmu; gnttab_transfer_t *gop; @@ -301,9 +301,6 @@ static void net_rx_action(unsigned long netif = netdev_priv(skb->dev); size = skb->tail - skb->data; - /* Rederive the machine addresses. */ - new_mfn = mcl->args[1] >> PAGE_SHIFT; - old_mfn = gop->mfn; atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->frag_list = NULL; @@ -328,10 +325,14 @@ static void net_rx_action(unsigned long } irq = netif->irq; id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; + flags = 0; + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if (skb->proto_data_valid) /* remote but checksummed? */ + flags |= NETRXF_data_validated; if (make_rx_response(netif, id, status, (unsigned long)skb->data & ~PAGE_MASK, - size, skb->proto_csum_valid ? - NETRXF_data_validated : 0) && + size, flags) && (rx_notify[irq] == 0)) { rx_notify[irq] = 1; notify_list[notify_nr++] = irq; @@ -655,11 +656,16 @@ static void net_tx_action(unsigned long skb->protocol = eth_type_trans(skb, skb->dev); /* - * No checking needed on localhost, but remember the field is - * blank. - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->proto_csum_valid = 1; + * Old frontends do not assert data_validated but we + * can infer it from csum_blank so test both flags. + */ + if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->proto_data_valid = 1; + } else { + skb->ip_summed = CHECKSUM_NONE; + skb->proto_data_valid = 0; + } skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank); netif->stats.rx_bytes += txreq.size; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue Apr 11 18:54:18 2006 -0600 @@ -92,7 +92,7 @@ static int netback_probe(struct xenbus_d if (err) goto fail; - err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) { goto fail; } @@ -209,7 +209,7 @@ static void frontend_changed(struct xenb break; case XenbusStateClosing: - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing); + xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: @@ -233,9 +233,44 @@ static void frontend_changed(struct xenb static void maybe_connect(struct backend_info *be) { - if (be->netif != NULL && be->frontend_state == XenbusStateConnected) { + if (be->netif && (be->frontend_state == XenbusStateConnected)) connect(be); - } +} + +static void xen_net_read_rate(struct xenbus_device *dev, + unsigned long *bytes, unsigned long *usec) +{ + char *s, *e; + unsigned long b, u; + char *ratestr; + + /* Default to unlimited bandwidth. 
*/ + *bytes = ~0UL; + *usec = 0; + + ratestr = xenbus_read(XBT_NULL, dev->nodename, "rate", NULL); + if (IS_ERR(ratestr)) + return; + + s = ratestr; + b = simple_strtoul(s, &e, 10); + if ((s == e) || (*e != ',')) + goto fail; + + s = e + 1; + u = simple_strtoul(s, &e, 10); + if ((s == e) || (*e != '\0')) + goto fail; + + *bytes = b; + *usec = u; + + kfree(ratestr); + return; + + fail: + WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n"); + kfree(ratestr); } @@ -254,7 +289,11 @@ static void connect(struct backend_info return; } - xenbus_switch_state(dev, XBT_NULL, XenbusStateConnected); + xen_net_read_rate(dev, &be->netif->credit_bytes, + &be->netif->credit_usec); + be->netif->remaining_credit = be->netif->credit_bytes; + + xenbus_switch_state(dev, XenbusStateConnected); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Apr 11 18:54:18 2006 -0600 @@ -300,13 +300,6 @@ again: goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, - "state", "%d", XenbusStateConnected); - if (err) { - message = "writing frontend XenbusStateConnected"; - goto abort_transaction; - } - err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) @@ -314,6 +307,8 @@ again: xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_ring; } + + xenbus_switch_state(dev, XenbusStateConnected); return 0; @@ -696,7 +691,12 @@ static int network_start_xmit(struct sk_ tx->gref = np->grant_tx_ref[id] = ref; tx->offset = (unsigned long)skb->data & ~PAGE_MASK; tx->size = skb->len; - tx->flags = (skb->ip_summed == CHECKSUM_HW) ? NETTXF_csum_blank : 0; + + tx->flags = 0; + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; + if (skb->proto_data_valid) /* remote but checksummed? */ + tx->flags |= NETTXF_data_validated; np->tx.req_prod_pvt = i + 1; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); @@ -811,8 +811,18 @@ static int netif_poll(struct net_device skb->len = rx->status; skb->tail = skb->data + skb->len; - if (rx->flags & NETRXF_data_validated) + /* + * Old backends do not assert data_validated but we + * can infer it from csum_blank so test both flags. + */ + if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) { skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->proto_data_valid = 1; + } else { + skb->ip_summed = CHECKSUM_NONE; + skb->proto_data_valid = 0; + } + skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); np->stats.rx_packets++; np->stats.rx_bytes += rx->status; @@ -978,8 +988,8 @@ static void network_connect(struct net_d * the RX ring because some of our pages are currently flipped out * so we can't just free the RX skbs. * NB2. Freelist index entries are always going to be less than - * __PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than __PAGE_OFFSET: we use this property to distinguish + * PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than PAGE_OFFSET: we use this property to distinguish * them. */ @@ -990,7 +1000,7 @@ static void network_connect(struct net_d * interface has been down. 
*/ for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) { - if ((unsigned long)np->tx_skbs[i] < __PAGE_OFFSET) + if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) continue; skb = np->tx_skbs[i]; @@ -1006,8 +1016,11 @@ static void network_connect(struct net_d tx->gref = np->grant_tx_ref[i]; tx->offset = (unsigned long)skb->data & ~PAGE_MASK; tx->size = skb->len; - tx->flags = (skb->ip_summed == CHECKSUM_HW) ? - NETTXF_csum_blank : 0; + tx->flags = 0; + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; + if (skb->proto_data_valid) /* remote but checksummed? */ + tx->flags |= NETTXF_data_validated; np->stats.tx_bytes += skb->len; np->stats.tx_packets++; @@ -1018,7 +1031,7 @@ static void network_connect(struct net_d /* Rebuild the RX buffer freelist and the RX ring itself. */ for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { - if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET) + if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET) continue; gnttab_grant_foreign_transfer_ref( np->grant_rx_ref[i], np->xbdev->otherend_id, @@ -1216,7 +1229,7 @@ static void netfront_closing(struct xenb close_netdev(info); - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); + xenbus_switch_state(dev, XenbusStateClosed); } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c Tue Apr 11 18:54:18 2006 -0600 @@ -106,7 +106,7 @@ static inline int valid_request(int offs } static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, - u32 offset) + int offset) { if (offset >= 0) { new_val_mask <<= (offset * 8); @@ -180,7 +180,8 @@ int pciback_config_read(struct pci_dev * if ((req_start >= field_start && req_start < field_end) || (req_end > field_start && req_end <= field_end)) { - err = conf_space_read(dev, cfg_entry, offset, &tmp_val); + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); if (err) goto out; @@ -228,14 +229,16 @@ int pciback_config_write(struct pci_dev || (req_end > field_start && req_end <= field_end)) { tmp_val = 0; - err = pciback_config_read(dev, offset, size, &tmp_val); + err = pciback_config_read(dev, field_start, + field->size, &tmp_val); if (err) break; tmp_val = merge_value(tmp_val, value, get_mask(size), - field_start - req_start); - - err = conf_space_write(dev, cfg_entry, offset, tmp_val); + req_start - field_start); + + err = conf_space_write(dev, cfg_entry, field_start, + tmp_val); handled = 1; } } diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Apr 11 18:54:18 2006 -0600 @@ -16,7 +16,7 @@ static struct pciback_device *alloc_pdev { struct pciback_device *pdev; - pdev = kmalloc(sizeof(struct pciback_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); if (pdev == NULL) goto out; dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); @@ -137,7 +137,7 @@ static int pciback_attach(struct pciback dev_dbg(&pdev->xdev->dev, "Connecting...\n"); - err = xenbus_switch_state(pdev->xdev, XBT_NULL, XenbusStateConnected); + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); if (err) xenbus_dev_fatal(pdev->xdev, err, "Error switching to connected state!"); @@ -165,7 +165,7 @@ static void 
pciback_frontend_changed(str break; case XenbusStateClosing: - xenbus_switch_state(xdev, XBT_NULL, XenbusStateClosing); + xenbus_switch_state(xdev, XenbusStateClosing); break; case XenbusStateClosed: @@ -341,7 +341,7 @@ static int pciback_setup_backend(struct goto out; } - err = xenbus_switch_state(pdev->xdev, XBT_NULL, XenbusStateInitialised); + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); if (err) xenbus_dev_fatal(pdev->xdev, err, "Error switching to initialised state!"); @@ -386,7 +386,7 @@ static int pciback_xenbus_probe(struct x } /* wait for xend to configure us */ - err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto out; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue Apr 11 18:54:18 2006 -0600 @@ -96,10 +96,6 @@ static int pcifront_publish_info(struct if (!err) err = xenbus_printf(trans, pdev->xdev->nodename, "magic", XEN_PCI_MAGIC); - if (!err) - err = - xenbus_switch_state(pdev->xdev, trans, - XenbusStateInitialised); if (err) { xenbus_transaction_end(trans, 1); @@ -117,6 +113,8 @@ static int pcifront_publish_info(struct goto out; } } + + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); @@ -186,7 +184,7 @@ static int pcifront_try_connect(struct p } } - err = xenbus_switch_state(pdev->xdev, XBT_NULL, XenbusStateConnected); + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); if (err) goto out; @@ -205,8 +203,7 @@ static int pcifront_try_disconnect(struc prev_state = xenbus_read_driver_state(pdev->xdev->nodename); if (prev_state < XenbusStateClosing) - err = xenbus_switch_state(pdev->xdev, XBT_NULL, - XenbusStateClosing); + err = xenbus_switch_state(pdev->xdev, XenbusStateClosing); if (!err && prev_state == XenbusStateConnected) pcifront_disconnect(pdev); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Apr 11 18:54:18 2006 -0600 @@ -277,6 +277,7 @@ static int __init privcmd_init(void) set_bit(__HYPERVISOR_mmu_update, hypercall_permission_map); set_bit(__HYPERVISOR_mmuext_op, hypercall_permission_map); set_bit(__HYPERVISOR_xen_version, hypercall_permission_map); + set_bit(__HYPERVISOR_sched_op, hypercall_permission_map); privcmd_intf = create_xen_proc_entry("privcmd", 0400); if (privcmd_intf != NULL) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue Apr 11 18:54:18 2006 -0600 @@ -55,6 +55,7 @@ static int tpmback_remove(struct xenbus_ be->backend_watch.node = NULL; } if (be->tpmif) { + vtpm_release_packets(be->tpmif, 0); tpmif_put(be->tpmif); be->tpmif = NULL; } @@ -87,7 +88,7 @@ static int tpmback_probe(struct xenbus_d goto fail; } - err = xenbus_switch_state(dev, XBT_NULL, XenbusStateInitWait); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) { goto fail; } @@ -175,7 +176,7 @@ static void frontend_changed(struct xenb break; case XenbusStateClosing: - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing); + xenbus_switch_state(dev, 
XenbusStateClosing); break; case XenbusStateClosed: @@ -247,18 +248,15 @@ again: goto abort; } - err = xenbus_switch_state(dev, xbt, XenbusStateConnected); - if (err) - goto abort; - - be->tpmif->status = CONNECTED; - err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) goto again; - if (err) { + if (err) xenbus_dev_fatal(be->dev, err, "end of transaction"); - } + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (!err) + be->tpmif->status = CONNECTED; return; abort: xenbus_transaction_end(xbt, 1); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Tue Apr 11 18:54:18 2006 -0600 @@ -65,14 +65,18 @@ static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs); static void tpmif_rx_action(unsigned long unused); -static void tpmif_connect(struct tpm_private *tp, domid_t domid); +static int tpmif_connect(struct xenbus_device *dev, + struct tpm_private *tp, + domid_t domid); static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); -static int tpm_allocate_buffers(struct tpm_private *tp); +static int tpmif_allocate_tx_buffers(struct tpm_private *tp); +static void tpmif_free_tx_buffers(struct tpm_private *tp); static void tpmif_set_connected_state(struct tpm_private *tp, u8 newstate); static int tpm_xmit(struct tpm_private *tp, const u8 * buf, size_t count, int userbuffer, void *remember); +static void destroy_tpmring(struct tpm_private *tp); #define DPRINTK(fmt, args...) \ pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) @@ -80,6 +84,8 @@ static int tpm_xmit(struct tpm_private * printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) #define WPRINTK(fmt, args...) 
\ printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) + +#define GRANT_INVALID_REF 0 static inline int @@ -119,6 +125,14 @@ static inline struct tx_buffer *tx_buffe } +static inline void tx_buffer_free(struct tx_buffer *txb) +{ + if (txb) { + free_page((long)txb->data); + kfree(txb); + } +} + /************************************************************** Utility function for the tpm_private structure **************************************************************/ @@ -128,21 +142,27 @@ static inline void tpm_private_init(stru init_waitqueue_head(&tp->wait_q); } +static inline void tpm_private_free(void) +{ + tpmif_free_tx_buffers(my_priv); + kfree(my_priv); + my_priv = NULL; +} + static struct tpm_private *tpm_private_get(void) { + int err; if (!my_priv) { my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); if (my_priv) { tpm_private_init(my_priv); + err = tpmif_allocate_tx_buffers(my_priv); + if (err < 0) { + tpm_private_free(); + } } } return my_priv; -} - -static inline void tpm_private_free(void) -{ - kfree(my_priv); - my_priv = NULL; } /************************************************************** @@ -233,14 +253,14 @@ static int setup_tpmring(struct xenbus_d tpmif_tx_interface_t *sring; int err; + tp->ring_ref = GRANT_INVALID_REF; + sring = (void *)__get_free_page(GFP_KERNEL); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } tp->tx = sring; - - tpm_allocate_buffers(tp); err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx)); if (err < 0) { @@ -251,14 +271,13 @@ static int setup_tpmring(struct xenbus_d } tp->ring_ref = err; - err = xenbus_alloc_evtchn(dev, &tp->evtchn); + err = tpmif_connect(dev, tp, dev->otherend_id); if (err) goto fail; - tpmif_connect(tp, dev->otherend_id); - return 0; fail: + destroy_tpmring(tp); return err; } @@ -266,14 +285,17 @@ static void destroy_tpmring(struct tpm_p static void destroy_tpmring(struct tpm_private *tp) { tpmif_set_connected_state(tp, 0); - if (tp->tx != NULL) { + + if (tp->ring_ref != GRANT_INVALID_REF) { gnttab_end_foreign_access(tp->ring_ref, 0, (unsigned long)tp->tx); + tp->ring_ref = GRANT_INVALID_REF; tp->tx = NULL; } if (tp->irq) - unbind_from_irqhandler(tp->irq, NULL); + unbind_from_irqhandler(tp->irq, tp); + tp->evtchn = tp->irq = 0; } @@ -377,6 +399,9 @@ static int tpmfront_probe(struct xenbus_ int handle; struct tpm_private *tp = tpm_private_get(); + if (!tp) + return -ENOMEM; + err = xenbus_scanf(XBT_NULL, dev->nodename, "handle", "%i", &handle); if (XENBUS_EXIST_ERR(err)) @@ -402,15 +427,14 @@ static int tpmfront_probe(struct xenbus_ static int tpmfront_remove(struct xenbus_device *dev) { - struct tpm_private *tp = dev->data; + struct tpm_private *tp = (struct tpm_private *)dev->data; destroy_tpmring(tp); return 0; } -static int -tpmfront_suspend(struct xenbus_device *dev) -{ - struct tpm_private *tp = dev->data; +static int tpmfront_suspend(struct xenbus_device *dev) +{ + struct tpm_private *tp = (struct tpm_private *)dev->data; u32 ctr; /* lock, so no app can send */ @@ -437,29 +461,35 @@ tpmfront_suspend(struct xenbus_device *d return 0; } -static int -tpmfront_resume(struct xenbus_device *dev) -{ - struct tpm_private *tp = dev->data; +static int tpmfront_resume(struct xenbus_device *dev) +{ + struct tpm_private *tp = (struct tpm_private *)dev->data; + destroy_tpmring(tp); return talk_to_backend(dev, tp); } -static void -tpmif_connect(struct tpm_private *tp, domid_t domid) +static int tpmif_connect(struct xenbus_device *dev, + struct tpm_private *tp, + domid_t domid) { int err; 
tp->backend_id = domid; + + err = xenbus_alloc_evtchn(dev, &tp->evtchn); + if (err) + return err; err = bind_evtchn_to_irqhandler(tp->evtchn, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); if (err <= 0) { WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); - return; + return err; } tp->irq = err; + return 0; } static struct xenbus_device_id tpmfront_ids[] = { @@ -488,19 +518,30 @@ static void __exit exit_tpm_xenbus(void) xenbus_unregister_driver(&tpmfront); } - -static int -tpm_allocate_buffers(struct tpm_private *tp) +static int tpmif_allocate_tx_buffers(struct tpm_private *tp) { unsigned int i; - for (i = 0; i < TPMIF_TX_RING_SIZE; i++) + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { tp->tx_buffers[i] = tx_buffer_alloc(); - return 1; -} - -static void -tpmif_rx_action(unsigned long priv) + if (!tp->tx_buffers[i]) { + tpmif_free_tx_buffers(tp); + return -ENOMEM; + } + } + return 0; +} + +static void tpmif_free_tx_buffers(struct tpm_private *tp) +{ + unsigned int i; + + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { + tx_buffer_free(tp->tx_buffers[i]); + } +} + +static void tpmif_rx_action(unsigned long priv) { struct tpm_private *tp = (struct tpm_private *)priv; @@ -545,8 +586,7 @@ exit: } -static irqreturn_t -tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) { struct tpm_private *tp = tpm_priv; unsigned long flags; @@ -560,10 +600,9 @@ tpmif_int(int irq, void *tpm_priv, struc } -static int -tpm_xmit(struct tpm_private *tp, - const u8 * buf, size_t count, int isuserbuffer, - void *remember) +static int tpm_xmit(struct tpm_private *tp, + const u8 * buf, size_t count, int isuserbuffer, + void *remember) { tpmif_tx_request_t *tx; TPMIF_RING_IDX i; @@ -693,8 +732,7 @@ static void tpmif_set_connected_state(st * ================================================================= */ -static int __init -tpmif_init(void) +static int __init tpmif_init(void) { IPRINTK("Initialising the vTPM driver.\n"); if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE, @@ -709,8 +747,7 @@ tpmif_init(void) module_init(tpmif_init); -static void __exit -tpmif_exit(void) +static void __exit tpmif_exit(void) { exit_tpm_xenbus(); gnttab_free_grant_references(gref_head); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,4 +1,8 @@ obj-y += xenbus.o obj-y += xenbus.o +obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o + +xenbus_be-objs = +xenbus_be-objs += xenbus_backend_client.o xenbus-objs = xenbus-objs += xenbus_client.o diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue Apr 11 18:54:18 2006 -0600 @@ -84,9 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2); EXPORT_SYMBOL_GPL(xenbus_watch_path2); -int xenbus_switch_state(struct xenbus_device *dev, - xenbus_transaction_t xbt, - XenbusState state) +int xenbus_switch_state(struct xenbus_device *dev, XenbusState state) { /* We check whether the state is currently set to the given value, and if not, then the state is set. We don't want to unconditionally @@ -94,6 +92,12 @@ int xenbus_switch_state(struct xenbus_de unnecessarily. 
Furthermore, if the node has gone, we don't write to it, as the device will be tearing down, and we don't want to resurrect that directory. + + Note that, because of this cached value of our state, this function + will not work inside a Xenstore transaction (something it was + trying to in the past) because dev->state would not get reset if + the transaction was aborted. + */ int current_state; @@ -102,12 +106,12 @@ int xenbus_switch_state(struct xenbus_de if (state == dev->state) return 0; - err = xenbus_scanf(xbt, dev->nodename, "state", "%d", - &current_state); + err = xenbus_scanf(XBT_NULL, dev->nodename, "state", "%d", + &current_state); if (err != 1) return 0; - err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); + err = xenbus_printf(XBT_NULL, dev->nodename, "state", "%d", state); if (err) { if (state != XenbusStateClosing) /* Avoid looping */ xenbus_dev_fatal(dev, err, "writing new state"); @@ -193,7 +197,7 @@ void xenbus_dev_fatal(struct xenbus_devi _dev_error(dev, err, fmt, ap); va_end(ap); - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing); + xenbus_switch_state(dev, XenbusStateClosing); } EXPORT_SYMBOL_GPL(xenbus_dev_fatal); @@ -255,134 +259,6 @@ int xenbus_free_evtchn(struct xenbus_dev xenbus_dev_error(dev, err, "freeing event channel %d", port); return err; } - - -/* Based on Rusty Russell's skeleton driver's map_page */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) -{ - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - struct vm_struct *area; - - *vaddr = NULL; - - area = alloc_vm_area(PAGE_SIZE); - if (!area) - return -ENOMEM; - - op.host_addr = (unsigned long)area->addr; - - lock_vm_area(area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); - unlock_vm_area(area); - - if (op.status != GNTST_okay) { - free_vm_area(area); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; - } - - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; - - *vaddr = area->addr; - return 0; -} -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); - - -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) -{ - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); - - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_map_ring); - - -/* Based on Rusty Russell's skeleton driver's unmap_page */ -int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) -{ - struct vm_struct *area; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; - - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. 
- */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; - } - read_unlock(&vmlist_lock); - - if (!area) { - xenbus_dev_error(dev, -ENOENT, - "can't find mapped virtual address %p", vaddr); - return GNTST_bad_virt_addr; - } - - op.handle = (grant_handle_t)area->phys_addr; - - lock_vm_area(area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); - unlock_vm_area(area); - - if (op.status == GNTST_okay) - free_vm_area(area); - else - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); - - -int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) -{ - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; - - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); - - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_unmap_ring); XenbusState xenbus_read_driver_state(const char *path) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Apr 11 18:54:18 2006 -0600 @@ -3,7 +3,7 @@ * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard - * Copyright (C) 2005 XenSource Ltd + * Copyright (C) 2005, 2006 XenSource Ltd * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -49,6 +49,7 @@ #include <xen/xenbus.h> #include <xen/xen_proc.h> #include <xen/evtchn.h> +#include <xen/features.h> #include "xenbus_comms.h" @@ -364,7 +365,7 @@ static int xenbus_dev_probe(struct devic return 0; fail: xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); + xenbus_switch_state(dev, XenbusStateClosed); return -ENODEV; } @@ -381,7 +382,7 @@ static int xenbus_dev_remove(struct devi if (drv->remove) drv->remove(dev); - xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); + xenbus_switch_state(dev, XenbusStateClosed); return 0; } @@ -882,7 +883,7 @@ static int all_devices_ready_(struct dev int *result = data; if (xendev->state != XenbusStateConnected) { - result = 0; + *result = 0; return 1; } @@ -901,8 +902,6 @@ static int all_devices_ready(void) void xenbus_probe(void *unused) { - int i; - BUG_ON((xenstored_ready <= 0)); /* Enumerate devices in xenstore. */ @@ -915,28 +914,6 @@ void xenbus_probe(void *unused) /* Notify others that xenstore is up */ notifier_call_chain(&xenstore_chain, 0, NULL); - - /* On a 10 second timeout, waiting for all devices currently - configured. We need to do this to guarantee that the filesystems - and / or network devices needed for boot are available, before we - can allow the boot to proceed. - - A possible improvement here would be to have the tools add a - per-device flag to the store entry, indicating whether it is needed - at boot time. This would allow people who knew what they were - doing to accelerate their boot slightly, but of course needs tools - or manual intervention to set up those flags correctly. 
- */ - for (i = 0; i < 10 * HZ; i++) { - if (all_devices_ready()) - return; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - printk(KERN_WARNING - "XENBUS: Timeout connecting to devices!\n"); } @@ -983,6 +960,7 @@ static int __init xenbus_probe_init(void static int __init xenbus_probe_init(void) { int err = 0, dom0; + unsigned long page = 0; DPRINTK(""); @@ -991,11 +969,9 @@ static int __init xenbus_probe_init(void return -ENODEV; } - /* Register ourselves with the kernel bus & device subsystems */ + /* Register ourselves with the kernel bus subsystem */ bus_register(&xenbus_frontend.bus); bus_register(&xenbus_backend.bus); - device_register(&xenbus_frontend.dev); - device_register(&xenbus_backend.dev); /* * Domain0 doesn't have a store_evtchn or store_mfn yet. @@ -1003,11 +979,7 @@ static int __init xenbus_probe_init(void dom0 = (xen_start_info->store_evtchn == 0); if (dom0) { - - unsigned long page; evtchn_op_t op = { 0 }; - int ret; - /* Allocate page. */ page = get_zeroed_page(GFP_KERNEL); @@ -1023,8 +995,10 @@ static int __init xenbus_probe_init(void op.u.alloc_unbound.dom = DOMID_SELF; op.u.alloc_unbound.remote_dom = 0; - ret = HYPERVISOR_event_channel_op(&op); - BUG_ON(ret); + err = HYPERVISOR_event_channel_op(&op); + if (err == -ENOSYS) + goto err; + BUG_ON(err); xen_start_info->store_evtchn = op.u.alloc_unbound.port; /* And finally publish the above info in /proc/xen */ @@ -1047,16 +1021,64 @@ static int __init xenbus_probe_init(void if (err) { printk(KERN_WARNING "XENBUS: Error initializing xenstore comms: %i\n", err); - return err; - } + goto err; + } + + /* Register ourselves with the kernel device subsystem */ + device_register(&xenbus_frontend.dev); + device_register(&xenbus_backend.dev); if (!dom0) xenbus_probe(NULL); return 0; + + err: + if (page) + free_page(page); + + /* + * Do not unregister the xenbus front/backend buses here. The + * buses must exist because front/backend drivers will use + * them when they are registered. + */ + + return err; } postcore_initcall(xenbus_probe_init); + + +/* + * On a 10 second timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. 
+ */ +static int __init wait_for_devices(void) +{ + unsigned long timeout = jiffies + 10*HZ; + + while (time_before(jiffies, timeout)) { + if (all_devices_ready()) + return 0; + schedule_timeout_interruptible(HZ/10); + } + + printk(KERN_WARNING "XENBUS: Timeout connecting to devices!\n"); + return 0; +} + +late_initcall(wait_for_devices); + /* * Local variables: diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Apr 11 18:54:18 2006 -0600 @@ -685,6 +685,24 @@ void xs_resume(void) up_write(&xs_state.suspend_mutex); } +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; +} + static int xenwatch_thread(void *unused) { struct list_head *ent; @@ -707,12 +725,11 @@ static int xenwatch_thread(void *unused) if (ent != &watch_events) { msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (msg->u.watch.handle->flags & XBWF_new_thread) + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + else + xenwatch_handle_callback(msg); } mutex_unlock(&xenwatch_mutex); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Tue Apr 11 18:54:18 2006 -0600 @@ -33,10 +33,9 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ -#include <xen/interface/xen.h> -#include <xen/interface/sched.h> -#include <xen/interface/nmi.h> -#include <linux/errno.h> +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif #define __STR(x) #x #define STR(x) __STR(x) @@ -167,35 +166,17 @@ HYPERVISOR_fpu_taskswitch( } static inline int +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} + +static inline int HYPERVISOR_sched_op( - int cmd, unsigned long arg) + int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); -} - -static inline int -HYPERVISOR_sched_op_new( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op_new, cmd, arg); -} - -static inline int -HYPERVISOR_poll( - evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) -{ - struct sched_poll sched_poll = { - .ports = ports, - .nr_ports = nr_ports, - .timeout = jiffies_to_st(timeout) - }; - - int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); - - return rc; } static inline long @@ -327,8 +308,18 @@ HYPERVISOR_suspend( HYPERVISOR_suspend( unsigned long srec) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + + if (rc == -ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); 
+ + return rc; } static inline int @@ -337,6 +328,21 @@ HYPERVISOR_nmi_op( { return _hypercall2(int, nmi_op, op, arg); } + +static inline int +HYPERVISOR_callback_op( + int cmd, void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int +HYPERVISOR_xenoprof_op( + int op, unsigned long arg1, unsigned long arg2) +{ + return _hypercall3(int, xenoprof_op, op, arg1, arg2); +} + #endif /* __HYPERCALL_H__ */ diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue Apr 11 18:54:18 2006 -0600 @@ -37,8 +37,11 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/version.h> +#include <linux/errno.h> #include <xen/interface/xen.h> #include <xen/interface/dom0_ops.h> +#include <xen/interface/sched.h> +#include <xen/interface/nmi.h> #include <asm/ptrace.h> #include <asm/page.h> #if defined(__i386__) @@ -115,6 +118,64 @@ u64 jiffies_to_st(unsigned long jiffies) #define xen_init() (0) +static inline int +HYPERVISOR_yield( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +static inline int +HYPERVISOR_block( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); + + return rc; +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); + + return rc; +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .ports = ports, + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + static inline void MULTI_update_va_mapping( multicall_entry_t *mcl, unsigned long va, diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h Tue Apr 11 18:54:18 2006 -0600 @@ -102,6 +102,7 @@ static inline void * phys_to_virt(unsign */ #define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) #define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page))) +#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page))) #define bio_to_pseudophys(bio) (page_to_pseudophys(bio_page((bio))) + \ (unsigned long) bio_offset((bio))) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Apr 11 18:54:18 2006 -0600 @@ -5,6 +5,8 @@ * This is included late in kernel/setup.c so that it can make * use of all of the static functions. 
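Usage note (illustrative sketch, not part of this changeset): guest code no longer issues SCHEDOP_yield and friends through HYPERVISOR_sched_op directly; it goes through the wrappers added above, which retry via sched_op_compat when an older hypervisor returns -ENOSYS. A hypothetical caller:

    static void example_wait_for_flag(volatile int *flag)
    {
            /* Spin politely: give the VCPU back to Xen on every iteration. */
            while (!*flag)
                    HYPERVISOR_yield();
    }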
**/ + +#include <xen/interface/callback.h> static char * __init machine_specific_memory_setup(void) { @@ -23,6 +25,14 @@ static void __init machine_specific_arch static void __init machine_specific_arch_setup(void) { struct xen_platform_parameters pp; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = { __KERNEL_CS, (unsigned long)hypervisor_callback }, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = { __KERNEL_CS, (unsigned long)failsafe_callback }, + }; struct xennmi_callback cb; if (xen_feature(XENFEAT_auto_translated_physmap) && @@ -32,9 +42,8 @@ static void __init machine_specific_arch memset(empty_zero_page, 0, sizeof(empty_zero_page)); } - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); + HYPERVISOR_callback_op(CALLBACKOP_register, &event); + HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); cb.handler_address = (unsigned long)&nmi; HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue Apr 11 18:54:18 2006 -0600 @@ -33,11 +33,9 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ -#include <xen/interface/xen.h> -#include <xen/interface/sched.h> -#include <xen/interface/dom0_ops.h> -#include <linux/errno.h> -#include <asm/hypervisor.h> // for jiffies_to_st() +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif /* FIXME: temp place to hold these page related macros */ #include <asm/page.h> @@ -165,35 +163,17 @@ }) static inline int +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} + +static inline int HYPERVISOR_sched_op( - int cmd, unsigned long arg) + int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); -} - -static inline int -HYPERVISOR_sched_op_new( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op_new, cmd, arg); -} - -static inline int -HYPERVISOR_poll( - evtchn_port_t *ports, unsigned int nr_ports, unsigned long timeout) -{ - struct sched_poll sched_poll = { - .ports = ports, - .nr_ports = nr_ports, - .timeout = jiffies_to_st(timeout) - }; - - int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); - - return rc; } static inline long @@ -273,8 +253,18 @@ HYPERVISOR_suspend( HYPERVISOR_suspend( unsigned long srec) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + + if (rc == -ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); + + return rc; } extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue Apr 11 18:54:18 2006 -0600 @@ -37,8 +37,10 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/version.h> +#include <linux/errno.h> #include <xen/interface/xen.h> #include <xen/interface/dom0_ops.h> 
+#include <xen/interface/sched.h> #include <asm/ptrace.h> #include <asm/page.h> #include <asm/xen/privop.h> // for running_on_xen @@ -54,6 +56,64 @@ int xen_init(void); #define jiffies_to_st(j) 0 #include <asm/hypercall.h> + +static inline int +HYPERVISOR_yield( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +static inline int +HYPERVISOR_block( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); + + return rc; +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); + + return rc; +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .ports = ports, + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} // for drivers/xen/privcmd/privcmd.c #define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Apr 11 18:54:18 2006 -0600 @@ -37,10 +37,9 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ -#include <xen/interface/xen.h> -#include <xen/interface/sched.h> -#include <xen/interface/nmi.h> -#include <linux/errno.h> +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif #define __STR(x) #x #define STR(x) __STR(x) @@ -172,35 +171,17 @@ HYPERVISOR_fpu_taskswitch( } static inline int +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} + +static inline int HYPERVISOR_sched_op( - int cmd, unsigned long arg) + int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); -} - -static inline int -HYPERVISOR_sched_op_new( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op_new, cmd, arg); -} - -static inline int -HYPERVISOR_poll( - evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) -{ - struct sched_poll sched_poll = { - .ports = ports, - .nr_ports = nr_ports, - .timeout = jiffies_to_st(timeout) - }; - - int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); - - return rc; } static inline long @@ -328,8 +309,18 @@ HYPERVISOR_suspend( HYPERVISOR_suspend( unsigned long srec) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + + if (rc == -ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); + + return rc; } static inline int @@ -337,6 +328,20 @@ HYPERVISOR_nmi_op( unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); +} + +static inline int +HYPERVISOR_callback_op( + int cmd, void *arg) +{ + return 
_hypercall2(int, callback_op, cmd, arg); +} + +static inline int +HYPERVISOR_xenoprof_op( + int op, unsigned long arg1, unsigned long arg2) +{ + return _hypercall3(int, xenoprof_op, op, arg1, arg2); } #endif /* __HYPERCALL_H__ */ diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Tue Apr 11 18:54:18 2006 -0600 @@ -130,6 +130,7 @@ static inline void * phys_to_virt(unsign */ #define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) #define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page))) +#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page))) #define bio_to_pseudophys(bio) (page_to_pseudophys(bio_page((bio))) + \ (unsigned long) bio_offset((bio))) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Tue Apr 11 18:54:18 2006 -0600 @@ -6,20 +6,33 @@ * use of all of the static functions. **/ +#include <xen/interface/callback.h> + extern void hypervisor_callback(void); extern void failsafe_callback(void); extern void nmi(void); static void __init machine_specific_arch_setup(void) { + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = (unsigned long) hypervisor_callback, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = (unsigned long)failsafe_callback, + }; + struct callback_register syscall = { + .type = CALLBACKTYPE_syscall, + .address = (unsigned long)system_call, + }; #ifdef CONFIG_X86_LOCAL_APIC struct xennmi_callback cb; #endif - HYPERVISOR_set_callbacks( - (unsigned long) hypervisor_callback, - (unsigned long) failsafe_callback, - (unsigned long) system_call); + HYPERVISOR_callback_op(CALLBACKOP_register, &event); + HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); + HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); #ifdef CONFIG_X86_LOCAL_APIC cb.handler_address = (unsigned long)&nmi; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/linux/skbuff.h --- a/linux-2.6-xen-sparse/include/linux/skbuff.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/linux/skbuff.h Tue Apr 11 18:54:18 2006 -0600 @@ -189,7 +189,7 @@ enum { * @local_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @nohdr: Payload reference only, must not modify header - * @proto_csum_valid: Protocol csum validated since arriving at localhost + * @proto_data_valid: Protocol data validated since arriving at localhost * @proto_csum_blank: Protocol csum must be added before leaving localhost * @pkt_type: Packet class * @fclone: skbuff clone status @@ -271,7 +271,7 @@ struct sk_buff { ipvs_property:1; #else ipvs_property:1, - proto_csum_valid:1, + proto_data_valid:1, proto_csum_blank:1; #endif __be16 protocol; diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/xen/gnttab.h --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Tue Apr 11 18:54:18 2006 -0600 @@ -110,6 +110,9 @@ void gnttab_grant_foreign_transfer_ref(g #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) #endif +int gnttab_suspend(void); +int 
gnttab_resume(void); + #endif /* __ASM_GNTTAB_H__ */ /* diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Apr 11 18:54:18 2006 -0600 @@ -55,8 +55,17 @@ struct xenbus_watch /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); -}; - + + /* See XBWF_ definitions below. */ + unsigned long flags; +}; + +/* + * Execute callback in its own kthread. Useful if the callback is long + * running or heavily serialised, to avoid taking out the main xenwatch thread + * for a long period of time (or even unwittingly causing a deadlock). + */ +#define XBWF_new_thread 1 /* A xenbus device. */ struct xenbus_device { @@ -195,14 +204,10 @@ int xenbus_watch_path2(struct xenbus_dev /** * Advertise in the store a change of the given driver to the given new_state. - * Perform the change inside the given transaction xbt. xbt may be NULL, in - * which case this is performed inside its own transaction. Return 0 on - * success, or -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. - */ -int xenbus_switch_state(struct xenbus_device *dev, - xenbus_transaction_t xbt, - XenbusState new_state); + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state); /** diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/net/core/dev.c Tue Apr 11 18:54:18 2006 -0600 @@ -1649,12 +1649,12 @@ int netif_receive_skb(struct sk_buff *sk #ifdef CONFIG_XEN switch (skb->ip_summed) { case CHECKSUM_UNNECESSARY: - skb->proto_csum_valid = 1; + skb->proto_data_valid = 1; break; case CHECKSUM_HW: /* XXX Implement me. 
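To illustrate the XBWF_new_thread flag added to struct xenbus_watch above (hypothetical watch, not part of this changeset; it assumes the struct's existing node member and registration via register_xenbus_watch()), a long-running callback can be kept off the main xenwatch thread like so:

    static void example_watch_cb(struct xenbus_watch *watch,
                                 const char **vec, unsigned int len)
    {
            /* Potentially slow or heavily serialised work goes here. */
    }

    static struct xenbus_watch example_watch = {
            .node     = "backend/example/0/state",  /* hypothetical path */
            .callback = example_watch_cb,
            .flags    = XBWF_new_thread,            /* callback runs in its own kthread */
    };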
*/ default: - skb->proto_csum_valid = 0; + skb->proto_data_valid = 0; break; } #endif diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Tue Apr 11 13:55:47 2006 -0600 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Tue Apr 11 18:54:18 2006 -0600 @@ -428,7 +428,7 @@ struct sk_buff *skb_clone(struct sk_buff n->cloned = 1; n->nohdr = 0; #ifdef CONFIG_XEN - C(proto_csum_valid); + C(proto_data_valid); C(proto_csum_blank); #endif C(pkt_type); diff -r 5719550652a1 -r 5cc367720223 tools/Makefile --- a/tools/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -24,14 +24,14 @@ SUBDIRS += pygrub SUBDIRS += pygrub endif -.PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean - +.PHONY: all all: check @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done $(MAKE) ioemu +.PHONY: install install: check @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ @@ -39,18 +39,22 @@ install: check $(MAKE) ioemuinstall $(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump +.PHONY: clean clean: check_clean @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done $(MAKE) ioemuclean +.PHONY: check check: $(MAKE) -C check +.PHONY: check_clean check_clean: $(MAKE) -C check clean +.PHONY: ioemu ioemuinstall ioemuclean ifndef XEN_NO_IOEMU ioemu ioemuinstall ioemuclean: [ -f ioemu/config-host.h ] || \ diff -r 5719550652a1 -r 5cc367720223 tools/Rules.mk --- a/tools/Rules.mk Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/Rules.mk Tue Apr 11 18:54:18 2006 -0600 @@ -12,6 +12,8 @@ XEN_LIBXENSTAT = $(XEN_ROOT)/tools/x X11_LDPATH = -L/usr/X11R6/$(LIBDIR) +CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030101 + %.opic: %.c $(CC) $(CPPFLAGS) -DPIC $(CFLAGS) -fPIC -c -o $@ $< @@ -21,6 +23,7 @@ X11_LDPATH = -L/usr/X11R6/$(LIBDIR) %.o: %.cc $(CC) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< +.PHONY: mk-symlinks mk-symlinks: LINUX_ROOT=$(XEN_ROOT)/linux-2.6-xen-sparse mk-symlinks: mkdir -p xen diff -r 5719550652a1 -r 5cc367720223 tools/blktap/Makefile --- a/tools/blktap/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/blktap/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -39,11 +39,13 @@ IBINS := LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) +.PHONY: all all: mk-symlinks libblktap.so #blkdump @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done +.PHONY: install install: all $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_DIR) -p $(DESTDIR)/usr/include @@ -54,12 +56,14 @@ install: all $(MAKE) -C $$subdir $@; \ done +.PHONY: clean clean: rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done +.PHONY: rpm rpm: all rm -rf staging mkdir staging @@ -82,6 +86,7 @@ blkdump: libblktap.so .PHONY: TAGS clean install mk-symlinks rpm +.PHONY: TAGS TAGS: etags -t $(SRCS) *.h diff -r 5719550652a1 -r 5cc367720223 tools/blktap/parallax/Makefile --- a/tools/blktap/parallax/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/blktap/parallax/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -43,11 +43,14 @@ OBJS = $(patsubst %.c,%.o,$(SRCS)) OBJS = $(patsubst %.c,%.o,$(SRCS)) IBINS = parallax $(VDI_TOOLS) +.PHONY: all all: $(VDI_TOOLS) parallax blockstored +.PHONY: install install: all $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR) +.PHONY: clean clean: rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest @@ -57,5 +60,4 @@ parallax: $(PLX_SRCS) ${VDI_TOOLS}: %: %.c 
$(VDI_SRCS) $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS) -.PHONY: TAGS clean install rpm -include $(DEPS) diff -r 5719550652a1 -r 5cc367720223 tools/blktap/ublkback/Makefile --- a/tools/blktap/ublkback/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/blktap/ublkback/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -21,12 +21,16 @@ DEPS = .*.d OBJS = $(patsubst %.c,%.o,$(SRCS)) +.PHONY: all all: $(IBIN) LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) +.PHONY: install install: $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR) + +.PHONY: clean clean: rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN) @@ -34,6 +38,4 @@ ublkback: $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L.. \ -lblktap -laio ublkback.c ublkbacklib.c -pg -.PHONY: clean install - -include $(DEPS) diff -r 5719550652a1 -r 5cc367720223 tools/check/Makefile --- a/tools/check/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/check/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,9 @@ +.PHONY: all all: build # Check this machine is OK for building on. +.PHONY: build build: ./chk build @@ -9,8 +11,10 @@ build: # DO NOT use this check from 'make install' in the parent # directory, as that target can be used to make an installable # copy rather than actually installing. +.PHONY: install install: ./chk install +.PHONY: clean clean: - ./chk clean \ No newline at end of file + ./chk clean diff -r 5719550652a1 -r 5cc367720223 tools/console/Makefile --- a/tools/console/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/console/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -16,8 +16,10 @@ CFLAGS += -I $(XEN_XENSTORE) BIN = xenconsoled xenconsole +.PHONY: all all: $(BIN) +.PHONY: clean clean: $(RM) *.a *.so *.o *.rpm $(BIN) $(RM) client/*.o daemon/*.o @@ -30,6 +32,7 @@ xenconsole: $(patsubst %.c,%.o,$(wildcar $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \ -lxenctrl -lxenstore +.PHONY: install install: $(BIN) $(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR) $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR) diff -r 5719550652a1 -r 5cc367720223 tools/console/client/main.c --- a/tools/console/client/main.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/console/client/main.c Tue Apr 11 18:54:18 2006 -0600 @@ -36,7 +36,6 @@ #include <errno.h> #include <pty.h> -#include "xenctrl.h" #include "xs.h" #define ESCAPE_CHARACTER 0x1d @@ -92,7 +91,7 @@ static void restore_term(int fd, struct tcsetattr(fd, TCSAFLUSH, old); } -static int console_loop(int xc_handle, domid_t domid, int fd) +static int console_loop(int fd) { int ret; @@ -161,7 +160,6 @@ int main(int argc, char **argv) { struct termios attr; int domid; - int xc_handle; char *sopt = "h"; int ch; int opt_ind=0; @@ -206,11 +204,6 @@ int main(int argc, char **argv) err(errno, "Could not contact XenStore"); } - xc_handle = xc_interface_open(); - if (xc_handle == -1) { - err(errno, "xc_interface_open()"); - } - signal(SIGTERM, sighandler); path = xs_get_domain_path(xs, domid); @@ -260,7 +253,7 @@ int main(int argc, char **argv) free(path); init_term(STDIN_FILENO, &attr); - console_loop(xc_handle, domid, spty); + console_loop(spty); restore_term(STDIN_FILENO, &attr); return 0; diff -r 5719550652a1 -r 5cc367720223 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/console/daemon/io.c Tue Apr 11 18:54:18 2006 -0600 @@ -434,25 +434,36 @@ void enum_domains(void) } } +static int ring_free_bytes(struct domain *dom) +{ + struct xencons_interface *intf = dom->interface; + XENCONS_RING_IDX cons, prod, space; + + cons = intf->in_cons; + 
prod = intf->in_prod; + mb(); + + space = prod - cons; + if (space > sizeof(intf->in)) + return 0; /* ring is screwed: ignore it */ + + return (sizeof(intf->in) - space); +} + static void handle_tty_read(struct domain *dom) { ssize_t len = 0; char msg[80]; int i; struct xencons_interface *intf = dom->interface; - XENCONS_RING_IDX cons, prod; - - cons = intf->in_cons; - prod = intf->in_prod; - mb(); - - if (sizeof(intf->in) > (prod - cons)) - len = sizeof(intf->in) - (prod - cons); + XENCONS_RING_IDX prod; + + len = ring_free_bytes(dom); + if (len == 0) + return; + if (len > sizeof(msg)) len = sizeof(msg); - - if (len == 0) - return; len = read(dom->tty_fd, msg, len); if (len < 1) { @@ -465,6 +476,7 @@ static void handle_tty_read(struct domai shutdown_domain(dom); } } else if (domain_is_valid(dom->domid)) { + prod = intf->in_prod; for (i = 0; i < len; i++) { intf->in[MASK_XENCONS_IDX(prod++, intf->in)] = msg[i]; @@ -514,7 +526,7 @@ static void handle_ring_read(struct doma (void)write_sync(dom->evtchn_fd, &v, sizeof(v)); } -static void handle_xs(int fd) +static void handle_xs(void) { char **vec; int domid; @@ -560,7 +572,7 @@ void handle_io(void) } if (d->tty_fd != -1) { - if (!d->is_dead) + if (!d->is_dead && ring_free_bytes(d)) FD_SET(d->tty_fd, &readfds); if (!buffer_empty(&d->buffer)) @@ -572,7 +584,7 @@ void handle_io(void) ret = select(max_fd + 1, &readfds, &writefds, 0, NULL); if (FD_ISSET(xs_fileno(xs), &readfds)) - handle_xs(xs_fileno(xs)); + handle_xs(); for (d = dom_head; d; d = n) { n = d->next; diff -r 5719550652a1 -r 5cc367720223 tools/console/testsuite/Makefile --- a/tools/console/testsuite/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/console/testsuite/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,10 +3,12 @@ include $(XEN_ROOT)/tools/Rules.mk LDFLAGS=-static +.PHONY: all all: console-dom0 console-domU procpipe console-dom0: console-dom0.o console-domU: console-domU.o procpipe: procpipe.o +.PHONY: clean clean:; $(RM) *.o console-domU console-dom0 procpipe diff -r 5719550652a1 -r 5cc367720223 tools/debugger/gdb/README --- a/tools/debugger/gdb/README Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/gdb/README Tue Apr 11 18:54:18 2006 -0600 @@ -1,16 +1,17 @@ -DomU GDB server for 32-bit (PAE and non-PAE) systems +DomU & HVM GDB server for 32-bit (PAE and non-PAE) and x86_64 systems ---------------------------------------------------- Lines marked below with [*] are optional, if you want full source-level debugging of your kernel image. To build the GDB server: + 0. Build rest of the Xen first from the base directory 1. Run ./gdbbuild from within this directory. 2. Copy ./gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen to your test machine. -To build a debuggable guest kernel image: +To build a debuggable guest domU kernel image: 1. cd linux-2.6.xx-xenU 2. make menuconfig 3. From within the configurator, enable the following options: @@ -28,7 +29,7 @@ To debug a running guest: # bt # disass -To debug a crashed guest: +To debug a crashed domU guest: 1. Add '(enable-dump yes)' to /etc/xen/xend-config.sxp before starting xend. 2. 
When the domain crashes, a core file is written to diff -r 5719550652a1 -r 5cc367720223 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c --- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c Tue Apr 11 18:54:18 2006 -0600 @@ -287,6 +287,21 @@ handle_v_requests (char *own_buf, char * } void +handle_breakpoint_requests (char *own_buf, char *status, unsigned char *signal) +{ + /* Currently we only support software breakpoints */ + switch (own_buf[1]) { + case '0': /* software breakpoint, int3 based */ + own_buf[0] = '\0'; + break; + case '1': /* hardware breakpoint */ + default: + write_enn (own_buf); + break; + } +} + +void myresume (int step, int sig) { struct thread_resume resume_info[2]; @@ -321,6 +336,18 @@ gdbserver_usage (void) "COMM may either be a tty device (for serial debugging), or \n" "HOST:PORT to listen for a TCP connection.\n"); } + +extern control_c_pressed_flag; +#include <signal.h> + +void ctrl_c_handler(int signo) +{ + printf("Ctrl-C pressed: Quit from the attached gdb first\n"); + control_c_pressed_flag = 1; +} + +struct sigaction ctrl_c_sigaction = { .sa_handler = ctrl_c_handler }; +struct sigaction old_sigaction; int main (int argc, char *argv[]) @@ -396,9 +423,11 @@ main (int argc, char *argv[]) } } + while (1) { remote_open (argv[1]); + sigaction(SIGINT, &ctrl_c_sigaction, &old_sigaction); restart: setjmp (toplevel); @@ -586,6 +615,9 @@ main (int argc, char *argv[]) case 'v': /* Extended (long) request. */ handle_v_requests (own_buf, &status, &signal); + break; + case 'Z': + handle_breakpoint_requests (own_buf, &status, &signal); break; default: /* It is a request we don't understand. Respond with an @@ -643,5 +675,6 @@ main (int argc, char *argv[]) "GDBserver will reopen the connection.\n"); remote_close (); } - } -} + sigaction(SIGINT, &old_sigaction, NULL); + } +} diff -r 5719550652a1 -r 5cc367720223 tools/debugger/libxendebug/Makefile --- a/tools/debugger/libxendebug/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/libxendebug/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -26,10 +26,14 @@ LIB := libxendebug.a libxendebug.so LIB := libxendebug.a libxendebug.so LIB += libxendebug.so.$(MAJOR) libxendebug.so.$(MAJOR).$(MINOR) +.PHONY: all all: build + +.PHONY: build build: $(MAKE) $(LIB) +.PHONY: install install: build [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) [ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include @@ -39,14 +43,15 @@ install: build ln -sf libxendebug.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxendebug.so $(INSTALL_DATA) xendebug.h $(DESTDIR)/usr/include -.PHONY: TAGS clean rpm install all - +.PHONY: TAGS TAGS: etags -t $(SRCS) *.h +.PHONY: clean clean: rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen +.PHONY: rpm rpm: build rm -rf staging mkdir staging diff -r 5719550652a1 -r 5cc367720223 tools/debugger/pdb/Makefile --- a/tools/debugger/pdb/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/pdb/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -33,6 +33,7 @@ LIBS += unix str # bc = byte-code, dc = debug byte-code # patches = patch linux domU source code +.PHONY: all all : dc SOURCES += pdb_caml_xc.c @@ -51,5 +52,6 @@ include $(OCAMLMAKEFILE) include $(OCAMLMAKEFILE) PATCHDIR = ./linux-2.6-patches +.PHONY: patches patches : make -C $(PATCHDIR) patches diff -r 5719550652a1 -r 5cc367720223 tools/debugger/pdb/linux-2.6-module/Makefile --- 
a/tools/debugger/pdb/linux-2.6-module/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/pdb/linux-2.6-module/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -10,10 +10,12 @@ CFLAGS += -Wall CFLAGS += -Wall CFLAGS += -Werror +.PHONY: module module : # make KBUILD_VERBOSE=1 ARCH=xen -C $(KDIR) M=$(PWD) modules make ARCH=xen -C $(KDIR) M=$(PWD) modules +.PHONY: clean clean : make -C $(KDIR) M=$(PWD) clean diff -r 5719550652a1 -r 5cc367720223 tools/debugger/pdb/linux-2.6-patches/Makefile --- a/tools/debugger/pdb/linux-2.6-patches/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/debugger/pdb/linux-2.6-patches/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,6 +3,7 @@ KDIR = $(XEN_ROOT)/$(LINUX_DIR) KDIR = $(XEN_ROOT)/$(LINUX_DIR) PATCH_DIR = $(CURDIR) +.PHONY: patches patches : patches-done patches-done : diff -r 5719550652a1 -r 5cc367720223 tools/examples/Makefile --- a/tools/examples/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -50,11 +50,16 @@ endif endif endif +.PHONY: all all: + +.PHONY: build build: +.PHONY: install install: all install-initd install-configs install-scripts $(HOTPLUGS) +.PHONY: install-initd install-initd: [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d [ -d $(DESTDIR)/etc/sysconfig ] || $(INSTALL_DIR) $(DESTDIR)/etc/sysconfig @@ -62,6 +67,7 @@ install-initd: $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)/etc/init.d $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/etc/sysconfig/xendomains +.PHONY: install-configs install-configs: $(XEN_CONFIGS) [ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_CONFIG_DIR) @@ -72,6 +78,7 @@ install-configs: $(XEN_CONFIGS) $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \ done +.PHONY: install-scripts install-scripts: [ -d $(DESTDIR)$(XEN_SCRIPT_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR) @@ -84,6 +91,7 @@ install-scripts: $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \ done +.PHONY: install-hotplug install-hotplug: [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR) @@ -92,6 +100,7 @@ install-hotplug: $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \ done +.PHONY: install-udev install-udev: [ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(UDEV_RULES_DIR)/rules.d @@ -102,4 +111,5 @@ install-udev: ln -sf ../$$i . ) \ done +.PHONY: clean clean: diff -r 5719550652a1 -r 5cc367720223 tools/examples/README.incompatibilities --- a/tools/examples/README.incompatibilities Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/README.incompatibilities Tue Apr 11 18:54:18 2006 -0600 @@ -24,6 +24,13 @@ xen-network-common.sh. xen-network-common.sh. +ip +-- + +Newer ip commands (from iproute2) do not accept the abbreviated syntax "ip r a +..." etc. "ip route add ..." must be used instead. + + sed --- diff -r 5719550652a1 -r 5cc367720223 tools/examples/block --- a/tools/examples/block Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/block Tue Apr 11 18:54:18 2006 -0600 @@ -72,7 +72,7 @@ check_sharing() then toskip="^$" else - toskip="^[^ ]* [^ ]* [^ ]* ro " + toskip="^[^ ]* [^ ]* [^ ]* ro[, ]" fi for file in $(cat /proc/mounts | grep -v "$toskip" | cut -f 1 -d ' ') diff -r 5719550652a1 -r 5cc367720223 tools/examples/init.d/xend --- a/tools/examples/init.d/xend Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/init.d/xend Tue Apr 11 18:54:18 2006 -0600 @@ -7,7 +7,7 @@ # chkconfig: 2345 98 01 # description: Starts and stops the Xen control daemon. -if ! 
[ -e /proc/xen/privcmd ]; then +if ! grep -q "control_d" /proc/xen/capabilities ; then exit 0 fi diff -r 5719550652a1 -r 5cc367720223 tools/examples/vif-route --- a/tools/examples/vif-route Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/vif-route Tue Apr 11 18:54:18 2006 -0600 @@ -29,11 +29,11 @@ case "$command" in online) ifconfig ${vif} ${main_ip} netmask 255.255.255.255 up echo 1 >/proc/sys/net/ipv4/conf/${vif}/proxy_arp - ipcmd='a' + ipcmd='add' ;; offline) ifdown ${vif} - ipcmd='d' + ipcmd='del' ;; esac @@ -41,7 +41,7 @@ if [ "${ip}" ] ; then # If we've been given a list of IP addresses, then add routes from dom0 to # the guest using those addresses. for addr in ${ip} ; do - ip r ${ipcmd} ${addr} dev ${vif} src ${main_ip} + ip route ${ipcmd} ${addr} dev ${vif} src ${main_ip} done fi diff -r 5719550652a1 -r 5cc367720223 tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/examples/vtpm-common.sh Tue Apr 11 18:54:18 2006 -0600 @@ -261,12 +261,6 @@ function vtpm_create_instance () { if [ "$REASON" == "create" ]; then vtpm_reset $instance - elif [ "$REASON" == "resume" ]; then - vtpm_setup $instance - else - #default case for 'now' - #vtpm_reset $instance - true fi xenstore_write $XENBUS_PATH/instance $instance } diff -r 5719550652a1 -r 5cc367720223 tools/firmware/Makefile --- a/tools/firmware/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -13,8 +13,7 @@ SUBDIRS += vmxassist SUBDIRS += vmxassist SUBDIRS += hvmloader -.PHONY: all install clean - +.PHONY: all all: @set -e; if ! `which bcc 1>/dev/null 2>/dev/null`; then \ echo "***********************************************************"; \ @@ -28,10 +27,12 @@ all: fi +.PHONY: install install: all [ -d $(INSTALL_DIR) ] || install -d -m0755 $(INSTALL_DIR) [ ! -e $(TARGET) ] || install -m0644 $(TARGET) $(INSTALL_DIR) +.PHONY: clean clean: @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ diff -r 5719550652a1 -r 5cc367720223 tools/firmware/acpi/Makefile --- a/tools/firmware/acpi/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/acpi/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -33,6 +33,7 @@ IASL_URL=http://developer.intel.com/tech IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz vpath iasl $(PATH) +.PHONY: all all:$(ACPI_BIN) acpi_dsdt.c:acpi_dsdt.asl @@ -42,6 +43,7 @@ acpi_dsdt.c:acpi_dsdt.asl echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c rm *.aml +.PHONY: iasl iasl: @echo @echo "ACPI ASL compiler(iasl) is needed" @@ -60,8 +62,10 @@ iasl: $(ACPI_BIN):$(ACPI_GEN) ./$(ACPI_GEN) $(ACPI_BIN) +.PHONY: clean clean: rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) rm -rf $(IASL_VER).tar.gz +.PHONY: install install: all diff -r 5719550652a1 -r 5cc367720223 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/hvmloader/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -21,6 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := +XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. include $(XEN_ROOT)/Config.mk @@ -38,10 +39,10 @@ CFLAGS += $(call test-gcc-flag,$(CC),-f CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) OBJCOPY = objcopy -CFLAGS += $(DEFINES) -I. $(XENINC) -Wall -fno-builtin -O2 -msoft-float -CFLAGS += -m32 -march=i686 +CFLAGS += $(DEFINES) -I. 
$(XENINC) -fno-builtin -O2 -msoft-float LDFLAGS = -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,$(LOADADDR) +.PHONY: all all: hvmloader hvmloader: roms.h hvmloader.c acpi_madt.c @@ -57,6 +58,7 @@ roms.h: ../rombios/BIOS-bochs-latest ../ ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h ./mkhex acpi ../acpi/acpi.bin >> roms.h +.PHONY: clean clean: rm -f roms.h acpi.h rm -f hvmloader hvmloader.tmp hvmloader.o $(OBJECTS) diff -r 5719550652a1 -r 5cc367720223 tools/firmware/rombios/Makefile --- a/tools/firmware/rombios/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/rombios/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,10 +3,13 @@ BIOS_BUILDS = BIOS-bochs-latest #BIOS_BUILDS += BIOS-bochs-4-processors #BIOS_BUILDS += BIOS-bochs-8-processors +.PHONY: all all: bios +.PHONY: bios bios: biossums ${BIOS_BUILDS} +.PHONY: clean clean: rm -f *.o *.a *.s rombios.bin _rombios*_.c rm -f as86-sym.txt ld86-sym.txt diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vgabios/Makefile --- a/tools/firmware/vgabios/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vgabios/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -10,17 +10,22 @@ RELVERS = `pwd | sed "s-.*/--" | sed "s/ VGABIOS_DATE = "-DVGABIOS_DATE=\"$(RELDATE)\"" +.PHONY: all all: bios cirrus-bios +.PHONY: bios bios: biossums vgabios.bin vgabios.debug.bin +.PHONY: cirrus-bios cirrus-bios: vgabios-cirrus.bin vgabios-cirrus.debug.bin +.PHONY: clean clean: rm -f biossums *.o *.s *.ld86 \ temp.awk.* vgabios*.orig _vgabios_* _vgabios-debug_* core vgabios*.bin vgabios*.txt $(RELEASE).bin *.bak rm -f VGABIOS-lgpl-latest*.bin +.PHONY: release release: VGABIOS_VERS=\"-DVGABIOS_VERS=\\\"$(RELVERS)\\\"\" make bios cirrus-bios /bin/rm -f *.o *.s *.ld86 \ diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vmxassist/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -21,6 +21,7 @@ # External CFLAGS can do more harm than good. CFLAGS := +XEN_TARGET_ARCH = x86_32 XEN_ROOT = ../../.. include $(XEN_ROOT)/Config.mk @@ -37,12 +38,12 @@ CFLAGS += $(call test-gcc-flag,$(CC),-f CPP = cpp -P OBJCOPY = objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0 -CFLAGS += $(DEFINES) -I. $(XENINC) -Wall -fno-builtin -O2 -msoft-float -CFLAGS += -m32 -march=i686 +CFLAGS += $(DEFINES) -I. $(XENINC) -fno-builtin -O2 -msoft-float LDFLAGS = -m elf_i386 OBJECTS = head.o trap.o vm86.o setup.o util.o +.PHONY: all all: vmxassist.bin vmxassist.bin: vmxassist.ld $(OBJECTS) @@ -53,27 +54,28 @@ vmxassist.bin: vmxassist.ld $(OBJECTS) dd if=vmxassist.tmp of=vmxassist.bin ibs=512 conv=sync rm -f vmxassist.tmp -head.o: machine.h head.S +head.o: machine.h vm86.h head.S $(CC) $(CFLAGS) -D__ASSEMBLY__ $(DEFINES) -c head.S -trap.o: machine.h offsets.h trap.S +trap.o: machine.h vm86.h offsets.h trap.S $(CC) $(CFLAGS) -D__ASSEMBLY__ $(DEFINES) -c trap.S -vm86.o: machine.h vm86.c +vm86.o: machine.h vm86.h vm86.c $(CC) $(CFLAGS) -c vm86.c -setup.o: machine.h setup.c +setup.o: machine.h vm86.h setup.c $(CC) $(CFLAGS) -c setup.c -util.o: machine.h util.c +util.o: machine.h vm86.h util.c $(CC) $(CFLAGS) -c util.c offsets.h: gen ./gen > offsets.h -gen: gen.c +gen: vm86.h gen.c $(HOSTCC) $(HOSTCFLAGS) -I. 
$(XENINC) -o gen gen.c +.PHONY: clean clean: rm -f vmxassist vmxassist.tmp vmxassist.bin vmxassist.run vmxassist.sym head.s rm -f $(OBJECTS) diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vmxassist/trap.S --- a/tools/firmware/vmxassist/trap.S Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vmxassist/trap.S Tue Apr 11 18:54:18 2006 -0600 @@ -18,6 +18,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. */ #include "machine.h" +#include "vm86.h" #include "offsets.h" /* diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vmxassist/util.c --- a/tools/firmware/vmxassist/util.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vmxassist/util.c Tue Apr 11 18:54:18 2006 -0600 @@ -18,7 +18,6 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. */ #include <stdarg.h> -#include <vm86.h> #include "util.h" #include "machine.h" diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vmxassist/vm86.c Tue Apr 11 18:54:18 2006 -0600 @@ -34,7 +34,7 @@ #define SEG_FS 0x0040 #define SEG_GS 0x0080 -unsigned prev_eip = 0; +static unsigned prev_eip = 0; enum vm86_mode mode = 0; #ifdef DEBUG @@ -50,23 +50,41 @@ static char *rnames[] = { "ax", "cx", "d static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; #endif /* DEBUG */ -unsigned +static unsigned address(struct regs *regs, unsigned seg, unsigned off) { unsigned long long entry; - unsigned addr; - - if (seg == 0) - return off; - - if (seg > oldctx.gdtr_limit) + unsigned seg_base, seg_limit; + unsigned entry_low, entry_high; + + if (seg == 0) { + if (mode == VM86_REAL || mode == VM86_REAL_TO_PROTECTED) + return off; + else + panic("segment is zero, but not in real mode!\n"); + } + + if (mode == VM86_REAL || seg > oldctx.gdtr_limit || + (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg)) return ((seg & 0xFFFF) << 4) + off; entry = ((unsigned long long *) oldctx.gdtr_base)[seg >> 3]; - addr = (((entry >> (56-24)) & 0xFF000000) | - ((entry >> (32-16)) & 0x00FF0000) | - ((entry >> ( 16)) & 0x0000FFFF)) + off; - return addr; + entry_high = entry >> 32; + entry_low = entry & 0xFFFFFFFF; + + seg_base = (entry_high & 0xFF000000) | ((entry >> 16) & 0xFFFFFF); + seg_limit = (entry_high & 0xF0000) | (entry_low & 0xFFFF); + + if (entry_high & 0x8000 && + ((entry_high & 0x800000 && off >> 12 <= seg_limit) || + (!(entry_high & 0x800000) && off <= seg_limit))) + return seg_base + off; + + panic("should never reach here in function address():\n\t" + "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n", + entry_high, entry_low, mode, seg, off); + + return 0; } #ifdef DEBUG @@ -194,7 +212,7 @@ fetch8(struct regs *regs) return read8(addr); } -unsigned +static unsigned getreg32(struct regs *regs, int r) { switch (r & 7) { @@ -210,13 +228,13 @@ getreg32(struct regs *regs, int r) return ~0; } -unsigned +static unsigned getreg16(struct regs *regs, int r) { return MASK16(getreg32(regs, r)); } -unsigned +static unsigned getreg8(struct regs *regs, int r) { switch (r & 7) { @@ -232,7 +250,7 @@ getreg8(struct regs *regs, int r) return ~0; } -void +static void setreg32(struct regs *regs, int r, unsigned v) { switch (r & 7) { @@ -247,13 +265,13 @@ setreg32(struct regs *regs, int r, unsig } } -void +static void setreg16(struct regs *regs, int r, unsigned v) { setreg32(regs, r, (getreg32(regs, r) & ~0xFFFF) | MASK16(v)); } -void +static void setreg8(struct regs *regs, int r, unsigned v) { v &= 0xFF; @@ -269,7 +287,7 @@ setreg8(struct regs *regs, int r, 
unsign } } -unsigned +static unsigned segment(unsigned prefix, struct regs *regs, unsigned seg) { if (prefix & SEG_ES) @@ -287,7 +305,7 @@ segment(unsigned prefix, struct regs *re return seg; } -unsigned +static unsigned sib(struct regs *regs, int mod, unsigned byte) { unsigned scale = (byte >> 6) & 3; @@ -319,7 +337,7 @@ sib(struct regs *regs, int mod, unsigned /* * Operand (modrm) decode */ -unsigned +static unsigned operand(unsigned prefix, struct regs *regs, unsigned modrm) { int mod, disp = 0, seg; @@ -418,7 +436,7 @@ operand(unsigned prefix, struct regs *re /* * Load new IDT */ -int +static int lidt(struct regs *regs, unsigned prefix, unsigned modrm) { unsigned eip = regs->eip - 3; @@ -438,7 +456,7 @@ lidt(struct regs *regs, unsigned prefix, /* * Load new GDT */ -int +static int lgdt(struct regs *regs, unsigned prefix, unsigned modrm) { unsigned eip = regs->eip - 3; @@ -458,7 +476,7 @@ lgdt(struct regs *regs, unsigned prefix, /* * Modify CR0 either through an lmsw instruction. */ -int +static int lmsw(struct regs *regs, unsigned prefix, unsigned modrm) { unsigned eip = regs->eip - 3; @@ -481,7 +499,7 @@ lmsw(struct regs *regs, unsigned prefix, * We need to handle moves that address memory beyond the 64KB segment * limit that VM8086 mode enforces. */ -int +static int movr(struct regs *regs, unsigned prefix, unsigned opc) { unsigned eip = regs->eip - 1; @@ -546,7 +564,7 @@ movr(struct regs *regs, unsigned prefix, /* * Move to and from a control register. */ -int +static int movcr(struct regs *regs, unsigned prefix, unsigned opc) { unsigned eip = regs->eip - 2; @@ -618,7 +636,7 @@ static inline void set_eflags_ZF(unsigne * We need to handle cmp opcodes that address memory beyond the 64KB * segment limit that VM8086 mode enforces. */ -int +static int cmp(struct regs *regs, unsigned prefix, unsigned opc) { unsigned eip = regs->eip - 1; @@ -658,7 +676,7 @@ cmp(struct regs *regs, unsigned prefix, * We need to handle test opcodes that address memory beyond the 64KB * segment limit that VM8086 mode enforces. */ -int +static int test(struct regs *regs, unsigned prefix, unsigned opc) { unsigned eip = regs->eip - 1; @@ -691,7 +709,7 @@ test(struct regs *regs, unsigned prefix, * We need to handle pop opcodes that address memory beyond the 64KB * segment limit that VM8086 mode enforces. 
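For readers following the rewritten address() earlier in this file's diff, the masks correspond to the standard x86 segment-descriptor layout; a comment-only sketch of the decode, matching the entry_high/entry_low split used above:

    /*
     * 64-bit GDT descriptor, with entry_high = entry >> 32 and entry_low = entry:
     *   seg_base  = (entry_high & 0xFF000000)    base bits 31..24
     *             | ((entry >> 16) & 0xFFFFFF)   base bits 23..0
     *   seg_limit = (entry_high & 0xF0000)       limit bits 19..16
     *             | (entry_low & 0xFFFF)         limit bits 15..0
     *   entry_high & 0x8000   -> P bit (segment present)
     *   entry_high & 0x800000 -> G bit (limit in 4K pages, hence the off >> 12 test)
     */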
*/ -int +static int pop(struct regs *regs, unsigned prefix, unsigned opc) { unsigned eip = regs->eip - 1; @@ -721,7 +739,7 @@ pop(struct regs *regs, unsigned prefix, /* * Emulate a segment load in protected mode */ -int +static int load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes) { unsigned long long entry; @@ -768,7 +786,7 @@ load_seg(unsigned long sel, uint32_t *ba /* * Transition to protected mode */ -void +static void protected_mode(struct regs *regs) { regs->eflags &= ~(EFLAGS_TF|EFLAGS_VM); @@ -842,7 +860,7 @@ protected_mode(struct regs *regs) /* * Start real-mode emulation */ -void +static void real_mode(struct regs *regs) { regs->eflags |= EFLAGS_VM | 0x02; @@ -935,7 +953,7 @@ set_mode(struct regs *regs, enum vm86_mo TRACE((regs, 0, states[mode])); } -void +static void jmpl(struct regs *regs, int prefix) { unsigned n = regs->eip; @@ -963,7 +981,7 @@ jmpl(struct regs *regs, int prefix) panic("jmpl"); } -void +static void retl(struct regs *regs, int prefix) { unsigned cs, eip; @@ -990,7 +1008,7 @@ retl(struct regs *regs, int prefix) panic("retl"); } -void +static void interrupt(struct regs *regs, int n) { TRACE((regs, 0, "external interrupt %d", n)); @@ -1008,7 +1026,7 @@ interrupt(struct regs *regs, int n) * interrupt vectors. The following simple state machine catches * these attempts and rewrites them. */ -int +static int outbyte(struct regs *regs, unsigned prefix, unsigned opc) { static char icw2[2] = { 0 }; @@ -1059,7 +1077,7 @@ outbyte(struct regs *regs, unsigned pref return 1; } -int +static int inbyte(struct regs *regs, unsigned prefix, unsigned opc) { int port; @@ -1086,7 +1104,7 @@ enum { OPC_INVALID, OPC_EMULATED }; * a small subset of the opcodes, and not all opcodes are implemented for each * of the four modes we can operate in. */ -int +static int opcode(struct regs *regs) { unsigned eip = regs->eip; @@ -1246,7 +1264,7 @@ opcode(struct regs *regs) if ((mode == VM86_REAL_TO_PROTECTED) || (mode == VM86_PROTECTED_TO_REAL)) { retl(regs, prefix); - return OPC_EMULATED; + return OPC_INVALID; } goto invalid; @@ -1284,7 +1302,7 @@ opcode(struct regs *regs) if ((mode == VM86_REAL_TO_PROTECTED) || (mode == VM86_PROTECTED_TO_REAL)) { jmpl(regs, prefix); - return OPC_EMULATED; + return OPC_INVALID; } goto invalid; diff -r 5719550652a1 -r 5cc367720223 tools/firmware/vmxassist/vm86.h --- a/tools/firmware/vmxassist/vm86.h Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/firmware/vmxassist/vm86.h Tue Apr 11 18:54:18 2006 -0600 @@ -58,7 +58,6 @@ extern struct vmx_assist_context newctx; extern struct vmx_assist_context newctx; extern void emulate(struct regs *); -extern void interrupt(struct regs *, int); extern void dump_regs(struct regs *); extern void trace(struct regs *, int, char *, ...); diff -r 5719550652a1 -r 5cc367720223 tools/guest-headers/Makefile --- a/tools/guest-headers/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/guest-headers/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -2,12 +2,16 @@ XEN_ROOT=../.. XEN_ROOT=../.. 
linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse +.PHONY: all all: +.PHONY: check check: +.PHONY: install install: mkdir -p $(DESTDIR)/usr/include/xen/linux install -m0644 $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux +.PHONY: clean clean: diff -r 5719550652a1 -r 5cc367720223 tools/ioemu/Makefile --- a/tools/ioemu/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/ioemu/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -19,6 +19,7 @@ endif endif #DOCS=qemu-doc.html qemu-tech.html qemu.1 +.PHONY: all all: $(DOCS) HEADERS for d in $(TARGET_DIRS); do \ $(MAKE) -C $$d $@ || exit 1 ; \ @@ -30,6 +31,7 @@ dyngen$(EXESUF): dyngen.c dyngen$(EXESUF): dyngen.c $(HOST_CC) $(CFLAGS) $(DEFINES) -o $@ $^ +.PHONY: clean clean: # avoid old build problems by removing potentially incorrect old files rm -f config.mak config.h op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h @@ -41,6 +43,7 @@ clean: rm -f config-host.mak config-host.h rm -f keysym_adapter_sdl.h keysym_adapter_vnc.h +.PHONY: distclean distclean: clean rm -f config-host.mak config-host.h rm -f keysym_adapter_sdl.h keysym_adapter_vnc.h @@ -52,6 +55,7 @@ ar de en-us fi fr-be hr ar de en-us fi fr-be hr it lv nl pl ru th \ common de-ch es fo fr-ca hu ja mk nl-be pt sl tr +.PHONY: install install: all mkdir -p "$(bindir)" mkdir -p "$(DESTDIR)/$(datadir)" @@ -62,9 +66,11 @@ install: all done # various test targets +.PHONY: test speed test2 test speed test2: all $(MAKE) -C tests $@ +.PHONY: TAGS TAGS: etags *.[ch] tests/*.[ch] @@ -79,6 +85,7 @@ FILE=qemu-$(shell cat VERSION) FILE=qemu-$(shell cat VERSION) # tar release (use 'make -k tar' on a checkouted tree) +.PHONY: tar tar: rm -rf /tmp/$(FILE) cp -r . /tmp/$(FILE) @@ -86,6 +93,7 @@ tar: rm -rf /tmp/$(FILE) # generate a binary distribution +.PHONY: tarbin tarbin: ( cd $(DESTDIR) ; tar zcvf ~/qemu-$(VERSION)-i386.tar.gz \ $(DESTDIR)/$(bindir)/qemu $(DESTDIR)/$(bindir)/qemu-fast \ @@ -100,6 +108,7 @@ include .depend include .depend endif +.PHONY: HEADERS HEADERS: ifdef CONFIG_SDL diff -r 5719550652a1 -r 5cc367720223 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/ioemu/hw/vga.c Tue Apr 11 18:54:18 2006 -0600 @@ -1369,10 +1369,16 @@ static inline unsigned int cpuid_edx(uns { unsigned int eax, edx; - __asm__("cpuid" +#ifdef __x86_64__ +#define __bx "rbx" +#else +#define __bx "ebx" +#endif + __asm__("push %%"__bx"; cpuid; pop %%"__bx : "=a" (eax), "=d" (edx) : "0" (op) - : "bx", "cx"); + : "cx"); +#undef __bx return edx; } diff -r 5719550652a1 -r 5cc367720223 tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/ioemu/target-i386-dm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -329,6 +329,7 @@ sdlaudio.o: sdlaudio.c sdlaudio.o: sdlaudio.c $(CC) $(CFLAGS) $(DEFINES) $(SDL_CFLAGS) -c -o $@ $< +.PHONY: depend depend: $(SRCS) $(CC) -MM $(CFLAGS) $(DEFINES) $^ 1>.depend @@ -382,12 +383,15 @@ mixeng.o: mixeng.c mixeng.h mixeng_templ %.o: %.S $(CC) $(DEFINES) -c -o $@ $< +.PHONY: clean clean: rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp rm -rf config.mak config.h +.PHONY: distclean distclean: clean +.PHONY: install install: all if [ ! -d $(INSTALL_DIR) ];then mkdir -p $(INSTALL_DIR);fi if [ ! 
-d $(DESTDIR)$(configdir) ];then mkdir -p $(DESTDIR)$(configdir);fi diff -r 5719550652a1 -r 5cc367720223 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/ioemu/target-i386-dm/helper2.c Tue Apr 11 18:54:18 2006 -0600 @@ -409,12 +409,20 @@ void void destroy_hvm_domain(void) { - extern FILE* logfile; - char destroy_cmd[32]; - - sprintf(destroy_cmd, "xm destroy %d", domid); - if (system(destroy_cmd) == -1) - fprintf(logfile, "%s failed.!\n", destroy_cmd); + int xcHandle; + int sts; + + xcHandle = xc_interface_open(); + if (xcHandle < 0) + fprintf(logfile, "Cannot acquire xenctrl handle\n"); + else { + sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_poweroff); + if (sts != 0) + fprintf(logfile, "? xc_domain_shutdown failed to issue poweroff, sts %d, errno %d\n", sts, errno); + else + fprintf(logfile, "Issued domain %d poweroff\n", domid); + xc_interface_close(xcHandle); + } } fd_set wakeup_rfds; @@ -480,13 +488,24 @@ int main_loop(void) static void qemu_hvm_reset(void *unused) { - char cmd[64]; - - /* pause domain first, to avoid repeated reboot request*/ - xc_domain_pause(xc_handle, domid); - - sprintf(cmd, "xm shutdown -R %d", domid); - system(cmd); + int xcHandle; + int sts; + + /* pause domain first, to avoid repeated reboot request*/ + xc_domain_pause(xc_handle, domid); + + xcHandle = xc_interface_open(); + if (xcHandle < 0) + fprintf(logfile, "Cannot acquire xenctrl handle\n"); + else { + sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_reboot); + if (sts != 0) + fprintf(logfile, "? xc_domain_shutdown failed to issue reboot, sts %d\n", sts); + else + fprintf(logfile, "Issued domain %d reboot\n", domid); + xc_interface_close(xcHandle); + } + } CPUState * cpu_init() diff -r 5719550652a1 -r 5cc367720223 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/ioemu/vl.c Tue Apr 11 18:54:18 2006 -0600 @@ -2556,8 +2556,10 @@ static int set_mm_mapping(int xc_handle, return -1; } +#if 0 /* Generates lots of log file output - turn on for debugging */ for (i = 0; i < nr_pages; i++) fprintf(stderr, "set_map result i %x result %lx\n", i, extent_start[i]); +#endif return 0; } @@ -3244,8 +3246,17 @@ int main(int argc, char **argv) /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (has_cdrom) { - bs_table[2] = bdrv_new("cdrom"); - bdrv_set_type_hint(bs_table[2], BDRV_TYPE_CDROM); + int fd; + if ( (fd = open(hd_filename[2], O_RDONLY | O_BINARY)) < 0) { + hd_filename[2]=NULL; + bs_table[2]=NULL; + fprintf(logfile, "Could not open CD %s.\n", hd_filename[i]); + } + else { + close(fd); + bs_table[2] = bdrv_new("cdrom"); + bdrv_set_type_hint(bs_table[2], BDRV_TYPE_CDROM); + } } /* open the virtual block devices */ diff -r 5719550652a1 -r 5cc367720223 tools/libxc/Makefile --- a/tools/libxc/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -67,10 +67,14 @@ LIB += libxenguest.a LIB += libxenguest.a LIB += libxenguest.so libxenguest.so.$(MAJOR) libxenguest.so.$(MAJOR).$(MINOR) +.PHONY: all all: build + +.PHONY: build build: check-for-zlib mk-symlinks $(MAKE) $(LIB) +.PHONY: check-for-zlib check-for-zlib: @if [ ! 
-e /usr/include/zlib.h ]; then \ echo "***********************************************************"; \ @@ -79,6 +83,7 @@ check-for-zlib: false; \ fi +.PHONY: install install: build [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) [ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include @@ -94,14 +99,15 @@ install: build ln -sf libxenguest.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenguest.so $(INSTALL_DATA) xenguest.h $(DESTDIR)/usr/include -.PHONY: TAGS clean rpm install all - +.PHONY: TAGS TAGS: etags -t *.c *.h +.PHONY: clean clean: rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen +.PHONY: rpm rpm: build rm -rf staging mkdir staging diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_domain.c Tue Apr 11 18:54:18 2006 -0600 @@ -57,6 +57,35 @@ int xc_domain_destroy(int xc_handle, op.u.destroydomain.domain = (domid_t)domid; return do_dom0_op(xc_handle, &op); } + +int xc_domain_shutdown(int xc_handle, + uint32_t domid, + int reason) +{ + int ret = -1; + sched_remote_shutdown_t arg; + DECLARE_HYPERCALL; + + hypercall.op = __HYPERVISOR_sched_op; + hypercall.arg[0] = (unsigned long)SCHEDOP_remote_shutdown; + hypercall.arg[1] = (unsigned long)&arg; + arg.domain_id = domid; + arg.reason = reason; + + if ( mlock(&arg, sizeof(arg)) != 0 ) + { + PERROR("Could not lock memory for Xen hypercall"); + goto out1; + } + + ret = do_xen_hypercall(xc_handle, &hypercall); + + safe_munlock(&arg, sizeof(arg)); + + out1: + return ret; +} + int xc_vcpu_setaffinity(int xc_handle, uint32_t domid, diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_linux_build.c Tue Apr 11 18:54:18 2006 -0600 @@ -110,10 +110,10 @@ static int parse_features( if ( i == XENFEAT_NR_SUBMAPS*32 ) { - ERROR("Unknown feature \"%.*s\".\n", (int)(p-feats), feats); + ERROR("Unknown feature \"%.*s\".", (int)(p-feats), feats); if ( req ) { - ERROR("Kernel requires an unknown hypervisor feature.\n"); + ERROR("Kernel requires an unknown hypervisor feature."); return -EINVAL; } } @@ -579,6 +579,31 @@ static int setup_guest(int xc_handle, return -1; } #else /* x86 */ + +/* Check if the platform supports the guest kernel format */ +static int compat_check(int xc_handle, struct domain_setup_info *dsi) +{ + xen_capabilities_info_t xen_caps = ""; + + if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) { + ERROR("Cannot determine host capabilities."); + return 0; + } + + if (strstr(xen_caps, "xen-3.0-x86_32p")) { + if (!dsi->pae_kernel) { + ERROR("Non PAE-kernel on PAE host."); + return 0; + } + } else if (dsi->pae_kernel) { + ERROR("PAE-kernel on non-PAE host."); + return 0; + } + + return 1; +} + + static int setup_guest(int xc_handle, uint32_t dom, const char *image, unsigned long image_size, @@ -635,9 +660,12 @@ static int setup_guest(int xc_handle, if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) { - PERROR("Guest OS must load to a page boundary.\n"); - goto error_out; - } + PERROR("Guest OS must load to a page boundary."); + goto error_out; + } + + if (!compat_check(xc_handle, &dsi)) + goto error_out; /* Parse and validate kernel features. 
*/ p = strstr(dsi.xen_guest_string, "FEATURES="); @@ -647,7 +675,7 @@ static int setup_guest(int xc_handle, supported_features, required_features) ) { - ERROR("Failed to parse guest kernel features.\n"); + ERROR("Failed to parse guest kernel features."); goto error_out; } @@ -659,7 +687,7 @@ static int setup_guest(int xc_handle, { if ( (supported_features[i]&required_features[i]) != required_features[i] ) { - ERROR("Guest kernel does not support a required feature.\n"); + ERROR("Guest kernel does not support a required feature."); goto error_out; } } diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_linux_restore.c Tue Apr 11 18:54:18 2006 -0600 @@ -646,18 +646,14 @@ int xc_linux_restore(int xc_handle, int goto out; } - if ((pt_levels == 2) && ((pfn_type[pfn]<ABTYPE_MASK) != L2TAB)) { + if ( (pfn_type[pfn] & LTABTYPE_MASK) != + ((unsigned long)pt_levels<<LTAB_SHIFT) ) { ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", - pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB); - goto out; - } - - if ((pt_levels == 3) && ((pfn_type[pfn]<ABTYPE_MASK) != L3TAB)) { - ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", - pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB); - goto out; - } - + pfn, max_pfn, pfn_type[pfn], + (unsigned long)pt_levels<<LTAB_SHIFT); + goto out; + } + ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT; /* clear any pending events and the selector */ diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_load_elf.c Tue Apr 11 18:54:18 2006 -0600 @@ -66,6 +66,21 @@ static int parseelfimage(const char *ima if ( !IS_ELF(*ehdr) ) { ERROR("Kernel image does not have an ELF header."); + return -EINVAL; + } + + if ( +#if defined(__i386__) + (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || + (ehdr->e_machine != EM_386) || +#elif defined(__x86_64__) + (ehdr->e_ident[EI_CLASS] != ELFCLASS64) || + (ehdr->e_machine != EM_X86_64) || +#endif + (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || + (ehdr->e_type != ET_EXEC) ) + { + ERROR("Kernel not a Xen-compatible Elf image."); return -EINVAL; } diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_ptrace.c Tue Apr 11 18:54:18 2006 -0600 @@ -153,190 +153,58 @@ online_vcpus_changed(cpumap_t cpumap) } /* --------------------- */ +/* XXX application state */ +static long nr_pages = 0; +static unsigned long *page_array = NULL; static void * -map_domain_va_pae( +map_domain_va_32( int xc_handle, int cpu, void *guest_va, int perm) { - unsigned long l2p, l1p, p, va = (unsigned long)guest_va; - uint64_t *l3, *l2, *l1; - static void *v; - - if (fetch_regs(xc_handle, cpu, NULL)) - return NULL; - - l3 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); - if ( l3 == NULL ) - return NULL; - - l2p = l3[l3_table_offset_pae(va)] >> PAGE_SHIFT; - l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p); - if ( l2 == NULL ) - return NULL; - - l1p = l2[l2_table_offset_pae(va)] >> PAGE_SHIFT; - l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p); - if ( l1 == NULL ) - return NULL; - - p = l1[l1_table_offset_pae(va)] >> PAGE_SHIFT; - if ( v != NULL ) - munmap(v, PAGE_SIZE); - v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p); - if ( v == NULL ) - return NULL; - - return 
(void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); -} - -#ifdef __x86_64__ -static void * -map_domain_va( - int xc_handle, - int cpu, - void *guest_va, - int perm) -{ - unsigned long l3p, l2p, l1p, p, va = (unsigned long)guest_va; - uint64_t *l4, *l3, *l2, *l1; - static void *v; - - if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ - return map_domain_va_pae(xc_handle, cpu, guest_va, perm); - - if (fetch_regs(xc_handle, cpu, NULL)) - return NULL; - - l4 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); - if ( l4 == NULL ) - return NULL; - - l3p = l4[l4_table_offset(va)] >> PAGE_SHIFT; - l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p); - if ( l3 == NULL ) - return NULL; - - l2p = l3[l3_table_offset(va)] >> PAGE_SHIFT; - l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p); - if ( l2 == NULL ) - return NULL; - - l1p = l2[l2_table_offset(va)] >> PAGE_SHIFT; - l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p); - if ( l1 == NULL ) - return NULL; - - p = l1[l1_table_offset(va)] >> PAGE_SHIFT; - if ( v != NULL ) - munmap(v, PAGE_SIZE); - v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p); - if ( v == NULL ) - return NULL; - - return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); -} -#endif - -#ifdef __i386__ -/* XXX application state */ -static long nr_pages = 0; -static unsigned long *page_array = NULL; - -static void * -map_domain_va( - int xc_handle, - int cpu, - void *guest_va, - int perm) -{ - unsigned long pde, page; unsigned long va = (unsigned long)guest_va; - long npgs = xc_get_tot_pages(xc_handle, current_domid); - - - static uint32_t cr3_phys[MAX_VIRT_CPUS]; - static unsigned long *cr3_virt[MAX_VIRT_CPUS]; + + static unsigned long cr3_phys[MAX_VIRT_CPUS]; + static uint32_t *cr3_virt[MAX_VIRT_CPUS]; static unsigned long pde_phys[MAX_VIRT_CPUS]; - static unsigned long *pde_virt[MAX_VIRT_CPUS]; + static uint32_t *pde_virt[MAX_VIRT_CPUS]; static unsigned long page_phys[MAX_VIRT_CPUS]; - static unsigned long *page_virt[MAX_VIRT_CPUS]; + static uint32_t *page_virt[MAX_VIRT_CPUS]; static int prev_perm[MAX_VIRT_CPUS]; - static enum { MODE_UNKNOWN, MODE_32, MODE_PAE, MODE_64 } mode; - - if ( mode == MODE_UNKNOWN ) - { - xen_capabilities_info_t caps; - (void)xc_version(xc_handle, XENVER_capabilities, caps); - if ( strstr(caps, "-x86_64") ) - mode = MODE_64; - else if ( strstr(caps, "-x86_32p") ) - mode = MODE_PAE; - else if ( strstr(caps, "-x86_32") ) - mode = MODE_32; - } - - if ( mode == MODE_PAE ) - return map_domain_va_pae(xc_handle, cpu, guest_va, perm); - - if ( nr_pages != npgs ) - { - if ( nr_pages > 0 ) - free(page_array); - nr_pages = npgs; - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) - { - printf("Could not allocate memory\n"); - return NULL; - } - if ( xc_get_pfn_list(xc_handle, current_domid, - page_array, nr_pages) != nr_pages ) - { - printf("Could not get the page frame list\n"); - return NULL; - } - } - - if (fetch_regs(xc_handle, cpu, NULL)) - return NULL; - - if (paging_enabled(&ctxt[cpu])) { - if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] ) - { - cr3_phys[cpu] = ctxt[cpu].ctrlreg[3]; - if ( cr3_virt[cpu] ) - munmap(cr3_virt[cpu], PAGE_SIZE); - cr3_virt[cpu] = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - cr3_phys[cpu] >> PAGE_SHIFT); - if ( cr3_virt[cpu] == NULL ) - return NULL; - } - if ( (pde = cr3_virt[cpu][vtopdi(va)]) == 0 ) - return NULL; 
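
As a standalone sketch (again, illustrative rather than patch content): the reorganised map_domain_va_32() above walks CR3, then the page-directory entry selected by vtopdi(va), then the page-table entry selected by vtopti(va). For a 32-bit non-PAE guest those indices come straight out of the virtual address, using the standard x86 bit layout; the function names below are hypothetical.

/* Minimal sketch of the 32-bit (non-PAE) virtual address split; not libxc code. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  0xfffu                      /* low 12 bits: byte within page */

static void split_va_32(uint32_t va)
{
    uint32_t pd_index = va >> 22;                       /* top 10 bits: page directory */
    uint32_t pt_index = (va >> PAGE_SHIFT) & 0x3ffu;    /* next 10 bits: page table    */
    uint32_t offset   = va & PAGE_MASK;                 /* low 12 bits: page offset    */

    printf("va=0x%08x pd=%u pt=%u off=0x%03x\n",
           (unsigned)va, (unsigned)pd_index, (unsigned)pt_index, (unsigned)offset);
}

int main(void)
{
    split_va_32(0xc0123456);   /* pd=768 pt=291 off=0x456 */
    return 0;
}
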
- if ( (ctxt[cpu].flags & VGCF_HVM_GUEST) && paging_enabled(&ctxt[cpu]) ) - pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT; - if ( pde != pde_phys[cpu] ) - { - pde_phys[cpu] = pde; - if ( pde_virt[cpu] ) - munmap(pde_virt[cpu], PAGE_SIZE); - pde_virt[cpu] = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, - pde_phys[cpu] >> PAGE_SHIFT); - if ( pde_virt[cpu] == NULL ) - return NULL; - } - if ( (page = pde_virt[cpu][vtopti(va)]) == 0 ) - return NULL; - } else { - page = va; - } + + if (ctxt[cpu].ctrlreg[3] == 0) + return NULL; + if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] ) + { + cr3_phys[cpu] = ctxt[cpu].ctrlreg[3]; + if ( cr3_virt[cpu] ) + munmap(cr3_virt[cpu], PAGE_SIZE); + cr3_virt[cpu] = xc_map_foreign_range( + xc_handle, current_domid, PAGE_SIZE, PROT_READ, + cr3_phys[cpu] >> PAGE_SHIFT); + if ( cr3_virt[cpu] == NULL ) + return NULL; + } + if ( (pde = cr3_virt[cpu][vtopdi(va)]) == 0 ) + return NULL; + if ( (ctxt[cpu].flags & VGCF_HVM_GUEST) && paging_enabled(&ctxt[cpu]) ) + pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT; + if ( pde != pde_phys[cpu] ) + { + pde_phys[cpu] = pde; + if ( pde_virt[cpu] ) + munmap(pde_virt[cpu], PAGE_SIZE); + pde_virt[cpu] = xc_map_foreign_range( + xc_handle, current_domid, PAGE_SIZE, PROT_READ, + pde_phys[cpu] >> PAGE_SHIFT); + if ( pde_virt[cpu] == NULL ) + return NULL; + } + if ( (page = pde_virt[cpu][vtopti(va)]) == 0 ) + return NULL; if (ctxt[cpu].flags & VGCF_HVM_GUEST) page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT; if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) ) @@ -358,7 +226,182 @@ map_domain_va( return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); } + +static void * +map_domain_va_pae( + int xc_handle, + int cpu, + void *guest_va, + int perm) +{ + unsigned long l2p, l1p, p, va = (unsigned long)guest_va; + uint64_t *l3, *l2, *l1; + static void *v; + + l3 = xc_map_foreign_range( + xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + if ( l3 == NULL ) + return NULL; + + l2p = l3[l3_table_offset_pae(va)] >> PAGE_SHIFT; + l2p = page_array[l2p]; + l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p); + munmap(l3, PAGE_SIZE); + if ( l2 == NULL ) + return NULL; + + l1p = l2[l2_table_offset_pae(va)] >> PAGE_SHIFT; + l1p = page_array[l1p]; + l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p); + munmap(l2, PAGE_SIZE); + if ( l1 == NULL ) + return NULL; + + p = l1[l1_table_offset_pae(va)] >> PAGE_SHIFT; + p = page_array[p]; + if ( v != NULL ) + munmap(v, PAGE_SIZE); + v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p); + munmap(l1, PAGE_SIZE); + if ( v == NULL ) + return NULL; + + return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); +} + +#ifdef __x86_64__ +static void * +map_domain_va_64( + int xc_handle, + int cpu, + void *guest_va, + int perm) +{ + unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va; + uint64_t *l4, *l3, *l2, *l1; + static void *v; + + if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ + return map_domain_va_32(xc_handle, cpu, guest_va, perm); + + l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + if ( l4 == NULL ) + return NULL; + + l3p = l4[l4_table_offset(va)] >> PAGE_SHIFT; + l3p = page_array[l3p]; + l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p); + munmap(l4, PAGE_SIZE); + if ( l3 == NULL ) + return NULL; + + l2p = l3[l3_table_offset(va)] >> 
PAGE_SHIFT; + l2p = page_array[l2p]; + l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p); + munmap(l3, PAGE_SIZE); + if ( l2 == NULL ) + return NULL; + + l1 = NULL; + l1e = l2[l2_table_offset(va)]; + l1p = l1e >> PAGE_SHIFT; + if (l1e & 0x80) { /* 2M pages */ + p = (l1p + l1_table_offset(va)); + } else { /* 4K pages */ + l1p = page_array[l1p]; + l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p); + munmap(l2, PAGE_SIZE); + if ( l1 == NULL ) + return NULL; + + p = l1[l1_table_offset(va)] >> PAGE_SHIFT; + } + p = page_array[p]; + if ( v != NULL ) + munmap(v, PAGE_SIZE); + v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p); + if (l1) + munmap(l1, PAGE_SIZE); + if ( v == NULL ) + return NULL; + + return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1))); +} #endif + +static void * +map_domain_va( + int xc_handle, + int cpu, + void *guest_va, + int perm) +{ + unsigned long va = (unsigned long) guest_va; + long npgs = xc_get_tot_pages(xc_handle, current_domid); + static enum { MODE_UNKNOWN, MODE_64, MODE_32, MODE_PAE } mode; + + if ( mode == MODE_UNKNOWN ) + { + xen_capabilities_info_t caps; + (void)xc_version(xc_handle, XENVER_capabilities, caps); + if ( strstr(caps, "-x86_64") ) + mode = MODE_64; + else if ( strstr(caps, "-x86_32p") ) + mode = MODE_PAE; + else if ( strstr(caps, "-x86_32") ) + mode = MODE_32; + } + + if ( nr_pages != npgs ) + { + if ( nr_pages > 0 ) + free(page_array); + nr_pages = npgs; + if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + { + printf("Could not allocate memory\n"); + return NULL; + } + if ( xc_get_pfn_list(xc_handle, current_domid, + page_array, nr_pages) != nr_pages ) + { + printf("Could not get the page frame list\n"); + return NULL; + } + } + + if (fetch_regs(xc_handle, cpu, NULL)) + return NULL; + + if (!paging_enabled(&ctxt[cpu])) { + static void * v; + unsigned long page; + + if ( v != NULL ) + munmap(v, PAGE_SIZE); + + page = page_array[va >> PAGE_SHIFT] << PAGE_SHIFT; + + v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, + perm, page >> PAGE_SHIFT); + + if ( v == NULL ) + return NULL; + + return (void *)(((unsigned long)v) | (va & BSD_PAGE_MASK)); + } +#ifdef __x86_64__ + if ( mode == MODE_64 ) + return map_domain_va_64(xc_handle, cpu, guest_va, perm); +#endif + if ( mode == MODE_PAE ) + return map_domain_va_pae(xc_handle, cpu, guest_va, perm); + /* else ( mode == MODE_32 ) */ + return map_domain_va_32(xc_handle, cpu, guest_va, perm); +} + +int control_c_pressed_flag = 0; static int __xc_waitdomain( @@ -378,7 +421,6 @@ __xc_waitdomain( op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = domain; - retry: retval = do_dom0_op(xc_handle, &op); if ( retval || (op.u.getdomaininfo.domain != domain) ) @@ -391,12 +433,17 @@ __xc_waitdomain( if ( options & WNOHANG ) goto done; + if (control_c_pressed_flag) { + xc_domain_pause(xc_handle, domain); + control_c_pressed_flag = 0; + goto done; + } + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ) { nanosleep(&ts,NULL); goto retry; } - /* XXX check for ^C here */ done: if (get_online_cpumap(xc_handle, &op.u.getdomaininfo, &cpumap)) printf("get_online_cpumap failed\n"); @@ -470,7 +517,7 @@ xc_ptrace( break; case PTRACE_SETREGS: - if (!current_isfile) + if (current_isfile) goto out_unspported; /* XXX not yet supported */ SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs); if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, @@ -492,7 +539,7 @@ xc_ptrace( case PTRACE_CONT: case 
PTRACE_DETACH: - if (!current_isfile) + if (current_isfile) goto out_unspported; /* XXX not yet supported */ if ( request != PTRACE_SINGLESTEP ) { diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xc_ptrace.h Tue Apr 11 18:54:18 2006 -0600 @@ -31,7 +31,7 @@ struct gdb_regs unsigned long orig_rax; unsigned long rip; unsigned long xcs; - unsigned long eflags; + unsigned long rflags; unsigned long rsp; unsigned long xss; unsigned long fs_base; @@ -61,7 +61,7 @@ struct gdb_regs pt.rax = xc.rax; \ pt.rip = xc.rip; \ pt.xcs = xc.cs; \ - pt.eflags = xc.eflags; \ + pt.rflags = xc.rflags; \ pt.rsp = xc.rsp; \ pt.xss = xc.ss; \ pt.xes = xc.es; \ @@ -89,7 +89,7 @@ struct gdb_regs xc.rax = pt->rax; \ xc.rip = pt->rip; \ xc.cs = pt->xcs; \ - xc.eflags = pt->eflags; \ + xc.rflags = pt->rflags & 0xffffffff; \ xc.rsp = pt->rsp; \ xc.ss = pt->xss; \ xc.es = pt->xes; \ diff -r 5719550652a1 -r 5cc367720223 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/libxc/xenctrl.h Tue Apr 11 18:54:18 2006 -0600 @@ -206,6 +206,21 @@ int xc_domain_destroy(int xc_handle, int xc_domain_destroy(int xc_handle, uint32_t domid); +/** + * This function will shutdown a domain. This is intended for use in + * fully-virtualized domains where this operation is analogous to the + * sched_op operations in a paravirtualized domain. The caller is + * expected to give the reason for the shutdown. + * + * @parm xc_handle a handle to an open hypervisor interface + * @parm domid the domain id to destroy + * @parm reason is the reason (SHUTDOWN_xxx) for the shutdown + * @return 0 on success, -1 on failure + */ +int xc_domain_shutdown(int xc_handle, + uint32_t domid, + int reason); + int xc_vcpu_setaffinity(int xc_handle, uint32_t domid, int vcpu, diff -r 5719550652a1 -r 5cc367720223 tools/misc/Makefile --- a/tools/misc/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -18,7 +18,10 @@ INSTALL_BIN = $(TARGETS) xencons INSTALL_BIN = $(TARGETS) xencons INSTALL_SBIN = netfix xm xen-bugtool xend xenperf +.PHONY: all all: build + +.PHONY: build build: $(TARGETS) $(MAKE) -C miniterm $(MAKE) -C cpuperf @@ -27,6 +30,7 @@ endif endif $(MAKE) -C lomount +.PHONY: install install: build [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin [ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin @@ -39,6 +43,7 @@ install: build # Likewise mbootpack # $(MAKE) -C mbootpack install +.PHONY: clean clean: $(RM) *.o $(TARGETS) *~ $(MAKE) -C miniterm clean diff -r 5719550652a1 -r 5cc367720223 tools/misc/cpuperf/Makefile --- a/tools/misc/cpuperf/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/cpuperf/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -26,8 +26,10 @@ INSTALL_BIN = $(TARGETS) INSTALL_BIN = $(TARGETS) +.PHONY: all all: $(TARGETS) +.PHONY: clean clean: $(RM) *.o $(TARGETS) @@ -40,6 +42,7 @@ cpuperf-perfcntr: cpuperf.c $(HDRS) Make cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile $(CC) $(CFLAGS) -DPERFCNTR -o $@ $< +.PHONY: install install: all $(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin diff -r 5719550652a1 -r 5cc367720223 tools/misc/lomount/Makefile --- a/tools/misc/lomount/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/lomount/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -13,12 +13,17 @@ OBJS = $(patsubst %.c,%.o,$(wildcard BIN = lomount +.PHONY: all all: build + +.PHONY: build build: $(BIN) +.PHONY: install install: build $(INSTALL_PROG) 
$(BIN) $(SCRIPTS) $(DESTDIR)/usr/bin +.PHONY: clean clean: $(RM) *.a *.so *.o *.rpm $(BIN) diff -r 5719550652a1 -r 5cc367720223 tools/misc/mbootpack/Makefile --- a/tools/misc/mbootpack/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/mbootpack/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -8,9 +8,13 @@ XEN_ROOT=../../.. XEN_ROOT=../../.. include $(XEN_ROOT)/tools/Rules.mk +.PHONY: all all: build + +.PHONY: build build: mbootpack +.PHONY: install install: build $(INSTALL_PROG) mbootpack $(DESTDIR)/usr/bin @@ -33,6 +37,7 @@ mbootpack: $(OBJS) mbootpack: $(OBJS) $(HOSTCC) -o $@ $(filter-out %.a, $^) +.PHONY: clean clean: $(RM) mbootpack *.o $(DEPS) bootsect setup bzimage_header.c bin2c @@ -60,7 +65,6 @@ buildimage.c: bzimage_header.c %.o: %.c $(HOSTCC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@ -.PHONY: all clean gdb .PRECIOUS: $(OBJS) $(OBJS:.o=.c) $(DEPS) .SUFFIXES: diff -r 5719550652a1 -r 5cc367720223 tools/misc/miniterm/Makefile --- a/tools/misc/miniterm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/miniterm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -7,12 +7,15 @@ INSTALL_DIR = $(INSTALL) -d -m0755 TARGET = miniterm +.PHONY: all all: $(TARGET) +.PHONY: install install: all [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin $(INSTALL_PROG) $(TARGET) $(DESTDIR)/usr/bin +.PHONY: clean clean: $(RM) *.o $(TARGET) *~ diff -r 5719550652a1 -r 5cc367720223 tools/misc/nsplitd/Makefile --- a/tools/misc/nsplitd/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/misc/nsplitd/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -8,10 +8,13 @@ OBJS = $(patsubst %.c,%.o,$(wildcard TARGET = nsplitd +.PHONY: all all: $(TARGET) +.PHONY: install install: all +.PHONY: clean clean: $(RM) *.o $(TARGET) *~ diff -r 5719550652a1 -r 5cc367720223 tools/pygrub/Makefile --- a/tools/pygrub/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/pygrub/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -2,10 +2,13 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. 
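
For reference, a minimal caller sketch (illustrative only) for the xc_domain_shutdown() interface declared in xenctrl.h above: it follows the same open/shutdown/close pattern the qemu-dm reset and destroy paths use. Error handling is trimmed, and SHUTDOWN_poweroff is assumed to be visible through the public Xen headers.

/* Hedged sketch of using the new libxc call; not part of the changeset. */
#include <stdint.h>
#include <stdio.h>
#include <xenctrl.h>   /* xc_interface_open/xc_domain_shutdown/xc_interface_close */

static int poweroff_domain(uint32_t domid)
{
    int xc_handle = xc_interface_open();
    int rc;

    if (xc_handle < 0) {
        fprintf(stderr, "Cannot acquire xenctrl handle\n");
        return -1;
    }

    rc = xc_domain_shutdown(xc_handle, domid, SHUTDOWN_poweroff);
    if (rc != 0)
        fprintf(stderr, "xc_domain_shutdown failed (rc=%d)\n", rc);

    xc_interface_close(xc_handle);
    return rc;
}
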
include $(XEN_ROOT)/tools/Rules.mk +.PHONY: all all: build +.PHONY: build build: CFLAGS="$(CFLAGS)" python setup.py build +.PHONY: install ifndef XEN_PYTHON_NATIVE_INSTALL install: all CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" @@ -14,5 +17,6 @@ install: all CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" endif +.PHONY: clean clean: - rm -rf build tmp *.pyc *.pyo *.o *.a *~ + rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out diff -r 5719550652a1 -r 5cc367720223 tools/pygrub/src/fsys/ext2/ext2module.c --- a/tools/pygrub/src/fsys/ext2/ext2module.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/pygrub/src/fsys/ext2/ext2module.c Tue Apr 11 18:54:18 2006 -0600 @@ -213,7 +213,9 @@ ext2_fs_open (Ext2Fs *fs, PyObject *args int flags = 0, superblock = 0, offset = 0, err; unsigned int block_size = 0; ext2_filsys efs; +#ifdef HAVE_EXT2FS_OPEN2 char offsetopt[30]; +#endif if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|iiii", kwlist, &name, &flags, &superblock, @@ -225,19 +227,27 @@ ext2_fs_open (Ext2Fs *fs, PyObject *args return NULL; } - if (offset != 0) { +#ifdef HAVE_EXT2FS_OPEN2 + if (offset == 0) { + offsetopt[0] = '\0'; + } + else { snprintf(offsetopt, 29, "offset=%d", offset); } -#ifdef HAVE_EXT2FS_OPEN2 err = ext2fs_open2(name, offsetopt, flags, superblock, block_size, unix_io_manager, &efs); #else + if (offset != 0) { + PyErr_SetString(PyExc_ValueError, "offset argument not supported"); + return NULL; + } + err = ext2fs_open(name, flags, superblock, block_size, unix_io_manager, &efs); #endif if (err) { - PyErr_SetString(PyExc_ValueError, "unable to open file"); + PyErr_SetString(PyExc_ValueError, "unable to open filesystem"); return NULL; } diff -r 5719550652a1 -r 5cc367720223 tools/python/Makefile --- a/tools/python/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,12 +1,14 @@ XEN_ROOT = ../.. XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk -.PHONY: all build install clean +.PHONY: all +all: build -all: build +.PHONY: build build: CFLAGS="$(CFLAGS)" python setup.py build +.PHONY: install ifndef XEN_PYTHON_NATIVE_INSTALL install: all CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --force @@ -15,8 +17,10 @@ install: all CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force endif +.PHONY: test test: export LD_LIBRARY_PATH=$$(readlink -f ../libxc):$$(readlink -f ../xenstore); python test.py -b -u +.PHONY: clean clean: rm -rf build *.pyc *.pyo *.o *.a *~ diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/util/diagnose.py --- a/tools/python/xen/util/diagnose.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/util/diagnose.py Tue Apr 11 18:54:18 2006 -0600 @@ -41,7 +41,7 @@ def diagnose(dom): global dompath try: - domain = server.xend_domain(dom) + domain = server.xend.domain(dom) state = sxp.child_value(domain, 'state') domid = int(sxp.child_value(domain, 'domid')) name = sxp.child_value(domain, 'name') diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/util/xmlrpclib2.py Tue Apr 11 18:54:18 2006 -0600 @@ -20,11 +20,18 @@ An enhanced XML-RPC client/server interf An enhanced XML-RPC client/server interface for Python. 
""" +import string +import types + from httplib import HTTPConnection, HTTP from xmlrpclib import Transport from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler -import xmlrpclib, socket, os, traceback +import xmlrpclib, socket, os, stat import SocketServer + +import xen.xend.XendClient +from xen.xend.XendLogging import log + # A new ServerProxy that also supports httpu urls. An http URL comes in the # form: @@ -48,6 +55,18 @@ class UnixTransport(Transport): def make_connection(self, host): return HTTPUnix(self.__handler) + +# See _marshalled_dispatch below. +def conv_string(x): + if (isinstance(x, types.StringType) or + isinstance(x, unicode)): + s = string.replace(x, "'", r"\047") + exec "s = '" + s + "'" + return s + else: + return x + + class ServerProxy(xmlrpclib.ServerProxy): def __init__(self, uri, transport=None, encoding=None, verbose=0, allow_none=1): @@ -59,9 +78,18 @@ class ServerProxy(xmlrpclib.ServerProxy) xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding, verbose, allow_none) + + def __request(self, methodname, params): + response = xmlrpclib.ServerProxy.__request(self, methodname, params) + + if isinstance(response, tuple): + return tuple([conv_string(x) for x in response]) + else: + return conv_string(response) + + # This is a base XML-RPC server for TCP. It sets allow_reuse_address to -# true, and has an improved marshaller that serializes unknown exceptions -# with full traceback information. +# true, and has an improved marshaller that logs and serializes exceptions. class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): allow_reuse_address = True @@ -74,16 +102,28 @@ class TCPXMLRPCServer(SocketServer.Threa else: response = self._dispatch(method, params) + # With either Unicode or normal strings, we can only transmit + # \t, \n, \r, \u0020-\ud7ff, \ue000-\ufffd, and \u10000-\u10ffff + # in an XML document. xmlrpclib does not escape these values + # properly, and then breaks when it comes to parse the document. + # To hack around this problem, we use repr here and exec above + # to transmit the string using Python encoding. + # Thanks to David Mertz <mertz@xxxxxxxxx> for the trick (buried + # in xml_pickle.py). 
+ if (isinstance(response, types.StringType) or + isinstance(response, unicode)): + response = repr(response)[1:-1] + response = (response,) response = xmlrpclib.dumps(response, methodresponse=1, allow_none=1) except xmlrpclib.Fault, fault: response = xmlrpclib.dumps(fault) - except: + except Exception, exn: + log.exception(exn) response = xmlrpclib.dumps( - xmlrpclib.Fault(1, traceback.format_exc()) - ) + xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) return response @@ -102,10 +142,13 @@ class UnixXMLRPCServer(TCPXMLRPCServer): address_family = socket.AF_UNIX def __init__(self, addr, logRequests): - if self.allow_reuse_address: - try: + parent = os.path.dirname(addr) + if os.path.exists(parent): + os.chown(parent, os.geteuid(), os.getegid()) + os.chmod(parent, stat.S_IRWXU) + if self.allow_reuse_address and os.path.exists(addr): os.unlink(addr) - except OSError, exc: - pass + else: + os.makedirs(parent, stat.S_IRWXU) TCPXMLRPCServer.__init__(self, addr, UnixXMLRPCRequestHandler, logRequests) diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/XendClient.py --- a/tools/python/xen/xend/XendClient.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/XendClient.py Tue Apr 11 18:54:18 2006 -0600 @@ -19,10 +19,10 @@ from xen.util.xmlrpclib2 import ServerProxy -XML_RPC_SOCKET = "/var/run/xend-xmlrpc.sock" +XML_RPC_SOCKET = "/var/run/xend/xmlrpc.sock" ERROR_INTERNAL = 1 ERROR_GENERIC = 2 ERROR_INVALID_DOMAIN = 3 -server = ServerProxy('httpu:///var/run/xend-xmlrpc.sock') +server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock') diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/XendDomain.py Tue Apr 11 18:54:18 2006 -0600 @@ -34,7 +34,7 @@ import XendDomainInfo from xen.xend import XendRoot from xen.xend import XendCheckpoint -from xen.xend.XendError import XendError +from xen.xend.XendError import XendError, XendInvalidDomain from xen.xend.XendLogging import log from xen.xend.xenstore.xstransact import xstransact from xen.xend.xenstore.xswatch import xswatch @@ -357,6 +357,8 @@ class XendDomain: """Unpause domain execution.""" try: dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) log.info("Domain %s (%d) unpaused.", dominfo.getName(), dominfo.getDomid()) return dominfo.unpause() @@ -368,6 +370,8 @@ class XendDomain: """Pause domain execution.""" try: dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) log.info("Domain %s (%d) paused.", dominfo.getName(), dominfo.getDomid()) return dominfo.pause() @@ -395,6 +399,8 @@ class XendDomain: """Start domain migration.""" dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) if dominfo.getDomid() == PRIV_DOMAIN: raise XendError("Cannot migrate privileged domain %i" % domid) @@ -420,6 +426,8 @@ class XendDomain: try: dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) if dominfo.getDomid() == PRIV_DOMAIN: raise XendError("Cannot save privileged domain %i" % domid) @@ -440,9 +448,9 @@ class XendDomain: @param cpumap: string repr of list of usable cpus """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) - # convert cpumap string into a list of ints - cpumap = map(lambda x: int(x), - cpumap.replace("[", "").replace("]", "").split(",")) + if not dominfo: + raise 
XendInvalidDomain(str(domid)) + try: return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap) except Exception, ex: @@ -453,6 +461,8 @@ class XendDomain: """Set BVT (Borrowed Virtual Time) scheduler parameters for a domain. """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) try: return xc.bvtsched_domain_set(dom=dominfo.getDomid(), mcuadv=mcuadv, @@ -466,6 +476,8 @@ class XendDomain: """Get BVT (Borrowed Virtual Time) scheduler parameters for a domain. """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) try: return xc.bvtsched_domain_get(dominfo.getDomid()) except Exception, ex: @@ -477,6 +489,8 @@ class XendDomain: """Set Simple EDF scheduler parameters for a domain. """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) try: return xc.sedf_domain_set(dominfo.getDomid(), period, slice_, latency, extratime, weight) @@ -487,8 +501,9 @@ class XendDomain: """Get Simple EDF scheduler parameters for a domain. """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) - try: - + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: sedf_info = xc.sedf_domain_get(dominfo.getDomid()) # return sxpr return ['sedf', @@ -509,6 +524,8 @@ class XendDomain: @return: 0 on success, -1 on error """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) maxmem = int(mem) * 1024 try: return xc.domain_setmaxmem(dominfo.getDomid(), maxmem) @@ -523,6 +540,8 @@ class XendDomain: @return: 0 on success, -1 on error """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) nr_ports = last - first + 1 try: return xc.domain_ioport_permission(dominfo.getDomid(), @@ -540,6 +559,8 @@ class XendDomain: @return: 0 on success, -1 on error """ dominfo = self.domain_lookup_by_name_or_id_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) nr_ports = last - first + 1 try: return xc.domain_ioport_permission(dominfo.getDomid(), diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Apr 11 18:54:18 2006 -0600 @@ -333,7 +333,7 @@ def parseConfig(config): else: # remove this element from the list if c[0] == '^': - cpus = [x for x in cpus if x != int(c[1])] + cpus = [x for x in cpus if x != int(c[1:])] else: cpus.append(int(c)) diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/balloon.py Tue Apr 11 18:54:18 2006 -0600 @@ -152,7 +152,7 @@ def free(required): 'I cannot release any more. 
I need %d MiB but ' 'only have %d.') % (need_mem, free_mem)) - elif new_alloc >= dom0_min_mem: + elif new_alloc < dom0_min_mem: raise VmError( ('I need %d MiB, but dom0_min_mem is %d and shrinking to ' '%d MiB would leave only %d MiB free.') % diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/server/DevController.py Tue Apr 11 18:54:18 2006 -0600 @@ -85,6 +85,13 @@ class DevController: front) import xen.xend.XendDomain + xd = xen.xend.XendDomain.instance() + backdom_name = sxp.child_value(config, 'backend') + if backdom_name is None: + backdom = xen.xend.XendDomain.PRIV_DOMAIN + else: + bd = xd.domain_lookup_by_name_or_id_nr(backdom_name) + backdom = bd.getDomid() count = 0 while True: t = xstransact() @@ -112,9 +119,13 @@ class DevController: t.mkdir(backpath) t.set_permissions(backpath, - {'dom': xen.xend.XendDomain.PRIV_DOMAIN }, + {'dom': backdom }, {'dom' : self.vm.getDomid(), 'read' : True }) + t.mkdir(frontpath) + t.set_permissions(frontpath, + {'dom': self.vm.getDomid()}, + {'dom': backdom, 'read': True}) t.write2(frontpath, front) t.write2(backpath, back) diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Tue Apr 11 18:54:18 2006 -0600 @@ -24,6 +24,7 @@ from xen.util.xmlrpclib2 import UnixXMLR from xen.xend.XendClient import XML_RPC_SOCKET, ERROR_INVALID_DOMAIN from xen.xend.XendError import * +from types import ListType def lookup(domid): info = XendDomain.instance().domain_lookup_by_name_or_id(domid) @@ -35,24 +36,36 @@ def dispatch(domid, fn, args): info = lookup(domid) return getattr(info, fn)(*args) +# vcpu_avail is a long and is not needed by the clients. It's far easier +# to just remove it then to try and marshal the long. 
+def fixup_sxpr(sexpr): + ret = [] + for k in sexpr: + if type(k) is ListType: + if len(k) != 2 or k[0] != 'vcpu_avail': + ret.append(fixup_sxpr(k)) + else: + ret.append(k) + return ret + def domain(domid): info = lookup(domid) - return info.sxpr() + return fixup_sxpr(info.sxpr()) def domains(detail=1): if detail < 1: return XendDomain.instance().list_names() else: domains = XendDomain.instance().list_sorted() - return map(lambda dom: dom.sxpr(), domains) + return map(lambda dom: fixup_sxpr(dom.sxpr()), domains) def domain_create(config): info = XendDomain.instance().domain_create(config) - return info.sxpr() + return fixup_sxpr(info.sxpr()) def domain_restore(src): info = XendDomain.instance().domain_restore(src) - return info.sxpr() + return fixup_sxpr(info.sxpr()) def get_log(): f = open(XendLogging.getLogFilename(), 'r') diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/server/netif.py Tue Apr 11 18:54:18 2006 -0600 @@ -22,6 +22,7 @@ import os import random +import re from xen.xend import sxp from xen.xend import XendRoot @@ -50,6 +51,86 @@ def randomMAC(): random.randint(0x00, 0xff) ] return ':'.join(map(lambda x: "%02x" % x, mac)) +rate_re = re.compile("^([0-9]+)([GMK]?)([Bb])/s(@([0-9]+)([mu]?)s)?$") + +def parseRate(ratestr): + """if parsing fails this will return default of unlimited rate""" + bytes_per_interval = 0xffffffffL # 0xffffffff # big default + interval_usecs = 0L # disabled + + m = rate_re.match(ratestr) + if m: + bytes_per_sec = long(m.group(1)) + + if m.group(2) == 'G': + bytes_per_sec *= 1000 * 1000 * 1000 + elif m.group(2) == 'M': + bytes_per_sec *= 1000 * 1000 + elif m.group(2) == 'K': + bytes_per_sec *= 1000 + + if m.group(3) == 'b': + bytes_per_sec /= 8 + + if m.group(5) is None: + interval_usecs = 50000L # 50ms default + else: + interval_usecs = long(m.group(5)) + if m.group(6) == '': + interval_usecs *= 1000 * 1000 + elif m.group(6) == 'm': + interval_usecs *= 1000 + + bytes_per_interval = (bytes_per_sec * interval_usecs) / 1000000L + + # overflow / underflow checking: default to unlimited rate + if bytes_per_interval == 0 or bytes_per_interval > 0xffffffffL or \ + interval_usecs == 0 or interval_usecs > 0xffffffffL: + bytes_per_interval = 0xffffffffL + interval_usecs = 0L + + return "%lu,%lu" % (bytes_per_interval, interval_usecs) + + +write_rate_G_re = re.compile('^([0-9]+)000000000(B/s@[0-9]+us)$') +write_rate_M_re = re.compile('^([0-9]+)000000(B/s@[0-9]+us)$') +write_rate_K_re = re.compile('^([0-9]+)000(B/s@[0-9]+us)$') +write_rate_s_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000000us$') +write_rate_m_re = re.compile('^([0-9]+[GMK]?B/s@[0-9]+)000us$') + +def formatRate(rate): + (bytes_per_interval, interval_usecs) = map(long, rate.split(',')) + + if interval_usecs != 0: + bytes_per_second = (bytes_per_interval * 1000 * 1000) / interval_usecs + else: + bytes_per_second = 0xffffffffL + + ratestr = "%uB/s@%uus" % (bytes_per_second, interval_usecs) + + # look for '000's + m = write_rate_G_re.match(ratestr) + if m: + ratestr = m.group(1) + "G" + m.group(2) + else: + m = write_rate_M_re.match(ratestr) + if m: + ratestr = m.group(1) + "M" + m.group(2) + else: + m = write_rate_K_re.match(ratestr) + if m: + ratestr = m.group(1) + "K" + m.group(2) + + m = write_rate_s_re.match(ratestr) + if m: + ratestr = m.group(1) + "s" + else: + m = write_rate_m_re.match(ratestr) + if m: + ratestr = m.group(1) + "ms" + + return ratestr + class 
NetifController(DevController): """Network interface controller. Handles all network devices for a domain. @@ -75,6 +156,7 @@ class NetifController(DevController): bridge = sxp.child_value(config, 'bridge') mac = sxp.child_value(config, 'mac') vifname = sxp.child_value(config, 'vifname') + rate = sxp.child_value(config, 'rate') ipaddr = _get_config_ipaddr(config) devid = self.allocateDeviceID() @@ -98,6 +180,8 @@ class NetifController(DevController): back['bridge'] = bridge if vifname: back['vifname'] = vifname + if rate: + back['rate'] = parseRate(rate) return (devid, back, front) @@ -107,8 +191,8 @@ class NetifController(DevController): result = DevController.configuration(self, devid) - (script, ip, bridge, mac, typ, vifname) = self.readBackend( - devid, 'script', 'ip', 'bridge', 'mac', 'type', 'vifname') + (script, ip, bridge, mac, typ, vifname, rate) = self.readBackend( + devid, 'script', 'ip', 'bridge', 'mac', 'type', 'vifname', 'rate') if script: result.append(['script', @@ -125,5 +209,7 @@ class NetifController(DevController): result.append(['type', typ]) if vifname: result.append(['vifname', vifname]) + if rate: + result.append(['rate', formatRate(rate)]) return result diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/server/pciif.py Tue Apr 11 18:54:18 2006 -0600 @@ -115,7 +115,7 @@ class PciController(DevController): dev = PciDevice(domain, bus, slot, func) except Exception, e: raise VmError("pci: failed to locate device and "+ - "parse it's resources - %s"+str(e)) + "parse it's resources - "+str(e)) if dev.driver!='pciback': raise VmError(("pci: PCI Backend does not own device "+ \ @@ -131,7 +131,7 @@ class PciController(DevController): nr_ports = size, allow_access = True) if rc<0: raise VmError(('pci: failed to configure I/O ports on device '+ - '%s - errno=%d')&(dev.name,rc)) + '%s - errno=%d')%(dev.name,rc)) for (start, size) in dev.iomem: # Convert start/size from bytes to page frame sizes @@ -147,7 +147,7 @@ class PciController(DevController): allow_access = True) if rc<0: raise VmError(('pci: failed to configure I/O memory on device '+ - '%s - errno=%d')&(dev.name,rc)) + '%s - errno=%d')%(dev.name,rc)) if dev.irq>0: log.debug('pci: enabling irq %d'%dev.irq) @@ -155,7 +155,7 @@ class PciController(DevController): allow_access = True) if rc<0: raise VmError(('pci: failed to configure irq on device '+ - '%s - errno=%d')&(dev.name,rc)) + '%s - errno=%d')%(dev.name,rc)) def waitForBackend(self,devid): return (0, "ok - no hotplug") diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xend/tests/test_uuid.py --- a/tools/python/xen/xend/tests/test_uuid.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xend/tests/test_uuid.py Tue Apr 11 18:54:18 2006 -0600 @@ -21,9 +21,9 @@ class test_uuid(unittest.TestCase): self.assertEqual(uuid.toString(inp), expected) self.assertEqual(uuid.fromString(expected), inp) - t([0 for _ in range(0, 16)], "00000000-00000000-00000000-00000000") + t([0 for _ in range(0, 16)], "00000000-0000-0000-0000-000000000000") t([185, 158, 125, 206, 250, 178, 125, 57, 2, 6, 162, 74, 178, 236, - 196, 5], "b99e7dce-fab27d39-0206a24a-b2ecc405") + 196, 5], "b99e7dce-fab2-7d39-0206-a24ab2ecc405") def test_suite(): diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xm/create.py Tue Apr 11 18:54:18 2006 -0600 @@ -27,9 +27,11 @@ import 
commands import commands import time import re +import xmlrpclib from xen.xend import sxp from xen.xend import PrettyPrint +import xen.xend.XendClient from xen.xend.XendClient import server from xen.xend.XendBootloader import bootloader from xen.util import blkif @@ -550,7 +552,7 @@ def configure_vifs(config_devs, vals): def f(k): if k not in ['backend', 'bridge', 'ip', 'mac', 'script', 'type', - 'vifname']: + 'vifname', 'rate']: err('Invalid vif option: ' + k) config_vif.append([k, d[k]]) @@ -814,6 +816,14 @@ def make_domain(opts, config): try: dominfo = server.xend.domain.create(config) + except xmlrpclib.Fault, ex: + import signal + if vncpid: + os.kill(vncpid, signal.SIGKILL) + if ex.faultCode == xen.xend.XendClient.ERROR_INVALID_DOMAIN: + err("the domain '%s' does not exist." % ex.faultString) + else: + err("%s" % ex.faultString) except Exception, ex: import signal if vncpid: @@ -824,6 +834,9 @@ def make_domain(opts, config): try: server.xend.domain.waitForDevices(dom) + except xmlrpclib.Fault, ex: + server.xend.domain.destroy(dom) + err("%s" % ex.faultString) except: server.xend.domain.destroy(dom) err("Device creation failed for domain %s" % dom) @@ -836,6 +849,18 @@ def make_domain(opts, config): err("Failed to unpause domain %s" % dom) opts.info("Started domain %s" % (dom)) return int(sxp.child_value(dominfo, 'domid')) + + +def get_xauthority(): + xauth = os.getenv("XAUTHORITY") + if not xauth: + home = os.getenv("HOME") + if not home: + import posix, pwd + home = pwd.getpwuid(posix.getuid())[5] + xauth = home + "/.Xauthority" + return xauth + def parseCommandLine(argv): gopts.reset() @@ -851,7 +876,7 @@ def parseCommandLine(argv): gopts.vals.display = os.getenv("DISPLAY") if not gopts.vals.xauthority: - gopts.vals.xauthority = os.getenv("XAUTHORITY") + gopts.vals.xauthority = get_xauthority() # Process remaining args as config variables. for arg in args: diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xm/main.py Tue Apr 11 18:54:18 2006 -0600 @@ -291,18 +291,6 @@ def err(msg): def err(msg): print >>sys.stderr, "Error:", msg -def handle_xend_error(cmd, args, ex): - non_option = filter(lambda x: x[0] != '-', args) - dom = len(non_option) > 0 and non_option[0] or None - - error = str(ex) - if error == "Not found" and dom != None: - err("Domain '%s' not found when running 'xm %s'" % (dom, cmd)) - else: - err(error) - - sys.exit(1) - ######################################################################### # @@ -1106,9 +1094,10 @@ def main(argv=sys.argv): sys.exit(1) except xmlrpclib.Fault, ex: if ex.faultCode == xen.xend.XendClient.ERROR_INVALID_DOMAIN: - print "Error: the domain '%s' does not exist." % ex.faultString + print >>sys.stderr, ( + "Error: the domain '%s' does not exist." 
% ex.faultString) else: - print "Error: %s" % ex.faultString + print >>sys.stderr, "Error: %s" % ex.faultString sys.exit(1) except: print "Unexpected error:", sys.exc_info()[0] diff -r 5719550652a1 -r 5cc367720223 tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/python/xen/xm/tests/test_create.py Tue Apr 11 18:54:18 2006 -0600 @@ -51,6 +51,7 @@ class test_create(unittest.TestCase): 'path' : '.:/etc/xen', 'builder' : 'linux', 'nics' : -1, + 'xauthority': xen.xm.create.get_xauthority(), }) @@ -99,6 +100,8 @@ on_crash = 'destroy' 'interface' : 'eth0', 'path' : '.:/etc/xen', 'builder' : 'linux', + + 'xauthority' : xen.xm.create.get_xauthority(), }) @@ -138,6 +141,8 @@ cpu_weight = 0.75 'path' : '.:/etc/xen', 'builder' : 'linux', 'nics' : -1, + + 'xauthority' : xen.xm.create.get_xauthority(), }) @@ -188,6 +193,8 @@ ne2000=0 'dhcp' : 'off', 'interface' : 'eth0', 'path' : '.:/etc/xen', + + 'xauthority' : xen.xm.create.get_xauthority(), }) diff -r 5719550652a1 -r 5cc367720223 tools/security/Makefile --- a/tools/security/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/security/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -54,8 +54,10 @@ ACM_LABEL_SUFFIX = security_label_templ ACM_LABEL_SUFFIX = security_label_template.xml ifeq ($(ACM_SECURITY),y) +.PHONY: all all: build +.PHONY: install install: all $(ACM_CONFIG_FILE) $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin $(INSTALL_PROG) -p $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin @@ -79,11 +81,14 @@ else python python/setup.py install --root="$(DESTDIR)" endif else +.PHONY: all all: +.PHONY: install install: endif +.PHONY: build build: mk-symlinks $(ACM_INST_TOOLS) $(ACM_NOINST_TOOLS) python python/setup.py build chmod 700 $(ACM_SCRIPTS) @@ -100,6 +105,7 @@ xensec_gen: xensec_gen.py xensec_gen: xensec_gen.py cp -f $^ $@ +.PHONY: clean clean: $(RM) $(ACM_INST_TOOLS) $(ACM_NOINST_TOOLS) $(RM) $(ACM_OBJS) @@ -107,8 +113,10 @@ clean: $(RM) -r xen $(RM) -r build +.PHONY: mrproper mrproper: clean +.PHONY: boot_install boot_install: install $(ACM_SCRIPT_DIR)/updategrub.sh $(POLICY) $(KERNEL_VERSION) diff -r 5719550652a1 -r 5cc367720223 tools/sv/Makefile --- a/tools/sv/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/sv/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,2 +1,3 @@ +.PHONY: all all: diff -r 5719550652a1 -r 5cc367720223 tools/tests/Makefile --- a/tools/tests/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/tests/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -6,14 +6,17 @@ TARGET := test_x86_emulator HOSTCFLAGS += -D__TEST_HARNESS__ +.PHONY: all all: $(TARGET) $(TARGET): x86_emulate.o test_x86_emulator.o $(HOSTCC) -o $@ $^ +.PHONY: clean clean: rm -rf $(TARGET) *.o *~ core +.PHONY: install install: x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c diff -r 5719550652a1 -r 5cc367720223 tools/vnet/Makefile --- a/tools/vnet/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -4,10 +4,6 @@ export VNET_ROOT = $(shell pwd) export VNET_ROOT = $(shell pwd) include $(VNET_ROOT)/Make.env endif - -.PHONY: all compile install dist clean pristine -.PHONY: gc-all gc-install gc-clean -.PHONY: help SUBDIRS:= SUBDIRS+= examples @@ -17,11 +13,13 @@ SUBDIRS+= vnetd SUBDIRS+= vnetd SUBDIRS+= vnet-module +.PHONY: all all: compile gc.tar.gz: wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@ +.PHONY: gc gc: gc.tar.gz tar xfz gc.tar.gz ln -sf gc?.? 
gc @@ -31,10 +29,13 @@ gc: gc.tar.gz make -C gc DESTDIR="" make -C gc install +.PHONY: gc-all gc-all: $(GC_LIB_A) +.PHONY: gc-install gc-install: +.PHONY: gc-clean gc-clean: -@$(RM) -r gc?.? gc @@ -50,19 +51,25 @@ subtgt = $(patsubst %,%-$(1),$(SUBDIRS)) %-install: $(call submak,install) +.PHONY: compile compile: $(call subtgt,all) +.PHONY: install install: DESTDIR= install: dist +.PHONY: dist dist: compile $(call subtgt,install) +.PHONY: clean clean: $(call subtgt,clean) -@$(RM) -r build +.PHONY: pristine pristine: clean -@$(RM) gc.tar.gz +.PHONY: help help: @echo 'Cleaning targets:' @echo ' clean - clean subdirs and remove the build dir' diff -r 5719550652a1 -r 5cc367720223 tools/vnet/doc/Makefile --- a/tools/vnet/doc/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/doc/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -23,8 +23,10 @@ DOC_MAN5 := $(patsubst man/%.pod.5,man5/ .PHONY: all man clean install +.PHONY: all all: man +.PHONY: man man: @if which $(POD2MAN) 1>/dev/null 2>/dev/null; then \ $(MAKE) $(DOC_MAN1) $(DOC_MAN5); fi @@ -39,10 +41,12 @@ man5/%.5: man/%.pod.5 Makefile $(POD2MAN) --release=$(VERSION) --name=`echo $@ | sed 's/^man5.//'| \ sed 's/.5//'` -s 5 -c $(HEADER) $< $@ +.PHONY: clean clean: @$(RM) -rf man5 @$(RM) -rf man1 +.PHONY: install install: all $(INSTALL_DIR) $(DESTDIR)$(MAN_DIR) $(CP) -dR man1 $(DESTDIR)$(MAN_DIR) diff -r 5719550652a1 -r 5cc367720223 tools/vnet/examples/Makefile --- a/tools/vnet/examples/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/examples/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -7,13 +7,14 @@ INSTALL_DIR = $(INSTALL) -d -m0755 XEN_SCRIPT_DIR = $(DESTDIR)/etc/xen/scripts -.PHONY: all install clean - +.PHONY: all all: +.PHONY: install install: $(INSTALL_DIR) $(XEN_SCRIPT_DIR) $(INSTALL_PROG) network-vnet $(XEN_SCRIPT_DIR) $(INSTALL_PROG) vnet-insert $(XEN_SCRIPT_DIR) -clean: \ No newline at end of file +.PHONY: clean +clean: diff -r 5719550652a1 -r 5cc367720223 tools/vnet/libxutil/Makefile --- a/tools/vnet/libxutil/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/libxutil/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -42,8 +42,10 @@ LIB += libxutil.so.$(MAJOR).$(MINOR LIB += libxutil.so.$(MAJOR).$(MINOR) LIB += libxutil.a +.PHONY: all all: build +.PHONY: build build: #check-for-zlib $(MAKE) $(LIB) @@ -61,6 +63,7 @@ libxutil.a: $(LIB_OBJS) libxutil.a: $(LIB_OBJS) $(AR) rc $@ $^ +.PHONY: check-for-zlib check-for-zlib: @if [ ! 
-e /usr/include/zlib.h ]; then \ echo "***********************************************************"; \ @@ -69,6 +72,7 @@ check-for-zlib: false; \ fi +.PHONY: install install: build [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR) @@ -76,6 +80,7 @@ install: build ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR) ln -sf libxutil.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so +.PHONY: clean clean: -@$(RM) *.a *.so* *.o *.opic *.rpm -@$(RM) *~ diff -r 5719550652a1 -r 5cc367720223 tools/vnet/scripts/Makefile --- a/tools/vnet/scripts/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/scripts/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -7,12 +7,13 @@ INSTALL_DIR = $(INSTALL) -d -m0755 SBIN_DIR = $(DESTDIR)/usr/sbin -.PHONY: all install clean - +.PHONY: all all: +.PHONY: install install: $(INSTALL_DIR) $(SBIN_DIR) $(INSTALL_PROG) vn $(SBIN_DIR) -clean: \ No newline at end of file +.PHONY: clean +clean: diff -r 5719550652a1 -r 5cc367720223 tools/vnet/vnetd/Makefile --- a/tools/vnet/vnetd/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vnet/vnetd/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -19,6 +19,7 @@ VNET_ROOT = $(shell cd .. && pwd) VNET_ROOT = $(shell cd .. && pwd) include $(VNET_ROOT)/Make.env +.PHONY: all all: vnetd #---------------------------------------------------------------------------- @@ -104,10 +105,12 @@ vnetd: $(VNETD_OBJ) vnetd: $(VNETD_OBJ) $(CC) $(CFLAGS) -o $@ $^ $(VNETD_LIBS) -ldl -lpthread +.PHONY: install install: vnetd mkdir -p $(DESTDIR)$(VNETD_INSTALL_DIR) install -m 0755 vnetd $(DESTDIR)$(VNETD_INSTALL_DIR) +.PHONY: clean clean: -@$(RM) *.a *.o *~ -@$(RM) vnetd diff -r 5719550652a1 -r 5cc367720223 tools/vtpm/Makefile --- a/tools/vtpm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -13,14 +13,18 @@ TPM_EMULATOR_TARFILE = tpm_emulator-0.2b GMP_HEADER = /usr/include/gmp.h +.PHONY: all all: build +.PHONY: build build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub +.PHONY: install install: build $(MAKE) -C $(TPM_EMULATOR_DIR) $@ $(MAKE) -C $(VTPM_DIR) $@ +.PHONY: clean clean: @if [ -d $(TPM_EMULATOR_DIR) ]; \ then $(MAKE) -C $(TPM_EMULATOR_DIR) clean; \ @@ -29,6 +33,7 @@ clean: then $(MAKE) -C $(VTPM_DIR) clean; \ fi +.PHONY: mrproper mrproper: rm -f $(TPM_EMULATOR_TARFILE) rm -rf $(TPM_EMULATOR_DIR) @@ -58,6 +63,7 @@ mrproper: patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \ patch -p1 <../vtpm.patch +.PHONY: build_sub build_sub: @if [ -e $(GMP_HEADER) ]; then \ $(MAKE) -C $(VTPM_DIR); \ diff -r 5719550652a1 -r 5cc367720223 tools/vtpm_manager/Makefile --- a/tools/vtpm_manager/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm_manager/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -6,8 +6,10 @@ SUBDIRS = crypto tcs util manager SUBDIRS = crypto tcs util manager OPENSSL_HEADER = /usr/include/openssl/crypto.h +.PHONY: all all: build +.PHONY: build build: @if [ -e $(OPENSSL_HEADER) ]; then \ @set -e; for subdir in $(SUBDIRS); do \ @@ -17,17 +19,20 @@ build: echo "*** Cannot build vtpm_manager: OpenSSL developement files missing."; \ fi +.PHONY: install install: build @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done +.PHONY: clean clean: @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done +.PHONY: mrproper mrproper: @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ diff -r 5719550652a1 -r 5cc367720223 
tools/vtpm_manager/crypto/Makefile --- a/tools/vtpm_manager/crypto/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm_manager/crypto/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,15 +3,20 @@ include $(XEN_ROOT)/tools/vtpm_manager/R BIN = libtcpaCrypto.a +.PHONY: all all: build +.PHONY: build build: $(BIN) +.PHONY: install install: build +.PHONY: clean clean: rm -f *.a *.so *.o *.rpm $(DEP_FILES) +.PHONY: mrproper mrproper: clean rm -f *~ diff -r 5719550652a1 -r 5cc367720223 tools/vtpm_manager/manager/Makefile --- a/tools/vtpm_manager/manager/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm_manager/manager/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,19 +3,24 @@ include $(XEN_ROOT)/tools/vtpm_manager/R BIN = vtpm_managerd +.PHONY: all all: build +.PHONY: build build: $(BIN) +.PHONY: install install: build if [ ! -d "$(DESTDIR)/var/vtpm/fifos" ]; \ then mkdir -p $(DESTDIR)/var/vtpm/fifos; \ fi $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR) +.PHONY: clean clean: rm -f *.a *.so *.o *.rpm $(DEP_FILES) +.PHONY: mrproper mrproper: clean rm -f $(BIN) *~ diff -r 5719550652a1 -r 5cc367720223 tools/vtpm_manager/tcs/Makefile --- a/tools/vtpm_manager/tcs/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm_manager/tcs/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,15 +3,20 @@ include $(XEN_ROOT)/tools/vtpm_manager/R BIN = libTCS.a +.PHONY: all all: build +.PHONY: build build: $(BIN) +.PHONY: install install: build +.PHONY: clean clean: rm -f *.a *.so *.o *.rpm $(DEP_FILES) +.PHONY: mrproper mrproper: clean rm -f *~ diff -r 5719550652a1 -r 5cc367720223 tools/vtpm_manager/util/Makefile --- a/tools/vtpm_manager/util/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/vtpm_manager/util/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -3,15 +3,20 @@ include $(XEN_ROOT)/tools/vtpm_manager/R BIN = libTCGUtils.a +.PHONY: all all: build +.PHONY: build build: $(BIN) +.PHONY: install install: build +.PHONY: clean clean: rm -f *.a *.so *.o *.rpm $(DEP_FILES) +.PHONY: mrproper mrproper: clean rm -f *~ diff -r 5719550652a1 -r 5cc367720223 tools/xcutils/Makefile --- a/tools/xcutils/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xcutils/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -32,6 +32,8 @@ LDLIBS = -L$(XEN_LIBXC) -lxenguest -lx .PHONY: all all: build + +.PHONY: build build: $(PROGRAMS) $(PROGRAMS): %: %.o @@ -44,6 +46,7 @@ install: build $(INSTALL_PROG) $(PROGRAMS) $(DESTDIR)$(PROGRAMS_INSTALL_DIR) +.PHONY: clean clean: $(RM) *.o $(PROGRAMS) $(RM) $(PROG_DEP) diff -r 5719550652a1 -r 5cc367720223 tools/xenmon/Makefile --- a/tools/xenmon/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xenmon/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -28,16 +28,20 @@ BIN = setmask xenbaked BIN = setmask xenbaked SCRIPTS = xenmon.py +.PHONY: all all: build +.PHONY: build build: $(BIN) +.PHONY: install install: xenbaked setmask [ -d $(DESTDIR)$(sbindir) ] || $(INSTALL_DIR) $(DESTDIR)$(sbindir) $(INSTALL_PROG) xenbaked $(DESTDIR)$(sbindir)/xenbaked $(INSTALL_PROG) setmask $(DESTDIR)$(sbindir)/setmask $(INSTALL_PROG) xenmon.py $(DESTDIR)$(sbindir)/xenmon.py +.PHONY: clean clean: rm -f $(BIN) diff -r 5719550652a1 -r 5cc367720223 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xenstat/libxenstat/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -41,6 +41,7 @@ CFLAGS+=-Isrc -I$(XEN_LIBXC) -I$(XEN_XEN CFLAGS+=-Isrc -I$(XEN_LIBXC) -I$(XEN_XENSTORE) LDFLAGS+=-Lsrc +.PHONY: all all: $(LIB) $(LIB): $(OBJECTS) @@ -62,6 +63,7 @@ src/libxenstat.so: 
src/libxenstat.so.$(M src/libxenstat.so: src/libxenstat.so.$(MAJOR) $(MAKE_LINK) $(<F) $@ +.PHONY: install install: all #install: all # $(INSTALL_DATA) src/xenstat.h $(DESTDIR)$(includedir)/xenstat.h @@ -84,6 +86,7 @@ BINDINGSRC=$(PYSRC) $(PERLSRC) BINDINGSRC=$(PYSRC) $(PERLSRC) # The all-bindings target builds all the language bindings +.PHONY: all-bindings all-bindings: perl-bindings python-bindings # The install-bindings target installs all the language bindings @@ -110,6 +113,7 @@ install-python-bindings: $(PYLIB) $(PYMO $(INSTALL_PROG) $(PYMOD) $(DESTDIR)$(pythonlibdir)/xenstat.py ifeq ($(XENSTAT_PYTHON_BINDINGS),y) +.PHONY: all all: python-bindings install: install-python-bindings endif @@ -122,8 +126,10 @@ PERL_FLAGS=`perl -MConfig -e 'print "$$C $(PERLLIB): $(PERLSRC) $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $< +.PHONY: perl-bindings perl-bindings: $(PERLLIB) $(PERLMOD) +.PHONY: install-perl-bindings perllibdir=$(prefix)/lib/perl5 perlmoddir=$(prefix)/share/perl5 install-perl-bindings: $(PERLLIB) $(PERLMOD) @@ -131,10 +137,14 @@ install-perl-bindings: $(PERLLIB) $(PERL $(INSTALL_PROG) $(PERLMOD) $(DESTDIR)$(perlmoddir)/xenstat.pm ifeq ($(XENSTAT_PERL_BINDINGS),y) +.PHONY: all all: perl-bindings + +.PHONY: install install: install-perl-bindings endif +.PHONY: clean clean: rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS) \ $(BINDINGS) $(BINDINGSRC) diff -r 5719550652a1 -r 5cc367720223 tools/xenstat/xentop/Makefile --- a/tools/xenstat/xentop/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xenstat/xentop/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -14,6 +14,7 @@ include $(XEN_ROOT)/tools/Rules.mk include $(XEN_ROOT)/tools/Rules.mk ifneq ($(XENSTAT_XENTOP),y) +.PHONY: all install xentop all install xentop: else @@ -30,15 +31,16 @@ LDFLAGS += -L$(XEN_LIBXENSTAT) LDFLAGS += -L$(XEN_LIBXENSTAT) LDLIBS += -lxenstat -lncurses +.PHONY: all all: xentop -xentop: xentop.o - +.PHONY: install install: xentop xentop.1 $(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop $(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1 endif +.PHONY: clean clean: rm -f xentop xentop.o diff -r 5719550652a1 -r 5cc367720223 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xenstore/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -27,11 +27,13 @@ CLIENTS += xenstore-write CLIENTS += xenstore-write CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS)) +.PHONY: all all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls test_interleaved_transactions: test_interleaved_transactions.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -L. 
-lxenstore -o $@ +.PHONY: testcode testcode: xs_test xenstored_test xs_random xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o @@ -63,6 +65,7 @@ xs_crashme: xs_crashme.o xs_lib.o talloc speedtest: speedtest.o xs.o xs_lib.o utils.o talloc.o +.PHONY: check-speed check-speed: speedtest xenstored_test $(TESTDIR) $(TESTENV) time ./speedtest 100 @@ -80,6 +83,7 @@ libxenstore.so: xs.opic xs_lib.opic libxenstore.so: xs.opic xs_lib.opic $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^ -lpthread +.PHONY: clean clean: testsuite-clean rm -f *.o *.opic *.so rm -f xenstored xs_random xs_stress xs_crashme @@ -87,55 +91,69 @@ clean: testsuite-clean rm -f $(CLIENTS) $(RM) $(PROG_DEP) +.PHONY: print-dir print-dir: @echo -n tools/xenstore: +.PHONY: print-end print-end: @echo +.PHONY: check check: print-dir testsuite-fast randomcheck-fast print-end +.PHONY: fullcheck fullcheck: testsuite-run randomcheck stresstest $(TESTDIR): mkdir $@ +.PHONY: testsuite-run testsuite-run: xenstored_test xs_test $(TESTDIR) $(TESTENV) testsuite/test.sh && echo +.PHONY: testsuite-fast testsuite-fast: xenstored_test xs_test $(TESTDIR) @$(TESTENV) testsuite/test.sh --fast +.PHONY: testsuite-clean testsuite-clean: rm -rf $(TESTDIR) # Make this visible so they can see repeat tests without --fast if they # fail. RANDSEED=$(shell date +%s) +.PHONY: randomcheck randomcheck: xs_random xenstored_test $(TESTDIR) $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED) && echo $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED) && echo # $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED) +.PHONY: crashme crashme: xs_crashme xenstored_test $(TESTDIR) rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace export $(TESTENV); ./xs_crashme 5000 $(RANDSEED) 2>/dev/null if [ -n "`cat /tmp/xs_crashme.vglog*`" ]; then echo Valgrind complained; cat /tmp/xs_crashme.vglog*; exit 1; fi rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace +.PHONY: randomcheck-fast randomcheck-fast: xs_random xenstored_test $(TESTDIR) @$(TESTENV) ./xs_random --fast /tmp/xs_random 2000 $(RANDSEED) +.PHONY: stresstest stresstest: xs_stress xenstored_test $(TESTDIR) rm -rf $(TESTDIR)/store $(TESTDIR)/transactions export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret +.PHONY: TAGS TAGS: etags `find . -name '*.[ch]'` +.PHONY: tarball tarball: clean cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/ +.PHONY: install install: all $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored diff -r 5719550652a1 -r 5cc367720223 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xenstore/xenstored_core.c Tue Apr 11 18:54:18 2006 -0600 @@ -469,6 +469,7 @@ static bool write_node(struct connection /* TDB should set errno, but doesn't even set ecode AFAICT. 
*/ if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) { + corrupt(conn, "Write of %s = %s failed", key, data); errno = ENOSPC; return false; } diff -r 5719550652a1 -r 5cc367720223 tools/xentrace/Makefile --- a/tools/xentrace/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xentrace/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -28,9 +28,13 @@ LIBBIN += xenctx LIBBIN += xenctx endif +.PHONY: all all: build + +.PHONY: build build: $(BIN) $(LIBBIN) +.PHONY: install install: build [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin [ -z "$(LIBBIN)" ] || [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \ @@ -44,6 +48,7 @@ install: build $(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1 $(INSTALL_DATA) $(MAN8) $(DESTDIR)/usr/share/man/man8 +.PHONY: clean clean: $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) diff -r 5719550652a1 -r 5cc367720223 tools/xm-test/README --- a/tools/xm-test/README Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xm-test/README Tue Apr 11 18:54:18 2006 -0600 @@ -140,9 +140,11 @@ and test lists as required. Default grou # ./runtest.sh -g <groupname> <logfile> * NOTE: There is a quick set of tests in group mode, that was added to -run certain casenames and tests. It is not a substitute for the full +run certain casenames and tests, and there is a "medium" group, which is a +medium-length run (around 20 minutes). Neither is a substitute for the full xm-test test suite. # ./runtest.sh -g quick <logfile> + # ./runtest.sh -g medium <logfile> diff -r 5719550652a1 -r 5cc367720223 tools/xm-test/tests/vtpm/02_vtpm-cat_pcrs.py --- a/tools/xm-test/tests/vtpm/02_vtpm-cat_pcrs.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xm-test/tests/vtpm/02_vtpm-cat_pcrs.py Tue Apr 11 18:54:18 2006 -0600 @@ -46,6 +46,7 @@ except ConsoleError, e: FAIL(str(e)) if re.search("No such file",run["output"]): + vtpm_cleanup(domName) FAIL("TPM frontend support not compiled into (domU?) kernel") console.closeConsole() diff -r 5719550652a1 -r 5cc367720223 tools/xm-test/tests/vtpm/03_vtpm-susp_res.py --- a/tools/xm-test/tests/vtpm/03_vtpm-susp_res.py Tue Apr 11 13:55:47 2006 -0600 +++ b/tools/xm-test/tests/vtpm/03_vtpm-susp_res.py Tue Apr 11 18:54:18 2006 -0600 @@ -47,6 +47,7 @@ except ConsoleError, e: FAIL(str(e)) if re.search("No such file",run["output"]): + vtpm_cleanup(domName) FAIL("TPM frontend support not compiled into (domU?) kernel") console.closeConsole() diff -r 5719550652a1 -r 5cc367720223 xen/Makefile --- a/xen/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,30 +1,27 @@ INSTALL = install -INSTALL = install -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_DIR = $(INSTALL) -d -m0755 - # This is the correct place to edit the build version. # All other places this is stored (eg. compile.h) should be autogenerated. 
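
The hunk below switches the version variables from recursively expanded '=' to simply expanded ':=' assignments. As a reminder of the distinction, a minimal GNU make sketch (variable names are illustrative only, not taken from the tree):

    # Sketch only: '=' is re-expanded at each use, ':=' is expanded once.
    VER  := 3
    full  = $(VER).x     # recursively expanded
    snap := $(VER).x     # simply expanded
    VER  := 4
    # Here $(full) yields "4.x" while $(snap) is still "3.x"; ':=' also
    # keeps $(shell ...) assignments from re-running on every reference.
    demo:                # recipe line must start with a literal tab
    	@echo "full=$(full) snap=$(snap)"

Pinning the version strings at the point of definition is presumably the point of the ':=' change here.
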
-export XEN_VERSION = 3 -export XEN_SUBVERSION = 0 -export XEN_EXTRAVERSION = -unstable -export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) +export XEN_VERSION := 3 +export XEN_SUBVERSION := 0 +export XEN_EXTRAVERSION := -unstable +export XEN_FULLVERSION := $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -export BASEDIR := $(CURDIR) +export BASEDIR := $(CURDIR) -include Rules.mk +.PHONY: default +default: build -default: build -$(TARGET).gz: $(TARGET) - gzip -f -9 < $< > $@.new - mv $@.new $@ +ifeq ($(XEN_ROOT),) -debug: - objdump -D -S $(TARGET)-syms > $(TARGET).s +.PHONY: build install clean +build install clean: + make -f Rules.mk $@ -dist: install +else +.PHONY: build build: $(TARGET).gz +.PHONY: install install: $(TARGET).gz [ -d $(DESTDIR)/boot ] || $(INSTALL_DIR) $(DESTDIR)/boot $(INSTALL_DATA) $(TARGET).gz $(DESTDIR)/boot/$(notdir $(TARGET))-$(XEN_FULLVERSION).gz @@ -38,27 +35,42 @@ install: $(TARGET).gz $(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io $(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen -clean: delete-unfresh-files +.PHONY: clean +clean:: delete-unfresh-files $(MAKE) -C tools clean - $(MAKE) -C common clean - $(MAKE) -C drivers clean - $(MAKE) -C acm clean - $(MAKE) -C arch/$(TARGET_ARCH) clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C common clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C acm clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean rm -f include/asm *.o $(TARGET)* *~ core rm -f include/asm-*/asm-offsets.h rm -f include/xen/acm_policy.h +endif + +.PHONY: dist +dist: install + +.PHONY: debug +debug: + objdump -D -S $(TARGET)-syms > $(TARGET).s + +$(TARGET).gz: $(TARGET) + gzip -f -9 < $< > $@.new + mv $@.new $@ + $(TARGET): delete-unfresh-files $(MAKE) -C tools - $(MAKE) include/xen/compile.h - $(MAKE) include/xen/acm_policy.h + $(MAKE) -f $(BASEDIR)/Rules.mk include/xen/compile.h + $(MAKE) -f $(BASEDIR)/Rules.mk include/xen/acm_policy.h [ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm - $(MAKE) -C arch/$(TARGET_ARCH) asm-offsets.s - $(MAKE) include/asm-$(TARGET_ARCH)/asm-offsets.h - $(MAKE) -C arch/$(TARGET_ARCH) $(TARGET) + $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) asm-offsets.s + $(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h + $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET) # drivers/char/console.o contains static banner/compile info. Blow it away. # Don't refresh these files during e.g., 'sudo make install' +.PHONY: delete-unfresh-files delete-unfresh-files: @if [ ! 
-r include/xen/compile.h -o -O include/xen/compile.h ]; then \ rm -f include/xen/{banner,compile}.h; \ @@ -115,8 +127,6 @@ include/asm-$(TARGET_ARCH)/asm-offsets.h echo ""; \ echo "#endif") <$< >$@ -.PHONY: default debug install dist clean delete-unfresh-files TAGS tags - SUBDIRS = acm arch/$(TARGET_ARCH) common drivers define all_sources ( find include/asm-$(TARGET_ARCH) -name SCCS -prune -o -name '*.h' -print; \ @@ -124,12 +134,20 @@ define all_sources -name config \) -prune -o -name '*.h' -print; \ find $(SUBDIRS) -name SCCS -prune -o -name '*.[chS]' -print ) endef + +.PHONY: TAGS TAGS: $(all_sources) | etags - + +.PHONY: tags tags: $(all_sources) | xargs ctags + +.PHONY: cscope cscope: $(all_sources) > cscope.files cscope -k -b -q + +.PHONY: MAP MAP: $(NM) $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map diff -r 5719550652a1 -r 5cc367720223 xen/Rules.mk --- a/xen/Rules.mk Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/Rules.mk Tue Apr 11 18:54:18 2006 -0600 @@ -26,17 +26,23 @@ override COMPILE_ARCH := $(patsubst x override COMPILE_ARCH := $(patsubst x86%,x86,$(XEN_COMPILE_ARCH)) override TARGET_ARCH := $(patsubst x86%,x86,$(XEN_TARGET_ARCH)) -TARGET := $(BASEDIR)/xen -HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) -HDRS += $(wildcard $(BASEDIR)/include/public/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) -# Do not depend on auto-generated header files. -HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS)) -HDRS := $(subst $(BASEDIR)/include/xen/banner.h,,$(HDRS)) -HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS)) +TARGET := $(BASEDIR)/xen + +HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) +HDRS += $(wildcard $(BASEDIR)/include/public/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) + +INSTALL := install +INSTALL_DATA := $(INSTALL) -m0644 +INSTALL_DIR := $(INSTALL) -d -m0755 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk + +# Do not depend on auto-generated header files. +HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS)) +HDRS := $(subst $(BASEDIR)/include/xen/banner.h,,$(HDRS)) +HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS)) # Note that link order matters! ALL_OBJS-y += $(BASEDIR)/common/built_in.o @@ -51,11 +57,50 @@ CFLAGS-$(perfc) += -DPERF_COUNTER CFLAGS-$(perfc) += -DPERF_COUNTERS CFLAGS-$(perfc_arrays) += -DPERF_ARRAYS +ifneq ($(max_phys_cpus),) +CFLAGS-y += -DMAX_PHYS_CPUS=$(max_phys_cpus) +endif + +AFLAGS-y += -D__ASSEMBLY__ + ALL_OBJS := $(ALL_OBJS-y) CFLAGS := $(strip $(CFLAGS) $(CFLAGS-y)) +AFLAGS := $(strip $(AFLAGS) $(AFLAGS-y)) + +include Makefile + +# Ensure each subdirectory has exactly one trailing slash. +subdir-n := $(patsubst %,%/,$(patsubst %/,%,$(subdir-n))) +subdir-y := $(patsubst %,%/,$(patsubst %/,%,$(subdir-y))) + +# Add explicitly declared subdirectories to the object list. +obj-y += $(patsubst %/,%/built_in.o,$(subdir-y)) + +# Add implicitly declared subdirectories (in the object list) to the +# subdirectory list, and rewrite the object-list entry. +subdir-y += $(filter %/,$(obj-y)) +obj-y := $(patsubst %/,%/built-in.o,$(obj-y)) + +subdir-all := $(subdir-y) $(subdir-n) + +built_in.o: $(obj-y) + $(LD) $(LDFLAGS) -r -o $@ $^ + +# Force execution of pattern rules (for which PHONY cannot be directly used). 
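
The comment above refers to the kbuild-style recursion this Rules.mk hunk introduces. A self-contained GNU make sketch of the same idiom (directory and object names are illustrative, not taken from the tree):

    # Sketch only: descend into subdirectories and link partial objects.
    subdir-y := foo bar
    obj-y    := main.o $(patsubst %,%/built_in.o,$(subdir-y))

    built_in.o: $(obj-y)
    	$(LD) -r -o $@ $^

    # A pattern rule cannot be marked .PHONY directly, so an empty phony
    # prerequisite forces the recursive make to run on every invocation.
    .PHONY: FORCE
    FORCE:

    %/built_in.o: FORCE
    	$(MAKE) -C $* built_in.o

Each subdirectory links its own objects with 'ld -r' into a built_in.o, and the parent combines those partial objects, which matches the per-directory built_in.o entries collected into ALL_OBJS-y above.
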
+.PHONY: FORCE +FORCE: + +%/built_in.o: FORCE + $(MAKE) -f $(BASEDIR)/Rules.mk -C $* built_in.o + +.PHONY: clean +clean:: $(addprefix _clean_, $(subdir-all)) + rm -f *.o *~ core +_clean_%/: FORCE + $(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean %.o: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -c $< -o $@ %.o: %.S $(HDRS) Makefile - $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ + $(CC) $(CFLAGS) $(AFLAGS) -c $< -o $@ diff -r 5719550652a1 -r 5cc367720223 xen/acm/Makefile --- a/xen/acm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/acm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,9 +1,5 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += acm_core.o obj-y += acm_policy.o obj-y += acm_simple_type_enforcement_hooks.o obj-y += acm_chinesewall_hooks.o obj-y += acm_null_hooks.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/Makefile --- a/xen/arch/ia64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,21 +1,17 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-y += xen subdir-y += vmx subdir-y += linux subdir-y += linux-xen -include $(BASEDIR)/Post.mk - $(TARGET)-syms: linux-xen/head.o $(ALL_OBJS) xen.lds.s $(LD) $(LDFLAGS) -T xen.lds.s -N \ -Map map.out linux-xen/head.o $(ALL_OBJS) -o $@ $(NM) -n $@ | $(BASEDIR)/tools/symbols > $(BASEDIR)/xen-syms.S - $(MAKE) $(BASEDIR)/xen-syms.o + $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/xen-syms.o $(LD) $(LDFLAGS) -T xen.lds.s -N \ -Map map.out linux-xen/head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S - $(MAKE) $(BASEDIR)/xen-syms.o + $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/xen-syms.o $(LD) $(LDFLAGS) -T xen.lds.s -N \ -Map map.out linux-xen/head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o @@ -78,10 +74,11 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H # I'm sure a Makefile wizard would know a better way to do this xen.lds.s: xen/xen.lds.S - $(CC) -E $(CPPFLAGS) -P -DXEN -D__ASSEMBLY__ \ + $(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \ -o xen.lds.s xen/xen.lds.S -clean:: FORCE +.PHONY: clean +clean:: rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s map.out rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h rm -f $(BASEDIR)/System.map diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/linux-xen/Makefile --- a/xen/arch/ia64/linux-xen/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/linux-xen/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += efi.o obj-y += entry.o obj-y += irq_ia64.o @@ -15,5 +13,3 @@ obj-y += tlb.o obj-y += tlb.o obj-y += unaligned.o obj-y += unwind.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/linux/Makefile --- a/xen/arch/ia64/linux/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/linux/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,6 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - - obj-y += bitop.o obj-y += clear_page.o obj-y += cmdline.o @@ -25,8 +22,6 @@ obj-y += __udivdi3.o obj-y += __udivdi3.o obj-y += __moddi3.o obj-y += __umoddi3.o - -include $(BASEDIR)/Post.mk ## variants of divide/modulo ## see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib) diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/tools/privop/Makefile --- a/xen/arch/ia64/tools/privop/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ 
b/xen/arch/ia64/tools/privop/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,12 @@ CC=gcc CC=gcc CFLAGS=-O -Wall +.PHONY: all all: postat postat: postat.c pohcalls.o +.PHONY: clean clean: $(RM) -f *.o postat *.s *~ diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/vmx/Makefile --- a/xen/arch/ia64/vmx/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/vmx/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += hvm_vioapic.o obj-y += mm.o obj-y += mmio.o @@ -19,5 +17,3 @@ obj-y += vmx_virt.o obj-y += vmx_virt.o obj-y += vmx_vsa.o obj-y += vtlb.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Tue Apr 11 18:54:18 2006 -0600 @@ -35,7 +35,7 @@ #include <asm/dom_fw.h> #include <xen/domain.h> -extern long do_sched_op(int cmd, unsigned long arg); +extern long do_sched_op_compat(int cmd, unsigned long arg); extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); void hyper_not_support(void) @@ -83,13 +83,13 @@ void hyper_dom_mem_op(void) } -void hyper_sched_op(void) +void hyper_sched_op_compat(void) { VCPU *vcpu=current; u64 r32,r33,ret; vcpu_get_gr_nat(vcpu,16,&r32); vcpu_get_gr_nat(vcpu,17,&r33); - ret=do_sched_op(r32,r33); + ret=do_sched_op_compat(r32,r33); vcpu_set_gr(vcpu, 8, ret, 0); vmx_vcpu_increment_iip(vcpu); diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/vmx/vmx_ivt.S --- a/xen/arch/ia64/vmx/vmx_ivt.S Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/vmx/vmx_ivt.S Tue Apr 11 18:54:18 2006 -0600 @@ -1125,7 +1125,7 @@ hyper_call_table: data8 hyper_not_support //hyper_stack_switch data8 hyper_not_support //hyper_set_callbacks data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */ - data8 hyper_sched_op + data8 hyper_sched_op_compat data8 hyper_dom0_op data8 hyper_not_support //hyper_set_debugreg data8 hyper_not_support //hyper_get_debugreg diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/vmx/vmx_support.c --- a/xen/arch/ia64/vmx/vmx_support.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/vmx/vmx_support.c Tue Apr 11 18:54:18 2006 -0600 @@ -43,7 +43,7 @@ void vmx_wait_io(void) do { if (!test_bit(port, &d->shared_info->evtchn_pending[0])) - do_sched_op(SCHEDOP_block, 0); + do_sched_op_compat(SCHEDOP_block, 0); /* Unblocked when some event is coming. 
Clear pending indication * immediately if deciding to go for io assist diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/xen/Makefile --- a/xen/arch/ia64/xen/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/xen/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += acpi.o obj-y += dom0_ops.o obj-y += domain.o @@ -26,5 +24,3 @@ obj-y += flushd.o obj-y += flushd.o obj-$(crash_debug) += gdbstub.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/xen/hypercall.c Tue Apr 11 18:54:18 2006 -0600 @@ -38,7 +38,7 @@ hypercall_t ia64_hypercall_table[] = (hypercall_t)do_ni_hypercall, /* do_stack_switch */ (hypercall_t)do_ni_hypercall, /* do_set_callbacks */ (hypercall_t)do_ni_hypercall, /* do_fpu_taskswitch */ /* 5 */ - (hypercall_t)do_ni_hypercall, /* do_sched_op */ + (hypercall_t)do_sched_op_compat, (hypercall_t)do_dom0_op, (hypercall_t)do_ni_hypercall, /* do_set_debugreg */ (hypercall_t)do_ni_hypercall, /* do_get_debugreg */ @@ -61,7 +61,7 @@ hypercall_t ia64_hypercall_table[] = (hypercall_t)do_ni_hypercall, /* do_mmuext_op */ (hypercall_t)do_ni_hypercall, /* do_acm_op */ (hypercall_t)do_ni_hypercall, /* do_nmi_op */ - (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_sched_op, (hypercall_t)do_ni_hypercall, /* */ /* 30 */ (hypercall_t)do_ni_hypercall /* */ }; @@ -70,6 +70,11 @@ xen_hypercall (struct pt_regs *regs) xen_hypercall (struct pt_regs *regs) { switch (regs->r2) { + case __HYPERVISOR_sched_op_compat: + regs->r8 = do_sched_op_compat((int) regs->r14, + (unsigned long) regs->r15); + break; + case __HYPERVISOR_dom0_op: regs->r8 = do_dom0_op(guest_handle_from_ptr(regs->r14, dom0_op_t)); @@ -103,6 +108,11 @@ xen_hypercall (struct pt_regs *regs) case __HYPERVISOR_multicall: regs->r8 = do_multicall(guest_handle_from_ptr(regs->r14, multicall_entry_t), (unsigned int) regs->r15); + break; + + case __HYPERVISOR_sched_op: + regs->r8 = do_sched_op((int) regs->r14, + guest_handle_from_ptr(regs->r15, void)); break; default: @@ -148,7 +158,7 @@ fw_hypercall (struct pt_regs *regs) } else { pal_halt_light_count++; - do_sched_op(SCHEDOP_yield, 0); + do_sched_op_compat(SCHEDOP_yield, 0); } regs->r8 = 0; regs->r9 = 0; diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/xen/irq.c Tue Apr 11 18:54:18 2006 -0600 @@ -416,25 +416,20 @@ int pirq_guest_unmask(struct domain *d) int pirq_guest_unmask(struct domain *d) { irq_desc_t *desc; - int i, j, pirq; - u32 m; + int pirq; shared_info_t *s = d->shared_info; - for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ ) - { - m = d->pirq_mask[i]; - while ( (j = ffs(m)) != 0 ) - { - m &= ~(1 << --j); - pirq = (i << 5) + j; - desc = &irq_desc[pirq]; - spin_lock_irq(&desc->lock); - if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &d->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(pirq); - spin_unlock_irq(&desc->lock); - } + for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS); + pirq < NR_PIRQS; + pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) ) + { + desc = &irq_desc[pirq]; + spin_lock_irq(&desc->lock); + if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && + test_and_clear_bit(pirq, &d->pirq_mask) && + (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) + 
desc->handler->end(pirq); + spin_unlock_irq(&desc->lock); } return 0; diff -r 5719550652a1 -r 5cc367720223 xen/arch/ia64/xen/xentime.c --- a/xen/arch/ia64/xen/xentime.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/ia64/xen/xentime.c Tue Apr 11 18:54:18 2006 -0600 @@ -84,7 +84,13 @@ s_time_t get_s_time(void) return now; } -void update_dom_time(struct vcpu *v) +void update_vcpu_system_time(struct vcpu *v) +{ + /* N-op here, and let dom0 to manage system time directly */ + return; +} + +void update_domain_wallclock_time(struct domain *d) { /* N-op here, and let dom0 to manage system time directly */ return; @@ -239,6 +245,6 @@ int reprogram_timer(s_time_t timeout) void send_timer_event(struct vcpu *v) { - send_guest_virq(v, VIRQ_TIMER); -} - + send_guest_vcpu_virq(v, VIRQ_TIMER); +} + diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,9 +1,8 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-y += acpi subdir-y += cpu subdir-y += genapic subdir-y += hvm +subdir-y += oprofile subdir-$(x86_32) += x86_32 subdir-$(x86_64) += x86_64 @@ -30,6 +29,7 @@ obj-y += physdev.o obj-y += physdev.o obj-y += rwlock.o obj-y += setup.o +obj-y += shutdown.o obj-y += smp.o obj-y += smpboot.o obj-y += string.o @@ -49,8 +49,6 @@ obj-$(x86_64) += shadow.o shadow_public. obj-$(crash_debug) += gdbstub.o -include $(BASEDIR)/Post.mk - $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \ `$(NM) $(TARGET)-syms | sort | tail -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'` @@ -59,11 +57,11 @@ include $(BASEDIR)/Post.mk $(LD) $(LDFLAGS) -T xen.lds -N \ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S - $(MAKE) $(BASEDIR)/xen-syms.o + $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/xen-syms.o $(LD) $(LDFLAGS) -T xen.lds -N \ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S - $(MAKE) $(BASEDIR)/xen-syms.o + $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/xen-syms.o $(LD) $(LDFLAGS) -T xen.lds -N \ boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@ rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o @@ -72,12 +70,13 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off $(CC) $(CFLAGS) -S -o $@ $< xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS) - $(CC) $(CFLAGS) -P -E -Ui386 -D__ASSEMBLY__ -o $@ $< + $(CC) $(CFLAGS) -P -E -Ui386 $(AFLAGS) -o $@ $< boot/mkelf32: boot/mkelf32.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< shadow_guest32.o: shadow.c -clean:: FORCE +.PHONY: clean +clean:: rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32 diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/Rules.mk Tue Apr 11 18:54:18 2006 -0600 @@ -46,6 +46,10 @@ x86_64 := y x86_64 := y endif +HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h) + # Test for at least GCC v3.2.x. 
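
The gcc-ver helper that follows is a compact use of make's $(call) mechanism. A stand-alone sketch of the same idea (using cut instead of the tree's sed expression, purely for illustration):

    # Sketch only: pick the Nth dot-separated field of `$(CC) -dumpversion`.
    CC      ?= gcc
    gcc-ver  = $(shell $(CC) -dumpversion | cut -d. -f$(1))

    check-gcc:
    	@echo "major=$(call gcc-ver,1) minor=$(call gcc-ver,2)"

In the tree the result feeds ifeq tests guarding against pre-3.2 compilers, as the comment above notes.
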
gcc-ver = $(shell $(CC) -dumpversion | sed -e 's/^\(.\)\.\(.\)\.\(.\)/\$(1)/') ifeq ($(call gcc-ver,1),1) diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/acpi/Makefile --- a/xen/arch/x86/acpi/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/acpi/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += boot.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/apic.c Tue Apr 11 18:54:18 2006 -0600 @@ -657,9 +657,10 @@ void __init init_apic_mappings(void) * zeroes page to simulate the local APIC and another * one for the IO-APIC. */ - if (!smp_found_config && detect_init_APIC()) + if (!smp_found_config && detect_init_APIC()) { apic_phys = __pa(alloc_xenheap_page()); - else + memset(__va(apic_phys), 0, PAGE_SIZE); + } else apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); @@ -693,6 +694,7 @@ void __init init_apic_mappings(void) } else { fake_ioapic_page: ioapic_phys = __pa(alloc_xenheap_page()); + memset(__va(ioapic_phys), 0, PAGE_SIZE); } set_fixmap_nocache(idx, ioapic_phys); apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/cpu/Makefile --- a/xen/arch/x86/cpu/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/cpu/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-y += mcheck subdir-y += mtrr @@ -12,5 +10,3 @@ obj-$(x86_32) += cyrix.o obj-$(x86_32) += cyrix.o obj-$(x86_32) += rise.o obj-$(x86_32) += transmeta.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/cpu/common.c --- a/xen/arch/x86/cpu/common.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/cpu/common.c Tue Apr 11 18:54:18 2006 -0600 @@ -427,6 +427,17 @@ void __devinit identify_cpu(struct cpuin } #ifdef CONFIG_X86_HT +/* cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. 
+ */ +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/cpu/mcheck/Makefile --- a/xen/arch/x86/cpu/mcheck/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/cpu/mcheck/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += k7.o obj-y += mce.o obj-y += non-fatal.o @@ -7,5 +5,3 @@ obj-y += p5.o obj-y += p5.o obj-y += p6.o obj-y += winchip.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/cpu/mtrr/Makefile --- a/xen/arch/x86/cpu/mtrr/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/cpu/mtrr/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,6 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += amd.o obj-y += centaur.o obj-y += cyrix.o obj-y += generic.o obj-y += main.o obj-y += state.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/domain.c Tue Apr 11 18:54:18 2006 -0600 @@ -41,10 +41,6 @@ #include <xen/kernel.h> #include <xen/multicall.h> -/* opt_noreboot: If true, machine will need manual reset on error. */ -static int opt_noreboot = 0; -boolean_param("noreboot", opt_noreboot); - struct percpu_ctxt { struct vcpu *curr_vcpu; unsigned int dirty_segment_mask; @@ -79,15 +75,12 @@ void idle_loop(void) for ( ; ; ) { - irq_stat[cpu].idle_timestamp = jiffies; - - while ( !softirq_pending(cpu) ) - { - page_scrub_schedule_work(); - default_idle(); - } - - do_softirq(); + page_scrub_schedule_work(); + + default_idle(); + + if ( softirq_pending(cpu) ) + do_softirq(); } } @@ -102,84 +95,6 @@ void startup_cpu_idle_loop(void) reset_stack_and_jump(idle_loop); } -static long no_idt[2]; -static int reboot_mode; - -static inline void kb_wait(void) -{ - int i; - - for ( i = 0; i < 0x10000; i++ ) - if ( (inb_p(0x64) & 0x02) == 0 ) - break; -} - -void __attribute__((noreturn)) __machine_halt(void *unused) -{ - for ( ; ; ) - safe_halt(); -} - -void machine_halt(void) -{ - watchdog_disable(); - console_start_sync(); - smp_call_function(__machine_halt, NULL, 1, 0); - __machine_halt(NULL); -} - -void machine_restart(char * __unused) -{ - int i; - - if ( opt_noreboot ) - { - printk("Reboot disabled on cmdline: require manual reset\n"); - machine_halt(); - } - - watchdog_disable(); - console_start_sync(); - - local_irq_enable(); - - /* Ensure we are the boot CPU. */ - if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid ) - { - smp_call_function((void *)machine_restart, NULL, 1, 0); - for ( ; ; ) - safe_halt(); - } - - /* - * Stop all CPUs and turn off local APICs and the IO-APIC, so - * other OSs see a clean IRQ state. - */ - smp_send_stop(); - disable_IO_APIC(); - hvm_disable(); - - /* Rebooting needs to touch the page at absolute address 0. */ - *((unsigned short *)__va(0x472)) = reboot_mode; - - for ( ; ; ) - { - /* Pulse the keyboard reset line. */ - for ( i = 0; i < 100; i++ ) - { - kb_wait(); - udelay(50); - outb(0xfe,0x64); /* pulse reset low */ - udelay(50); - } - - /* That didn't work - force a triple fault.. 
*/ - __asm__ __volatile__("lidt %0": "=m" (no_idt)); - __asm__ __volatile__("int3"); - } -} - - void dump_pageframe_info(struct domain *d) { struct page_info *page; @@ -208,6 +123,11 @@ void dump_pageframe_info(struct domain * } } +void set_current_execstate(struct vcpu *v) +{ + percpu_ctxt[smp_processor_id()].curr_vcpu = v; +} + struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id) { struct vcpu *v; @@ -219,15 +139,8 @@ struct vcpu *alloc_vcpu_struct(struct do v->arch.flags = TF_kernel_mode; - if ( is_idle_domain(d) ) - { - percpu_ctxt[vcpu_id].curr_vcpu = v; - v->arch.schedule_tail = continue_idle_domain; - } - else - { - v->arch.schedule_tail = continue_nonidle_domain; - } + v->arch.schedule_tail = is_idle_domain(d) ? + continue_idle_domain : continue_nonidle_domain; v->arch.ctxt_switch_from = paravirt_ctxt_switch_from; v->arch.ctxt_switch_to = paravirt_ctxt_switch_to; @@ -395,7 +308,7 @@ int arch_set_info_guest( } else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { - hvm_modify_guest_state(v); + hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs); } if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) @@ -450,7 +363,7 @@ int arch_set_info_guest( update_pagetables(v); if ( v->vcpu_id == 0 ) - init_domain_time(d); + update_domain_wallclock_time(d); /* Don't redo final setup */ set_bit(_VCPUF_initialised, &v->vcpu_flags); @@ -1048,6 +961,10 @@ void domain_relinquish_resources(struct /* Relinquish every page of memory. */ relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); + + /* Free page used by xen oprofile buffer */ + free_xenoprof_pages(d); + } void arch_dump_domain_info(struct domain *d) diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/domain_build.c Tue Apr 11 18:54:18 2006 -0600 @@ -773,7 +773,7 @@ int construct_dom0(struct domain *d, zap_low_mappings(idle_pg_table_l2); #endif - init_domain_time(d); + update_domain_wallclock_time(d); set_bit(_VCPUF_initialised, &v->vcpu_flags); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/Makefile --- a/xen/arch/x86/genapic/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,7 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += bigsmp.o obj-y += default.o +obj-y += delivery.o obj-y += es7000.o obj-y += es7000plat.o obj-y += probe.o obj-y += summit.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/bigsmp.c --- a/xen/arch/x86/genapic/bigsmp.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/bigsmp.c Tue Apr 11 18:54:18 2006 -0600 @@ -1,8 +1,3 @@ -/* - * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. - * Drives the local APIC in "clustered mode". 
- */ -#define APIC_DEFINITION 1 #include <xen/config.h> #include <xen/cpumask.h> #include <asm/current.h> @@ -14,9 +9,6 @@ #include <xen/smp.h> #include <xen/init.h> #include <xen/dmi.h> -#include <asm/mach-bigsmp/mach_apic.h> -#include <asm/mach-bigsmp/mach_apicdef.h> -#include <asm/mach-bigsmp/mach_ipi.h> #include <asm/mach-default/mach_mpparse.h> static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -52,4 +44,7 @@ static __init int probe_bigsmp(void) return dmi_bigsmp; } -struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); +struct genapic apic_bigsmp = { + APIC_INIT("bigsmp", probe_bigsmp), + GENAPIC_PHYS +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/default.c --- a/xen/arch/x86/genapic/default.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/default.c Tue Apr 11 18:54:18 2006 -0600 @@ -1,12 +1,10 @@ /* * Default generic APIC driver. This handles upto 8 CPUs. */ -#define APIC_DEFINITION 1 #include <xen/config.h> #include <xen/cpumask.h> #include <asm/current.h> #include <asm/mpspec.h> -#include <asm/mach-default/mach_apicdef.h> #include <asm/genapic.h> #include <asm/fixmap.h> #include <asm/apicdef.h> @@ -14,18 +12,7 @@ #include <xen/string.h> #include <xen/smp.h> #include <xen/init.h> -#include <asm/mach-default/mach_apic.h> -#include <asm/mach-default/mach_ipi.h> #include <asm/mach-default/mach_mpparse.h> - -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; -integer_param("no_ipi_broadcast", no_broadcast); /* should be called last. */ static __init int probe_default(void) @@ -33,13 +20,7 @@ static __init int probe_default(void) return 1; } -struct genapic apic_default = APIC_INIT("default", probe_default); - -static int __init print_ipi_mode(void) -{ - if (genapic == &apic_default) - printk("Using IPI %sShortcut mode\n", - no_broadcast ? "No-" : ""); - return 0; -} -__initcall(print_ipi_mode); +struct genapic apic_default = { + APIC_INIT("default", probe_default), + GENAPIC_FLAT +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/es7000.c --- a/xen/arch/x86/genapic/es7000.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/es7000.c Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,6 @@ /* * APIC driver for the Unisys ES7000 chipset. 
*/ -#define APIC_DEFINITION 1 #include <xen/config.h> #include <xen/cpumask.h> #include <asm/current.h> @@ -14,11 +13,7 @@ #include <xen/string.h> #include <xen/smp.h> #include <xen/init.h> -#include <asm/mach-es7000/mach_apicdef.h> -#include <asm/mach-es7000/mach_apic.h> -#include <asm/mach-es7000/mach_ipi.h> #include <asm/mach-es7000/mach_mpparse.h> -#include <asm/mach-es7000/mach_wakecpu.h> static __init int probe_es7000(void) { @@ -26,4 +21,7 @@ static __init int probe_es7000(void) return 0; } -struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000); +struct genapic apic_es7000 = { + APIC_INIT("es7000", probe_es7000), + GENAPIC_PHYS +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/probe.c --- a/xen/arch/x86/genapic/probe.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/probe.c Tue Apr 11 18:54:18 2006 -0600 @@ -103,8 +103,3 @@ int __init acpi_madt_oem_check(char *oem } return 0; } - -int hard_smp_processor_id(void) -{ - return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); -} diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/summit.c --- a/xen/arch/x86/genapic/summit.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/genapic/summit.c Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,6 @@ /* * APIC driver for the IBM "Summit" chipset. */ -#define APIC_DEFINITION 1 #include <xen/config.h> #include <xen/cpumask.h> #include <asm/current.h> @@ -13,9 +12,6 @@ #include <xen/string.h> #include <xen/smp.h> #include <xen/init.h> -#include <asm/mach-summit/mach_apic.h> -#include <asm/mach-summit/mach_apicdef.h> -#include <asm/mach-summit/mach_ipi.h> #include <asm/mach-summit/mach_mpparse.h> static __init int probe_summit(void) @@ -24,4 +20,7 @@ static __init int probe_summit(void) return 0; } -struct genapic apic_summit = APIC_INIT("summit", probe_summit); +struct genapic apic_summit = { + APIC_INIT("summit", probe_summit), + GENAPIC_PHYS +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-y += svm subdir-y += vmx @@ -10,5 +8,3 @@ obj-y += platform.o obj-y += platform.o obj-y += vioapic.o obj-y += vlapic.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/intercept.c Tue Apr 11 18:54:18 2006 -0600 @@ -123,6 +123,16 @@ static inline void hvm_mmio_access(struc req->u.data = tmp1; break; + case IOREQ_TYPE_XCHG: + /* + * Note that we don't need to be atomic here since VCPU is accessing + * its own local APIC. + */ + tmp1 = read_handler(v, req->addr, req->size); + write_handler(v, req->addr, req->size, (unsigned long) req->u.data); + req->u.data = tmp1; + break; + default: printk("error ioreq type for local APIC %x\n", req->type); domain_crash_synchronous(); @@ -143,7 +153,7 @@ int hvm_mmio_intercept(ioreq_t *p) if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) ) { hvm_mmio_access(v, p, hvm_mmio_handlers[i]->read_handler, - hvm_mmio_handlers[i]->write_handler); + hvm_mmio_handlers[i]->write_handler); return 1; } } diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/io.c Tue Apr 11 18:54:18 2006 -0600 @@ -365,44 +365,46 @@ static void hvm_pio_assist(struct cpu_us unsigned long old_eax; int sign = p->df ? 
-1 : 1; - if (p->dir == IOREQ_WRITE) { - if (p->pdata_valid) { + if ( p->pdata_valid || (mmio_opp->flags & OVERLAP) ) + { + if ( mmio_opp->flags & REPZ ) + regs->ecx -= p->count; + if ( p->dir == IOREQ_READ ) + { + regs->edi += sign * p->count * p->size; + if ( mmio_opp->flags & OVERLAP ) + { + unsigned long addr = regs->edi; + if (hvm_realmode(current)) + addr += regs->es << 4; + if (sign > 0) + addr -= p->size; + hvm_copy(&p->u.data, addr, p->size, HVM_COPY_OUT); + } + } + else /* p->dir == IOREQ_WRITE */ + { + ASSERT(p->dir == IOREQ_WRITE); regs->esi += sign * p->count * p->size; - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - } - } else { - if (mmio_opp->flags & OVERLAP) { - unsigned long addr; - - regs->edi += sign * p->count * p->size; - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - - addr = regs->edi; - if (sign > 0) - addr -= p->size; - hvm_copy(&p->u.data, addr, p->size, HVM_COPY_OUT); - } else if (p->pdata_valid) { - regs->edi += sign * p->count * p->size; - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; - } else { - old_eax = regs->eax; - switch (p->size) { - case 1: - regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); - break; - case 2: - regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); - break; - case 4: - regs->eax = (p->u.data & 0xffffffff); - break; - default: - printk("Error: %s unknown port size\n", __FUNCTION__); - domain_crash_synchronous(); - } + } + } + else if ( p->dir == IOREQ_READ ) + { + old_eax = regs->eax; + switch ( p->size ) + { + case 1: + regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); + break; + case 2: + regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); + break; + case 4: + regs->eax = (p->u.data & 0xffffffff); + break; + default: + printk("Error: %s unknown port size\n", __FUNCTION__); + domain_crash_synchronous(); } } } @@ -713,7 +715,7 @@ void hvm_wait_io(void) if ( !test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) break; - do_sched_op(SCHEDOP_block, 0); + do_sched_op_compat(SCHEDOP_block, 0); } /* @@ -743,7 +745,7 @@ void hvm_safe_block(void) if ( test_bit(port, &d->shared_info->evtchn_pending[0]) ) break; - do_sched_op(SCHEDOP_block, 0); + do_sched_op_compat(SCHEDOP_block, 0); } /* Reflect pending event in selector and master flags. 
*/ diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/platform.c Tue Apr 11 18:54:18 2006 -0600 @@ -401,6 +401,11 @@ static int hvm_decode(int realmode, unsi GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); return reg_mem(instr->op_size, opcode, instr, rex); + case 0x3B: /* cmp m32/16, r32/16 */ + instr->instr = INSTR_CMP; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return mem_reg(instr->op_size, opcode, instr, rex); + case 0x80: case 0x81: { @@ -438,6 +443,14 @@ static int hvm_decode(int realmode, unsi instr->op_size = BYTE; GET_OP_SIZE_FOR_BYTE(size_reg); return mem_reg(size_reg, opcode, instr, rex); + + case 0x87: /* xchg {r/m16|r/m32}, {m/r16|m/r32} */ + instr->instr = INSTR_XCHG; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + if (((*(opcode+1)) & 0xc7) == 5) + return reg_mem(instr->op_size, opcode, instr, rex); + else + return mem_reg(instr->op_size, opcode, instr, rex); case 0x88: /* mov r8, m8 */ instr->instr = INSTR_MOV; @@ -936,6 +949,17 @@ void handle_mmio(unsigned long va, unsig break; } + case INSTR_XCHG: + mmio_opp->flags = mmio_inst.flags; + mmio_opp->instr = mmio_inst.instr; + mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */ + mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */ + + /* send the request and wait for the value */ + send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1, + mmio_inst.op_size, 0, IOREQ_WRITE, 0); + break; + default: printf("Unhandled MMIO instruction\n"); domain_crash_synchronous(); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/Makefile --- a/xen/arch/x86/hvm/svm/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-$(x86_32) += x86_32 subdir-$(x86_64) += x86_64 @@ -8,5 +6,3 @@ obj-y += intr.o obj-y += intr.o obj-y += svm.o obj-y += vmcb.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Tue Apr 11 18:54:18 2006 -0600 @@ -81,7 +81,7 @@ interrupt_post_injection(struct vcpu * v } vpit->inject_point = NOW(); - vpit->last_pit_gtime += vpit->period; + vpit->last_pit_gtime += vpit->period_cycles; svm_set_guest_time(v, vpit->last_pit_gtime); } diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Apr 11 18:54:18 2006 -0600 @@ -382,11 +382,6 @@ static inline int long_mode_do_msr_write return 1; } -void svm_modify_guest_state(struct vcpu *v) -{ - svm_modify_vmcb(v, &v->arch.guest_context.user_regs); -} - int svm_realmode(struct vcpu *v) { unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0; @@ -448,8 +443,6 @@ int start_svm(void) hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs; hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs; - - hvm_funcs.modify_guest_state = svm_modify_guest_state; hvm_funcs.realmode = svm_realmode; hvm_funcs.paging_enabled = svm_paging_enabled; @@ -674,9 +667,10 @@ static void svm_freeze_time(struct vcpu { struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit; - v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); - if ( vpit->first_injected ) + if ( vpit->first_injected && !v->domain->arch.hvm_domain.guest_time ) { + v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v); stop_timer(&(vpit->pit_timer)); + 
} } static void svm_ctxt_switch_from(struct vcpu *v) @@ -1169,16 +1163,12 @@ static unsigned int check_for_null_selec seg = vmcb->ds; break; default: - if (dir == IOREQ_READ) + if (dir == IOREQ_READ) /* IN/INS instruction? */ seg = vmcb->es; else seg = vmcb->ds; } - /* In real Mode */ - if (real) - seg.base = seg.sel << 4; - if (base) *base = seg.base; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Tue Apr 11 18:54:18 2006 -0600 @@ -161,23 +161,6 @@ static int construct_vmcb_controls(struc /* - * modify guest eflags and execption bitmap for gdb - */ -int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs) -{ - int error; - if ((error = load_vmcb(&v->arch.hvm_svm, v->arch.hvm_svm.host_save_pa))) - { - printk("svm_modify_vmcb: load_vmcb failed: VMCB = %lx\n", - (unsigned long) v->arch.hvm_svm.host_save_pa); - return -EINVAL; - } - svm_load_cpu_user_regs(v,regs); - return 0; -} - - -/* * Initially set the same environement as host. */ static int construct_init_vmcb_guest(struct arch_svm_struct *arch_svm, @@ -498,8 +481,11 @@ void svm_do_resume(struct vcpu *v) svm_stts(v); /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( vpit->first_injected) { - svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); + if ( vpit->first_injected ) { + if ( v->domain->arch.hvm_domain.guest_time ) { + svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time); + v->domain->arch.hvm_domain.guest_time = 0; + } pickup_deactive_ticks(vpit); } @@ -510,7 +496,6 @@ void svm_do_resume(struct vcpu *v) /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); } - void svm_launch_fail(unsigned long eflags) { diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/x86_32/Makefile --- a/xen/arch/x86/hvm/svm/x86_32/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/x86_32/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += exits.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/svm/x86_64/Makefile --- a/xen/arch/x86/hvm/svm/x86_64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/svm/x86_64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += exits.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/vmx/Makefile --- a/xen/arch/x86/hvm/vmx/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,6 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-$(x86_32) += x86_32 subdir-$(x86_64) += x86_64 obj-y += io.o obj-y += vmcs.o obj-y += vmx.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Apr 11 18:54:18 2006 -0600 @@ -487,32 +487,6 @@ void destroy_vmcs(struct arch_vmx_struct arch_vmx->io_bitmap_b = NULL; } -/* - * modify guest eflags and execption bitmap for gdb - */ -int modify_vmcs(struct arch_vmx_struct *arch_vmx, - struct cpu_user_regs *regs) -{ - int error; - u64 vmcs_phys_ptr, old, old_phys_ptr; - vmcs_phys_ptr = (u64) virt_to_maddr(arch_vmx->vmcs); - - old_phys_ptr = virt_to_maddr(&old); - __vmptrst(old_phys_ptr); - if ((error = load_vmcs(arch_vmx, 
vmcs_phys_ptr))) { - printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n", - (unsigned long) vmcs_phys_ptr); - return -EINVAL; - } - -/* XXX VMX change modify_vmcs arg to v */ - hvm_load_cpu_guest_regs(current, regs); - - __vmptrld(old_phys_ptr); - - return 0; -} - void vm_launch_fail(unsigned long eflags) { unsigned long error; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 11 18:54:18 2006 -0600 @@ -400,7 +400,7 @@ void vmx_migrate_timers(struct vcpu *v) migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor); } -struct vmx_store_cpu_guest_regs_callback_info { +struct vmx_cpu_guest_regs_callback_info { struct vcpu *v; struct cpu_user_regs *regs; unsigned long *crs; @@ -409,10 +409,19 @@ static void vmx_store_cpu_guest_regs( static void vmx_store_cpu_guest_regs( struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs); +static void vmx_load_cpu_guest_regs( + struct vcpu *v, struct cpu_user_regs *regs); + static void vmx_store_cpu_guest_regs_callback(void *data) { - struct vmx_store_cpu_guest_regs_callback_info *info = data; + struct vmx_cpu_guest_regs_callback_info *info = data; vmx_store_cpu_guest_regs(info->v, info->regs, info->crs); +} + +static void vmx_load_cpu_guest_regs_callback(void *data) +{ + struct vmx_cpu_guest_regs_callback_info *info = data; + vmx_load_cpu_guest_regs(info->v, info->regs); } static void vmx_store_cpu_guest_regs( @@ -426,7 +435,7 @@ static void vmx_store_cpu_guest_regs( if ( v->arch.hvm_vmx.launch_cpu != smp_processor_id() ) { /* Get register details from remote CPU. */ - struct vmx_store_cpu_guest_regs_callback_info info = { + struct vmx_cpu_guest_regs_callback_info info = { .v = v, .regs = regs, .crs = crs }; cpumask_t cpumask = cpumask_of_cpu(v->arch.hvm_vmx.launch_cpu); on_selected_cpus(cpumask, vmx_store_cpu_guest_regs_callback, @@ -479,8 +488,33 @@ static void vmx_store_cpu_guest_regs( void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) { + if ( v != current ) + { + /* Non-current VCPUs must be paused to set the register snapshot. */ + ASSERT(atomic_read(&v->pausecnt) != 0); + + if ( v->arch.hvm_vmx.launch_cpu != smp_processor_id() ) + { + struct vmx_cpu_guest_regs_callback_info info = { + .v = v, .regs = regs }; + cpumask_t cpumask = cpumask_of_cpu(v->arch.hvm_vmx.launch_cpu); + on_selected_cpus(cpumask, vmx_load_cpu_guest_regs_callback, + &info, 1, 1); + return; + } + + /* Register details are on this CPU. Load the correct VMCS. 
*/ + __vmptrld(virt_to_maddr(v->arch.hvm_vmx.vmcs)); + } + + ASSERT(v->arch.hvm_vmx.launch_cpu == smp_processor_id()); + #if defined (__x86_64__) __vmwrite(GUEST_SS_SELECTOR, regs->ss); + __vmwrite(GUEST_DS_SELECTOR, regs->ds); + __vmwrite(GUEST_ES_SELECTOR, regs->es); + __vmwrite(GUEST_GS_SELECTOR, regs->gs); + __vmwrite(GUEST_FS_SELECTOR, regs->fs); __vmwrite(GUEST_RSP, regs->rsp); __vmwrite(GUEST_RFLAGS, regs->rflags); @@ -493,6 +527,11 @@ void vmx_load_cpu_guest_regs(struct vcpu __vmwrite(GUEST_RIP, regs->rip); #elif defined (__i386__) __vmwrite(GUEST_SS_SELECTOR, regs->ss); + __vmwrite(GUEST_DS_SELECTOR, regs->ds); + __vmwrite(GUEST_ES_SELECTOR, regs->es); + __vmwrite(GUEST_GS_SELECTOR, regs->gs); + __vmwrite(GUEST_FS_SELECTOR, regs->fs); + __vmwrite(GUEST_RSP, regs->esp); __vmwrite(GUEST_RFLAGS, regs->eflags); @@ -503,14 +542,11 @@ void vmx_load_cpu_guest_regs(struct vcpu __vmwrite(GUEST_CS_SELECTOR, regs->cs); __vmwrite(GUEST_RIP, regs->eip); -#else -#error Unsupported architecture #endif -} - -void vmx_modify_guest_state(struct vcpu *v) -{ - modify_vmcs(&v->arch.hvm_vmx, &v->arch.guest_context.user_regs); + + /* Reload current VCPU's VMCS if it was temporarily unloaded. */ + if ( (v != current) && hvm_guest(current) ) + __vmptrld(virt_to_maddr(current->arch.hvm_vmx.vmcs)); } int vmx_realmode(struct vcpu *v) @@ -660,8 +696,6 @@ int start_vmx(void) hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; - - hvm_funcs.modify_guest_state = vmx_modify_guest_state; hvm_funcs.realmode = vmx_realmode; hvm_funcs.paging_enabled = vmx_paging_enabled; @@ -1913,7 +1947,7 @@ static inline void vmx_vmexit_do_extint( && !(vector & INTR_INFO_VALID_MASK)) __hvm_bug(regs); - vector &= 0xff; + vector &= INTR_INFO_VECTOR_MASK; local_irq_disable(); TRACE_VMEXIT(1,vector); @@ -2043,10 +2077,8 @@ asmlinkage void vmx_vmexit_handler(struc return; } - { - __vmread(GUEST_RIP, &eip); - TRACE_VMEXIT(0,exit_reason); - } + __vmread(GUEST_RIP, &eip); + TRACE_VMEXIT(0,exit_reason); switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: @@ -2063,7 +2095,7 @@ asmlinkage void vmx_vmexit_handler(struc if ((error = __vmread(VM_EXIT_INTR_INFO, &vector)) || !(vector & INTR_INFO_VALID_MASK)) __hvm_bug(&regs); - vector &= 0xff; + vector &= INTR_INFO_VECTOR_MASK; TRACE_VMEXIT(1,vector); perfc_incra(cause_vector, vector); @@ -2094,6 +2126,14 @@ asmlinkage void vmx_vmexit_handler(struc domain_pause_for_debugger(); + break; + } + case TRAP_int3: + { + if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) ) + domain_pause_for_debugger(); + else + vmx_inject_exception(v, TRAP_int3, VMX_DELIVER_NO_ERROR_CODE); break; } #endif diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/vmx/x86_32/Makefile --- a/xen/arch/x86/hvm/vmx/x86_32/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_32/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += exits.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/hvm/vmx/x86_64/Makefile --- a/xen/arch/x86/hvm/vmx/x86_64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += exits.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/io_apic.c Tue Apr 11 18:54:18 2006 
-0600 @@ -75,6 +75,7 @@ static struct irq_pin_list { static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; +static int irq_2_pin_free_entry = NR_IRQS; int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; @@ -85,20 +86,57 @@ int vector_irq[NR_VECTORS] __read_mostly */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; - while (entry->next) + while (entry->next) { + BUG_ON((entry->apic == apic) && (entry->pin == pin)); entry = irq_2_pin + entry->next; + } + + BUG_ON((entry->apic == apic) && (entry->pin == pin)); if (entry->pin != -1) { - entry->next = first_free_entry; + if (irq_2_pin_free_entry >= PIN_MAP_SIZE) + panic("io_apic.c: whoops"); + entry->next = irq_2_pin_free_entry; entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); + irq_2_pin_free_entry = entry->next; + entry->next = 0; } entry->apic = apic; entry->pin = pin; +} + +static void remove_pin_at_irq(unsigned int irq, int apic, int pin) +{ + struct irq_pin_list *entry, *prev; + + for (entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next]) { + if ((entry->apic == apic) && (entry->pin == pin)) + break; + if (!entry->next) + BUG(); + } + + entry->pin = entry->apic = -1; + + if (entry != &irq_2_pin[irq]) { + /* Removed entry is not at head of list. */ + prev = &irq_2_pin[irq]; + while (&irq_2_pin[prev->next] != entry) + prev = &irq_2_pin[prev->next]; + prev->next = entry->next; + entry->next = irq_2_pin_free_entry; + irq_2_pin_free_entry = entry - irq_2_pin; + } else if (entry->next != 0) { + /* Removed entry is at head of multi-item list. */ + prev = entry; + entry = &irq_2_pin[entry->next]; + *prev = *entry; + entry->pin = entry->apic = -1; + entry->next = irq_2_pin_free_entry; + irq_2_pin_free_entry = entry - irq_2_pin; + } } /* @@ -958,6 +996,10 @@ static void __init enable_IO_APIC(void) irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } + + /* Initialise dynamic irq_2_pin free list. */ + for (i = NR_IRQS; i < PIN_MAP_SIZE; i++) + irq_2_pin[i].next = i + 1; /* * The number of IO-APIC IRQ registers (== #pins): @@ -1736,8 +1778,10 @@ int __init io_apic_get_unique_id (int io spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ - if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } } apic_printk(APIC_VERBOSE, KERN_INFO @@ -1852,11 +1896,17 @@ int ioapic_guest_read(unsigned long phys return 0; } +#define WARN_BOGUS_WRITE(f, a...) \ + DPRINTK("\n%s: apic=%d, pin=%d, old_irq=%d, new_irq=%d\n" \ + "%s: old_entry=%08x, new_entry=%08x\n" \ + "%s: " f, __FUNCTION__, apic, pin, old_irq, new_irq, \ + __FUNCTION__, *(u32 *)&old_rte, *(u32 *)&new_rte, \ + __FUNCTION__ , ##a ) + int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val) { - int apic, pin, irq; - struct IO_APIC_route_entry rte = { 0 }; - struct irq_pin_list *entry; + int apic, pin, old_irq = -1, new_irq = -1; + struct IO_APIC_route_entry old_rte = { 0 }, new_rte = { 0 }; unsigned long flags; if ( (apic = ioapic_physbase_to_id(physbase)) < 0 ) @@ -1868,8 +1918,9 @@ int ioapic_guest_write(unsigned long phy pin = (reg - 0x10) >> 1; - *(u32 *)&rte = val; - rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + /* Write first half from guest; second half is target info. 
*/ + *(u32 *)&new_rte = val; + new_rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); /* * What about weird destination types? @@ -1879,7 +1930,7 @@ int ioapic_guest_write(unsigned long phy * ExtINT: Ignore? Linux only asserts this at start of day. * For now, print a message and return an error. We can fix up on demand. */ - if ( rte.delivery_mode > dest_LowestPrio ) + if ( new_rte.delivery_mode > dest_LowestPrio ) { printk("ERROR: Attempt to write weird IOAPIC destination mode!\n"); printk(" APIC=%d/%d, lo-reg=%x\n", apic, pin, val); @@ -1890,36 +1941,69 @@ int ioapic_guest_write(unsigned long phy * The guest does not know physical APIC arrangement (flat vs. cluster). * Apply genapic conventions for this platform. */ - rte.delivery_mode = INT_DELIVERY_MODE; - rte.dest_mode = INT_DEST_MODE; - - if ( rte.vector >= FIRST_DEVICE_VECTOR ) - { - /* Is there a valid irq mapped to this vector? */ - irq = vector_irq[rte.vector]; - if ( !IO_APIC_IRQ(irq) ) + new_rte.delivery_mode = INT_DELIVERY_MODE; + new_rte.dest_mode = INT_DEST_MODE; + + spin_lock_irqsave(&ioapic_lock, flags); + + /* Read first (interesting) half of current routing entry. */ + *(u32 *)&old_rte = io_apic_read(apic, 0x10 + 2 * pin); + + /* No change to the first half of the routing entry? Bail quietly. */ + if ( *(u32 *)&old_rte == *(u32 *)&new_rte ) + { + spin_unlock_irqrestore(&ioapic_lock, flags); + return 0; + } + + if ( old_rte.vector >= FIRST_DEVICE_VECTOR ) + old_irq = vector_irq[old_rte.vector]; + if ( new_rte.vector >= FIRST_DEVICE_VECTOR ) + new_irq = vector_irq[new_rte.vector]; + + if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) ) + { + if ( irq_desc[IO_APIC_VECTOR(old_irq)].action ) + { + WARN_BOGUS_WRITE("Attempt to remove IO-APIC pin of in-use IRQ!\n"); + spin_unlock_irqrestore(&ioapic_lock, flags); return 0; - + } + + remove_pin_at_irq(old_irq, apic, pin); + } + + if ( (new_irq != -1) && IO_APIC_IRQ(new_irq) ) + { + if ( irq_desc[IO_APIC_VECTOR(new_irq)].action ) + { + WARN_BOGUS_WRITE("Attempt to %s IO-APIC pin for in-use IRQ!\n", + (old_irq != new_irq) ? "add" : "modify"); + spin_unlock_irqrestore(&ioapic_lock, flags); + return 0; + } + /* Set the correct irq-handling type. */ - irq_desc[IO_APIC_VECTOR(irq)].handler = rte.trigger ? + irq_desc[IO_APIC_VECTOR(new_irq)].handler = new_rte.trigger ? &ioapic_level_type: &ioapic_edge_type; - - /* Record the pin<->irq mapping. */ - for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] ) - { - if ( (entry->apic == apic) && (entry->pin == pin) ) - break; - if ( !entry->next ) - { - add_pin_to_irq(irq, apic, pin); - break; - } - } - } - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1)); + + if ( old_irq != new_irq ) + add_pin_to_irq(new_irq, apic, pin); + + /* Mask iff level triggered. */ + new_rte.mask = new_rte.trigger; + } + else if ( !new_rte.mask ) + { + /* This pin leads nowhere but the guest has not masked it. 
*/ + WARN_BOGUS_WRITE("Installing bogus unmasked IO-APIC entry!\n"); + new_rte.mask = 1; + } + + + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&new_rte) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&new_rte) + 1)); + spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/irq.c Tue Apr 11 18:54:18 2006 -0600 @@ -171,26 +171,20 @@ int pirq_guest_unmask(struct domain *d) int pirq_guest_unmask(struct domain *d) { irq_desc_t *desc; - unsigned int i, j, pirq; - u32 m; + unsigned int pirq; shared_info_t *s = d->shared_info; - for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ ) - { - m = d->pirq_mask[i]; - while ( m != 0 ) - { - j = find_first_set_bit(m); - m &= ~(1 << j); - pirq = (i << 5) + j; - desc = &irq_desc[irq_to_vector(pirq)]; - spin_lock_irq(&desc->lock); - if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &d->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(irq_to_vector(pirq)); - spin_unlock_irq(&desc->lock); - } + for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS); + pirq < NR_PIRQS; + pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) ) + { + desc = &irq_desc[irq_to_vector(pirq)]; + spin_lock_irq(&desc->lock); + if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && + test_and_clear_bit(pirq, &d->pirq_mask) && + (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) + desc->handler->end(irq_to_vector(pirq)); + spin_unlock_irq(&desc->lock); } return 0; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/mm.c Tue Apr 11 18:54:18 2006 -0600 @@ -1528,10 +1528,9 @@ int get_page_type(struct page_info *page nx &= ~PGT_va_mask; nx |= type; /* we know the actual type is correct */ } - else + else if ( (type & PGT_va_mask) != PGT_va_mutable ) { ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); - ASSERT((type & PGT_va_mask) != PGT_va_mutable); #ifdef CONFIG_X86_PAE /* We use backptr as extra typing. Cannot be unknown. */ if ( (type & PGT_type_mask) == PGT_l2_page_table ) diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/mpparse.c --- a/xen/arch/x86/mpparse.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/mpparse.c Tue Apr 11 18:54:18 2006 -0600 @@ -35,7 +35,7 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __initdata maxcpus = NR_CPUS; +unsigned int __devinitdata maxcpus = NR_CPUS; #ifdef CONFIG_HOTPLUG_CPU #define CPU_HOTPLUG_ENABLED (1) @@ -226,16 +226,11 @@ static void __devinit MP_processor_info num_processors++; if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(ver)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } + /* + * No need for processor or APIC checks: physical delivery + * (bigsmp) mode should always work. 
+ */ + def_to_bigsmp = 1; } bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } @@ -916,6 +911,7 @@ void __init mp_register_ioapic ( u32 gsi_base) { int idx = 0; + int tmpid; if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " @@ -936,9 +932,14 @@ void __init mp_register_ioapic ( set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + tmpid = io_apic_get_unique_id(idx, id); else - mp_ioapics[idx].mpc_apicid = id; + tmpid = id; + if (tmpid == -1) { + nr_ioapics--; + return; + } + mp_ioapics[idx].mpc_apicid = tmpid; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/nmi.c --- a/xen/arch/x86/nmi.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/nmi.c Tue Apr 11 18:54:18 2006 -0600 @@ -431,14 +431,14 @@ void nmi_watchdog_tick(struct cpu_user_r */ static void do_nmi_trigger(unsigned char key) { - u32 id = apic_read(APIC_ID); + u32 id = GET_APIC_ID(apic_read(APIC_ID)); printk("Triggering NMI on APIC ID %x\n", id); local_irq_disable(); apic_wait_icr_idle(); apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_INT_ASSERT); + apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL); local_irq_enable(); } diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/setup.c Tue Apr 11 18:54:18 2006 -0600 @@ -404,6 +404,7 @@ void __init __start_xen(multiboot_info_t BUG_ON(idle_domain == NULL); set_current(idle_domain->vcpu[0]); + set_current_execstate(idle_domain->vcpu[0]); idle_vcpu[0] = current; paging_init(); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/shadow32.c Tue Apr 11 18:54:18 2006 -0600 @@ -924,7 +924,8 @@ set_p2m_entry(struct domain *d, unsigned } l1 = map_domain_page_with_cache(page_to_mfn(l1page), l1cache); - memset(l1, 0, PAGE_SIZE); + /* Initialise entries to INVALID_MFN = ~0 */ + memset(l1, -1, PAGE_SIZE); unmap_domain_page_with_cache(l1, l1cache); l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR); @@ -1686,8 +1687,10 @@ get_mfn_from_gpfn_foreign(struct domain unmap_domain_page(l2); if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) { +#if 0 printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n", __func__, d->domain_id, gpfn, l2e_get_intpte(l2e)); +#endif return INVALID_MFN; } l1 = map_domain_page(l2e_get_pfn(l2e)); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/smp.c --- a/xen/arch/x86/smp.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/smp.c Tue Apr 11 18:54:18 2006 -0600 @@ -20,6 +20,7 @@ #include <asm/flushtlb.h> #include <asm/smpboot.h> #include <asm/hardirq.h> +#include <asm/ipi.h> #include <mach_apic.h> /* @@ -66,44 +67,12 @@ static inline int __prepare_ICR (unsigned int shortcut, int vector) { - return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; + return APIC_DM_FIXED | shortcut | vector; } static inline int __prepare_ICR2 (unsigned int mask) { return SET_APIC_DEST_FIELD(mask); -} - -void __send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. 
Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} - -void send_IPI_self(int vector) -{ - __send_IPI_shortcut(APIC_DEST_SELF, vector); } static inline void check_IPI_mask(cpumask_t cpumask) @@ -116,10 +85,7 @@ static inline void check_IPI_mask(cpumas ASSERT(!cpus_empty(cpumask)); } -/* - * This is only used on smaller machines. - */ -void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +void send_IPI_mask_flat(cpumask_t cpumask, int vector) { unsigned long mask = cpus_addr(cpumask)[0]; unsigned long cfg; @@ -133,18 +99,18 @@ void send_IPI_mask_bitmask(cpumask_t cpu * Wait for idle. */ apic_wait_icr_idle(); - + /* * prepare target chip field */ cfg = __prepare_ICR2(mask); apic_write_around(APIC_ICR2, cfg); - + /* * program the ICR */ - cfg = __prepare_ICR(0, vector); - + cfg = __prepare_ICR(0, vector) | APIC_DEST_LOGICAL; + /* * Send the IPI. The write to APIC_ICR fires this off. */ @@ -153,7 +119,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu local_irq_restore(flags); } -inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +void send_IPI_mask_phys(cpumask_t mask, int vector) { unsigned long cfg, flags; unsigned int query_cpu; @@ -168,35 +134,32 @@ inline void send_IPI_mask_sequence(cpuma local_irq_save(flags); - for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { - if (cpu_isset(query_cpu, mask)) { - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); - apic_write_around(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); - } - } + for_each_cpu_mask( query_cpu, mask ) + { + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(cpu_physical_id(query_cpu)); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector) | APIC_DEST_PHYSICAL; + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + } + local_irq_restore(flags); } - -#include <mach_ipi.h> static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; static cpumask_t flush_cpumask; @@ -241,20 +204,12 @@ void __flush_tlb_mask(cpumask_t mask, un /* Call with no locks held and interrupts enabled (e.g., softirq context). */ void new_tlbflush_clock_period(void) { - ASSERT(local_irq_is_enabled()); - + cpumask_t allbutself; + /* Flush everyone else. We definitely flushed just before entry. */ - if ( num_online_cpus() > 1 ) - { - spin_lock(&flush_lock); - flush_cpumask = cpu_online_map; - flush_va = FLUSHVA_ALL; - send_IPI_allbutself(INVALIDATE_TLB_VECTOR); - cpu_clear(smp_processor_id(), flush_cpumask); - while ( !cpus_empty(flush_cpumask) ) - cpu_relax(); - spin_unlock(&flush_lock); - } + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + __flush_tlb_mask(allbutself, FLUSHVA_ALL); /* No need for atomicity: we are the only possible updater. 
*/ ASSERT(tlbflush_clock == 0); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/smpboot.c Tue Apr 11 18:54:18 2006 -0600 @@ -441,7 +441,7 @@ void __devinit smp_callin(void) calibrate_tsc_ap(); } -static int cpucount; +static int cpucount, booting_cpu; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -524,12 +524,13 @@ void __devinit start_secondary(void *unu * booting is too fragile that we want to limit the * things done here to the most necessary things. */ - unsigned int cpu = cpucount; + unsigned int cpu = booting_cpu; extern void percpu_traps_init(void); + set_processor_id(cpu); set_current(idle_vcpu[cpu]); - set_processor_id(cpu); + set_current_execstate(idle_vcpu[cpu]); percpu_traps_init(); @@ -880,18 +881,32 @@ static int __devinit do_boot_cpu(int api * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. */ { - struct vcpu *v; unsigned long boot_error; int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; + struct domain *d; + struct vcpu *v; + int vcpu_id; ++cpucount; - v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu); - BUG_ON(v == NULL); - - v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + booting_cpu = cpu; + + if ((vcpu_id = cpu % MAX_VIRT_CPUS) == 0) { + d = domain_create(IDLE_DOMAIN_ID, cpu); + BUG_ON(d == NULL); + v = d->vcpu[0]; + } else { + d = idle_vcpu[cpu - vcpu_id]->domain; + BUG_ON(d == NULL); + v = alloc_vcpu(d, vcpu_id, cpu); + } + + idle_vcpu[cpu] = v; + BUG_ON(v == NULL); + + v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); @@ -1079,7 +1094,7 @@ static void __init smp_boot_cpus(unsigne if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID)) continue; - if (!check_apicid_present(bit)) + if (!check_apicid_present(apicid)) continue; if (max_cpus <= cpucount+1) continue; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/time.c Tue Apr 11 18:54:18 2006 -0600 @@ -670,7 +670,7 @@ static inline void version_update_end(u3 (*version)++; } -static inline void __update_dom_time(struct vcpu *v) +static inline void __update_vcpu_system_time(struct vcpu *v) { struct cpu_time *t; struct vcpu_time_info *u; @@ -688,44 +688,14 @@ static inline void __update_dom_time(str version_update_end(&u->version); } -void update_dom_time(struct vcpu *v) +void update_vcpu_system_time(struct vcpu *v) { if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != cpu_time[smp_processor_id()].local_tsc_stamp ) - __update_dom_time(v); -} - -/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. 
*/ -void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base) -{ - u64 x; - u32 y, _wc_sec, _wc_nsec; - struct domain *d; - shared_info_t *s; - - x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base; - y = do_div(x, 1000000000); - - wc_sec = _wc_sec = (u32)x; - wc_nsec = _wc_nsec = (u32)y; - - read_lock(&domlist_lock); - spin_lock(&wc_lock); - - for_each_domain ( d ) - { - s = d->shared_info; - version_update_begin(&s->wc_version); - s->wc_sec = _wc_sec; - s->wc_nsec = _wc_nsec; - version_update_end(&s->wc_version); - } - - spin_unlock(&wc_lock); - read_unlock(&domlist_lock); -} - -void init_domain_time(struct domain *d) + __update_vcpu_system_time(v); +} + +void update_domain_wallclock_time(struct domain *d) { spin_lock(&wc_lock); version_update_begin(&d->shared_info->wc_version); @@ -733,6 +703,27 @@ void init_domain_time(struct domain *d) d->shared_info->wc_nsec = wc_nsec; version_update_end(&d->shared_info->wc_version); spin_unlock(&wc_lock); +} + +/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ +void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base) +{ + u64 x; + u32 y, _wc_sec, _wc_nsec; + struct domain *d; + + x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base; + y = do_div(x, 1000000000); + + spin_lock(&wc_lock); + wc_sec = _wc_sec = (u32)x; + wc_nsec = _wc_nsec = (u32)y; + spin_unlock(&wc_lock); + + read_lock(&domlist_lock); + for_each_domain ( d ) + update_domain_wallclock_time(d); + read_unlock(&domlist_lock); } static void local_time_calibration(void *unused) @@ -925,7 +916,7 @@ void __init early_time_init(void) void send_timer_event(struct vcpu *v) { - send_guest_virq(v, VIRQ_TIMER); + send_guest_vcpu_virq(v, VIRQ_TIMER); } /* diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/traps.c Tue Apr 11 18:54:18 2006 -0600 @@ -32,6 +32,7 @@ #include <xen/errno.h> #include <xen/mm.h> #include <xen/console.h> +#include <xen/reboot.h> #include <asm/regs.h> #include <xen/delay.h> #include <xen/event.h> @@ -138,13 +139,13 @@ static void show_guest_stack(struct cpu_ if ( vm86_mode(regs) ) { stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff)); - printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ", + printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ", regs->ss, (uint16_t)(regs->esp & 0xffff)); } else { stack = (unsigned long *)regs->esp; - printk("Guest stack trace from "__OP"sp=%p:\n ", stack); + printk("Guest stack trace from "__OP"sp=%p:\n ", stack); } for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ ) @@ -160,8 +161,8 @@ static void show_guest_stack(struct cpu_ break; } if ( (i != 0) && ((i % stack_words_per_line) == 0) ) - printk("\n "); - printk("%p ", _p(addr)); + printk("\n "); + printk(" %p", _p(addr)); stack++; } if ( i == 0 ) @@ -257,16 +258,16 @@ void show_stack(struct cpu_user_regs *re if ( guest_mode(regs) ) return show_guest_stack(regs); - printk("Xen stack trace from "__OP"sp=%p:\n ", stack); + printk("Xen stack trace from "__OP"sp=%p:\n ", stack); for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ ) { if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 ) break; if ( (i != 0) && ((i % stack_words_per_line) == 0) ) - printk("\n "); + printk("\n "); addr = *stack++; - printk("%p ", _p(addr)); + printk(" %p", _p(addr)); } if ( i == 0 ) printk("Stack empty."); @@ -318,8 +319,7 @@ asmlinkage void fatal_trap(int trapnr, s console_force_lock(); /* Wait for manual 
reset. */ - for ( ; ; ) - __asm__ __volatile__ ( "hlt" ); + machine_halt(); } static inline int do_trap(int trapnr, char *str, diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_32/Makefile --- a/xen/arch/x86/x86_32/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_32/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += domain_page.o obj-y += entry.o obj-y += mm.o @@ -7,5 +5,3 @@ obj-y += traps.o obj-y += traps.o obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_32/asm-offsets.c --- a/xen/arch/x86/x86_32/asm-offsets.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_32/asm-offsets.c Tue Apr 11 18:54:18 2006 -0600 @@ -44,7 +44,6 @@ void __dummy__(void) OFFSET(UREGS_eflags, struct cpu_user_regs, eflags); OFFSET(UREGS_error_code, struct cpu_user_regs, error_code); OFFSET(UREGS_entry_vector, struct cpu_user_regs, entry_vector); - OFFSET(UREGS_saved_upcall_mask, struct cpu_user_regs, saved_upcall_mask); OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, esp); DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs)); BLANK(); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_32/entry.S Tue Apr 11 18:54:18 2006 -0600 @@ -119,7 +119,7 @@ FIX1: SET_XEN_SEGMENTS(a) movl $DBLFLT1,%eax pushl %eax # EIP pushl %esi # error_code/entry_vector - jmp error_code + jmp handle_exception DBLFLT1:GET_CURRENT(%ebx) jmp test_all_events failsafe_callback: @@ -381,14 +381,6 @@ domain_crash_synchronous: jmp __domain_crash_synchronous ALIGN -process_guest_exception_and_events: - leal VCPU_trap_bounce(%ebx),%edx - testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx) - jz test_all_events - call create_bounce_frame - jmp test_all_events - - ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) movl UREGS_eflags(%esp),%eax @@ -400,7 +392,7 @@ ENTRY(divide_error) ENTRY(divide_error) pushl $TRAP_divide_error<<16 ALIGN -error_code: +handle_exception: FIXUP_RING0_GUEST_STACK SAVE_ALL_NOSEGREGS(a) SET_XEN_SEGMENTS(a) @@ -419,7 +411,11 @@ error_code: movb UREGS_cs(%esp),%al testl $(3|X86_EFLAGS_VM),%eax jz restore_all_xen - jmp process_guest_exception_and_events + leal VCPU_trap_bounce(%ebx),%edx + testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx) + jz test_all_events + call create_bounce_frame + jmp test_all_events exception_with_ints_disabled: movl UREGS_eflags(%esp),%eax @@ -452,71 +448,71 @@ FATAL_exception_with_ints_disabled: ENTRY(coprocessor_error) pushl $TRAP_copro_error<<16 - jmp error_code + jmp handle_exception ENTRY(simd_coprocessor_error) pushl $TRAP_simd_error<<16 - jmp error_code + jmp handle_exception ENTRY(device_not_available) pushl $TRAP_no_device<<16 - jmp error_code + jmp handle_exception ENTRY(debug) pushl $TRAP_debug<<16 - jmp error_code + jmp handle_exception ENTRY(int3) pushl $TRAP_int3<<16 - jmp error_code + jmp handle_exception ENTRY(overflow) pushl $TRAP_overflow<<16 - jmp error_code + jmp handle_exception ENTRY(bounds) pushl $TRAP_bounds<<16 - jmp error_code + jmp handle_exception ENTRY(invalid_op) pushl $TRAP_invalid_op<<16 - jmp error_code + jmp handle_exception ENTRY(coprocessor_segment_overrun) pushl $TRAP_copro_seg<<16 - jmp error_code + jmp handle_exception ENTRY(invalid_TSS) - movw $TRAP_invalid_tss,2(%esp) - jmp error_code + movw $TRAP_invalid_tss,2(%esp) + jmp handle_exception ENTRY(segment_not_present) - movw $TRAP_no_segment,2(%esp) 
- jmp error_code + movw $TRAP_no_segment,2(%esp) + jmp handle_exception ENTRY(stack_segment) - movw $TRAP_stack_error,2(%esp) - jmp error_code + movw $TRAP_stack_error,2(%esp) + jmp handle_exception ENTRY(general_protection) - movw $TRAP_gp_fault,2(%esp) - jmp error_code + movw $TRAP_gp_fault,2(%esp) + jmp handle_exception ENTRY(alignment_check) - movw $TRAP_alignment_check,2(%esp) - jmp error_code + movw $TRAP_alignment_check,2(%esp) + jmp handle_exception ENTRY(page_fault) - movw $TRAP_page_fault,2(%esp) - jmp error_code + movw $TRAP_page_fault,2(%esp) + jmp handle_exception ENTRY(machine_check) pushl $TRAP_machine_check<<16 - jmp error_code + jmp handle_exception ENTRY(spurious_interrupt_bug) pushl $TRAP_spurious_int<<16 - jmp error_code + jmp handle_exception ENTRY(nmi) #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL @@ -561,7 +557,7 @@ 1: movl %ss:APIC_ICR(%eax),%ebx testl $APIC_ICR_BUSY,%ebx jnz 1b # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi) - movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_LOGICAL | \ + movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \ TRAP_deferred_nmi),%ss:APIC_ICR(%eax) jmp restore_all_xen #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */ @@ -579,19 +575,19 @@ ENTRY(setup_vm86_frame) addl $16,%esp ret +do_arch_sched_op_compat: + # Ensure we return success even if we return via schedule_tail() + xorl %eax,%eax + GET_GUEST_REGS(%ecx) + movl %eax,UREGS_eax(%ecx) + jmp do_sched_op_compat + do_arch_sched_op: # Ensure we return success even if we return via schedule_tail() xorl %eax,%eax GET_GUEST_REGS(%ecx) movl %eax,UREGS_eax(%ecx) jmp do_sched_op - -do_arch_sched_op_new: - # Ensure we return success even if we return via schedule_tail() - xorl %eax,%eax - GET_GUEST_REGS(%ecx) - movl %eax,UREGS_eax(%ecx) - jmp do_sched_op_new .data @@ -624,7 +620,7 @@ ENTRY(hypercall_table) .long do_stack_switch .long do_set_callbacks .long do_fpu_taskswitch /* 5 */ - .long do_arch_sched_op + .long do_arch_sched_op_compat .long do_dom0_op .long do_set_debugreg .long do_get_debugreg @@ -647,7 +643,9 @@ ENTRY(hypercall_table) .long do_mmuext_op .long do_acm_op .long do_nmi_op - .long do_arch_sched_op_new + .long do_arch_sched_op + .long do_callback_op /* 30 */ + .long do_xenoprof_op .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr @@ -659,7 +657,7 @@ ENTRY(hypercall_args_table) .byte 2 /* do_stack_switch */ .byte 4 /* do_set_callbacks */ .byte 1 /* do_fpu_taskswitch */ /* 5 */ - .byte 2 /* do_arch_sched_op */ + .byte 2 /* do_arch_sched_op_compat */ .byte 1 /* do_dom0_op */ .byte 2 /* do_set_debugreg */ .byte 1 /* do_get_debugreg */ @@ -682,7 +680,9 @@ ENTRY(hypercall_args_table) .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ .byte 2 /* do_nmi_op */ - .byte 2 /* do_arch_sched_op_new */ + .byte 2 /* do_arch_sched_op */ + .byte 2 /* do_callback_op */ /* 30 */ + .byte 3 /* do_xenoprof_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_32/traps.c Tue Apr 11 18:54:18 2006 -0600 @@ -9,10 +9,13 @@ #include <xen/mm.h> #include <xen/irq.h> #include <xen/symbols.h> +#include <xen/reboot.h> #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> + +#include <public/callback.h> /* All CPUs have their own IDT to allow int80 direct trap. 
*/ idt_entry_t *idt_tables[NR_CPUS] = { 0 }; @@ -178,8 +181,7 @@ asmlinkage void do_double_fault(void) console_force_lock(); /* Wait for manual reset. */ - for ( ; ; ) - __asm__ __volatile__ ( "hlt" ); + machine_halt(); } unsigned long do_iret(void) @@ -230,8 +232,8 @@ unsigned long do_iret(void) /* No longer in NMI context. */ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags); - /* Restore upcall mask from saved value. */ - current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask; + /* Restore upcall mask from supplied EFLAGS.IF. */ + current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF); /* * The hypercall exit path will overwrite EAX with this return @@ -315,20 +317,102 @@ void init_int80_direct_trap(struct vcpu set_int80_direct_trap(v); } +static long register_guest_callback(struct callback_register *reg) +{ + long ret = 0; + struct vcpu *v = current; + + fixup_guest_code_selector(reg->address.cs); + + switch ( reg->type ) + { + case CALLBACKTYPE_event: + v->arch.guest_context.event_callback_cs = reg->address.cs; + v->arch.guest_context.event_callback_eip = reg->address.eip; + break; + + case CALLBACKTYPE_failsafe: + v->arch.guest_context.failsafe_callback_cs = reg->address.cs; + v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static long unregister_guest_callback(struct callback_unregister *unreg) +{ + long ret; + + switch ( unreg->type ) + { + default: + ret = -EINVAL; + break; + } + + return ret; +} + + +long do_callback_op(int cmd, GUEST_HANDLE(void) arg) +{ + long ret; + + switch ( cmd ) + { + case CALLBACKOP_register: + { + struct callback_register reg; + + ret = -EFAULT; + if ( copy_from_guest(&reg, arg, 1) ) + break; + + ret = register_guest_callback(&reg); + } + break; + + case CALLBACKOP_unregister: + { + struct callback_unregister unreg; + + ret = -EFAULT; + if ( copy_from_guest(&unreg, arg, 1) ) + break; + + ret = unregister_guest_callback(&unreg); + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + long do_set_callbacks(unsigned long event_selector, unsigned long event_address, unsigned long failsafe_selector, unsigned long failsafe_address) { - struct vcpu *d = current; - - fixup_guest_code_selector(event_selector); - fixup_guest_code_selector(failsafe_selector); - - d->arch.guest_context.event_callback_cs = event_selector; - d->arch.guest_context.event_callback_eip = event_address; - d->arch.guest_context.failsafe_callback_cs = failsafe_selector; - d->arch.guest_context.failsafe_callback_eip = failsafe_address; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = { event_selector, event_address }, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = { failsafe_selector, failsafe_address }, + }; + + register_guest_callback(&event); + register_guest_callback(&failsafe); return 0; } diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_64/Makefile --- a/xen/arch/x86/x86_64/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_64/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += entry.o obj-y += mm.o obj-y += traps.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_64/asm-offsets.c Tue Apr 11 18:54:18 2006 -0600 @@ -48,7 +48,6 @@ void __dummy__(void) 
OFFSET(UREGS_eflags, struct cpu_user_regs, eflags); OFFSET(UREGS_rsp, struct cpu_user_regs, rsp); OFFSET(UREGS_ss, struct cpu_user_regs, ss); - OFFSET(UREGS_saved_upcall_mask, struct cpu_user_regs, saved_upcall_mask); OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs)); BLANK(); diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_64/entry.S Tue Apr 11 18:54:18 2006 -0600 @@ -68,7 +68,7 @@ FIX1: popq -15*8-8(%rsp) # leaq DBLFLT1(%rip),%rax pushq %rax # RIP pushq %rsi # error_code/entry_vector - jmp error_code + jmp handle_exception DBLFLT1:GET_CURRENT(%rbx) jmp test_all_events failsafe_callback: @@ -320,15 +320,6 @@ domain_crash_synchronous: jmp __domain_crash_synchronous ALIGN -/* %rbx: struct vcpu */ -process_guest_exception_and_events: - leaq VCPU_trap_bounce(%rbx),%rdx - testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) - jz test_all_events - call create_bounce_frame - jmp test_all_events - - ALIGN /* No special register assumptions. */ ENTRY(ret_from_intr) GET_CURRENT(%rbx) @@ -338,7 +329,7 @@ ENTRY(ret_from_intr) ALIGN /* No special register assumptions. */ -error_code: +handle_exception: SAVE_ALL testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp) jz exception_with_ints_disabled @@ -351,7 +342,11 @@ error_code: callq *(%rdx,%rax,8) testb $3,UREGS_cs(%rsp) jz restore_all_xen - jmp process_guest_exception_and_events + leaq VCPU_trap_bounce(%rbx),%rdx + testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) + jz test_all_events + call create_bounce_frame + jmp test_all_events /* No special register assumptions. */ exception_with_ints_disabled: @@ -384,90 +379,90 @@ ENTRY(divide_error) ENTRY(divide_error) pushq $0 movl $TRAP_divide_error,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(coprocessor_error) pushq $0 movl $TRAP_copro_error,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(simd_coprocessor_error) pushq $0 movl $TRAP_simd_error,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(device_not_available) pushq $0 movl $TRAP_no_device,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(debug) pushq $0 movl $TRAP_debug,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(int3) pushq $0 movl $TRAP_int3,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(overflow) pushq $0 movl $TRAP_overflow,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(bounds) pushq $0 movl $TRAP_bounds,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(invalid_op) pushq $0 movl $TRAP_invalid_op,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(coprocessor_segment_overrun) pushq $0 movl $TRAP_copro_seg,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(invalid_TSS) movl $TRAP_invalid_tss,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(segment_not_present) movl $TRAP_no_segment,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(stack_segment) movl $TRAP_stack_error,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(general_protection) movl $TRAP_gp_fault,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(alignment_check) movl $TRAP_alignment_check,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(page_fault) movl $TRAP_page_fault,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(machine_check) pushq $0 movl $TRAP_machine_check,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(spurious_interrupt_bug) pushq $0 movl $TRAP_spurious_int,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(double_fault) movl 
$TRAP_double_fault,4(%rsp) - jmp error_code + jmp handle_exception ENTRY(nmi) pushq $0 @@ -488,19 +483,19 @@ nmi_in_hypervisor_mode: call do_nmi jmp ret_from_intr +do_arch_sched_op_compat: + # Ensure we return success even if we return via schedule_tail() + xorl %eax,%eax + GET_GUEST_REGS(%r10) + movq %rax,UREGS_rax(%r10) + jmp do_sched_op_compat + do_arch_sched_op: # Ensure we return success even if we return via schedule_tail() xorl %eax,%eax GET_GUEST_REGS(%r10) movq %rax,UREGS_rax(%r10) jmp do_sched_op - -do_arch_sched_op_new: - # Ensure we return success even if we return via schedule_tail() - xorl %eax,%eax - GET_GUEST_REGS(%r10) - movq %rax,UREGS_rax(%r10) - jmp do_sched_op_new .data @@ -533,7 +528,7 @@ ENTRY(hypercall_table) .quad do_stack_switch .quad do_set_callbacks .quad do_fpu_taskswitch /* 5 */ - .quad do_arch_sched_op + .quad do_arch_sched_op_compat .quad do_dom0_op .quad do_set_debugreg .quad do_get_debugreg @@ -556,7 +551,9 @@ ENTRY(hypercall_table) .quad do_mmuext_op .quad do_acm_op .quad do_nmi_op - .quad do_arch_sched_op_new + .quad do_arch_sched_op + .quad do_callback_op /* 30 */ + .quad do_xenoprof_op .rept NR_hypercalls-((.-hypercall_table)/8) .quad do_ni_hypercall .endr @@ -568,7 +565,7 @@ ENTRY(hypercall_args_table) .byte 2 /* do_stack_switch */ .byte 3 /* do_set_callbacks */ .byte 1 /* do_fpu_taskswitch */ /* 5 */ - .byte 2 /* do_arch_sched_op */ + .byte 2 /* do_arch_sched_op_compat */ .byte 1 /* do_dom0_op */ .byte 2 /* do_set_debugreg */ .byte 1 /* do_get_debugreg */ @@ -591,7 +588,9 @@ ENTRY(hypercall_args_table) .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ .byte 2 /* do_nmi_op */ - .byte 2 /* do_arch_sched_op_new */ + .byte 2 /* do_arch_sched_op */ + .byte 2 /* do_callback_op */ /* 30 */ + .byte 3 /* do_xenoprof_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_64/mm.c Tue Apr 11 18:54:18 2006 -0600 @@ -148,12 +148,14 @@ void subarch_init_memory(void) if ( ((offsetof(struct page_info, u.inuse._domain) != (offsetof(struct page_info, count_info) + sizeof(u32)))) || ((offsetof(struct page_info, count_info) & 7) != 0) || - (sizeof(struct page_info) != 40) ) - { - printk("Weird page_info layout (%ld,%ld,%ld)\n", + (sizeof(struct page_info) != + (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long))) ) + { + printk("Weird page_info layout (%ld,%ld,%ld,%ld)\n", offsetof(struct page_info, count_info), offsetof(struct page_info, u.inuse._domain), - sizeof(struct page_info)); + sizeof(struct page_info), + 32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long)); for ( ; ; ) ; } diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/arch/x86/x86_64/traps.c Tue Apr 11 18:54:18 2006 -0600 @@ -10,12 +10,15 @@ #include <xen/symbols.h> #include <xen/console.h> #include <xen/sched.h> +#include <xen/reboot.h> #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/msr.h> #include <asm/shadow.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> + +#include <public/callback.h> void show_registers(struct cpu_user_regs *regs) { @@ -164,8 +167,7 @@ asmlinkage void do_double_fault(struct c console_force_lock(); /* Wait for manual reset. 
*/ - for ( ; ; ) - __asm__ __volatile__ ( "hlt" ); + machine_halt(); } void toggle_guest_mode(struct vcpu *v) @@ -184,13 +186,19 @@ unsigned long do_iret(void) if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp, sizeof(iret_saved))) ) + { + DPRINTK("Fault while reading IRET context from guest stack\n"); domain_crash_synchronous(); + } /* Returning to user mode? */ if ( (iret_saved.cs & 3) == 3 ) { if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) ) - return -EFAULT; + { + DPRINTK("Guest switching to user mode with no user page tables\n"); + domain_crash_synchronous(); + } toggle_guest_mode(v); } @@ -210,6 +218,9 @@ unsigned long do_iret(void) /* No longer in NMI context. */ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags); + /* Restore upcall mask from supplied EFLAGS.IF. */ + current->vcpu_info->evtchn_upcall_mask = !(iret_saved.rflags & EF_IE); + /* Saved %rax gets written back to regs->rax in entry.S. */ return iret_saved.rax; } @@ -309,15 +320,106 @@ void __init percpu_traps_init(void) wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U); } +static long register_guest_callback(struct callback_register *reg) +{ + long ret = 0; + struct vcpu *v = current; + + switch ( reg->type ) + { + case CALLBACKTYPE_event: + v->arch.guest_context.event_callback_eip = reg->address; + break; + + case CALLBACKTYPE_failsafe: + v->arch.guest_context.failsafe_callback_eip = reg->address; + break; + + case CALLBACKTYPE_syscall: + v->arch.guest_context.syscall_callback_eip = reg->address; + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static long unregister_guest_callback(struct callback_unregister *unreg) +{ + long ret; + + switch ( unreg->type ) + { + default: + ret = -EINVAL; + break; + } + + return ret; +} + + +long do_callback_op(int cmd, GUEST_HANDLE(void) arg) +{ + long ret; + + switch ( cmd ) + { + case CALLBACKOP_register: + { + struct callback_register reg; + + ret = -EFAULT; + if ( copy_from_guest(&reg, arg, 1) ) + break; + + ret = register_guest_callback(&reg); + } + break; + + case CALLBACKOP_unregister: + { + struct callback_unregister unreg; + + ret = -EFAULT; + if ( copy_from_guest(&unreg, arg, 1) ) + break; + + ret = unregister_guest_callback(&unreg); + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + long do_set_callbacks(unsigned long event_address, unsigned long failsafe_address, unsigned long syscall_address) { - struct vcpu *d = current; - - d->arch.guest_context.event_callback_eip = event_address; - d->arch.guest_context.failsafe_callback_eip = failsafe_address; - d->arch.guest_context.syscall_callback_eip = syscall_address; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = event_address, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = failsafe_address, + }; + struct callback_register syscall = { + .type = CALLBACKTYPE_syscall, + .address = syscall_address, + }; + + register_guest_callback(&event); + register_guest_callback(&failsafe); + register_guest_callback(&syscall); return 0; } diff -r 5719550652a1 -r 5cc367720223 xen/common/Makefile --- a/xen/common/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,3 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += acm_ops.o obj-y += bitmap.o obj-y += dom0_ops.o @@ -28,7 +26,5 @@ obj-$(perfc) += perfc.o obj-$(perfc) += perfc.o obj-$(crash_debug) += gdbstub.o -include $(BASEDIR)/Post.mk - # Object file contains 
changeset and compiler information. kernel.o: $(BASEDIR)/include/xen/compile.h diff -r 5719550652a1 -r 5cc367720223 xen/common/domain.c --- a/xen/common/domain.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/domain.c Tue Apr 11 18:54:18 2006 -0600 @@ -19,6 +19,7 @@ #include <xen/rangeset.h> #include <xen/guest_access.h> #include <xen/hypercall.h> +#include <xen/delay.h> #include <asm/debugger.h> #include <public/dom0_ops.h> #include <public/sched.h> @@ -136,7 +137,7 @@ void domain_kill(struct domain *d) domain_relinquish_resources(d); put_domain(d); - send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC); + send_guest_global_virq(dom0, VIRQ_DOM_EXC); } } @@ -191,7 +192,7 @@ static void domain_shutdown_finalise(voi /* Don't set DOMF_shutdown until execution contexts are sync'ed. */ if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) ) - send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC); + send_guest_global_virq(dom0, VIRQ_DOM_EXC); UNLOCK_BIGLOCK(d); @@ -221,6 +222,13 @@ void domain_shutdown(struct domain *d, u { printk("Domain 0 halted: halting machine.\n"); machine_halt(); + } + else if ( reason == SHUTDOWN_crash ) + { + printk("Domain 0 crashed: rebooting machine in 5 seconds.\n"); + watchdog_disable(); + mdelay(5000); + machine_restart(0); } else { @@ -259,7 +267,7 @@ void domain_pause_for_debugger(void) for_each_vcpu ( d, v ) vcpu_sleep_nosync(v); - send_guest_virq(dom0->vcpu[0], VIRQ_DEBUGGER); + send_guest_global_virq(dom0, VIRQ_DEBUGGER); } @@ -299,7 +307,7 @@ void domain_destroy(struct domain *d) free_domain(d); - send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC); + send_guest_global_virq(dom0, VIRQ_DOM_EXC); } void vcpu_pause(struct vcpu *v) diff -r 5719550652a1 -r 5cc367720223 xen/common/event_channel.c --- a/xen/common/event_channel.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/event_channel.c Tue Apr 11 18:54:18 2006 -0600 @@ -3,7 +3,7 @@ * * Event notifications from VIRQs, PIRQs, and other domains. * - * Copyright (c) 2003-2005, K A Fraser. + * Copyright (c) 2003-2006, K A Fraser. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -46,6 +46,29 @@ goto out; \ } while ( 0 ) + +static int virq_is_global(int virq) +{ + int rc; + + ASSERT((virq >= 0) && (virq < NR_VIRQS)); + + switch ( virq ) + { + case VIRQ_TIMER: + case VIRQ_DEBUG: + case VIRQ_XENOPROF: + rc = 0; + break; + default: + rc = 1; + break; + } + + return rc; +} + + static int get_free_port(struct domain *d) { struct evtchn *chn; @@ -179,6 +202,9 @@ static long evtchn_bind_virq(evtchn_bind long rc = 0; if ( virq >= ARRAY_SIZE(v->virq_to_evtchn) ) + return -EINVAL; + + if ( virq_is_global(virq) && (vcpu != 0) ) return -EINVAL; if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || ((v = d->vcpu[vcpu]) == NULL) ) @@ -360,7 +386,7 @@ static long __evtchn_close(struct domain rc = -EINVAL; goto out; } - + port2 = chn1->u.interdomain.remote_port; BUG_ON(!port_is_valid(d2, port2)); @@ -437,6 +463,7 @@ long evtchn_send(unsigned int lport) return ret; } + void evtchn_set_pending(struct vcpu *v, int port) { @@ -471,20 +498,47 @@ void evtchn_set_pending(struct vcpu *v, } } -void send_guest_virq(struct vcpu *v, int virq) -{ - int port = v->virq_to_evtchn[virq]; - - if ( likely(port != 0) ) - evtchn_set_pending(v, port); -} + +void send_guest_vcpu_virq(struct vcpu *v, int virq) +{ + int port; + + ASSERT(!virq_is_global(virq)); + + port = v->virq_to_evtchn[virq]; + if ( unlikely(port == 0) ) + return; + + evtchn_set_pending(v, port); +} + +void send_guest_global_virq(struct domain *d, int virq) +{ + int port; + struct evtchn *chn; + + ASSERT(virq_is_global(virq)); + + port = d->vcpu[0]->virq_to_evtchn[virq]; + if ( unlikely(port == 0) ) + return; + + chn = evtchn_from_port(d, port); + evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); +} + void send_guest_pirq(struct domain *d, int pirq) { int port = d->pirq_to_evtchn[pirq]; - struct evtchn *chn = evtchn_from_port(d, port); + struct evtchn *chn; + + ASSERT(port != 0); + + chn = evtchn_from_port(d, port); evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); } + static long evtchn_status(evtchn_status_t *status) { @@ -550,6 +604,7 @@ static long evtchn_status(evtchn_status_ return rc; } + long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id) { struct domain *d = current->domain; @@ -570,6 +625,12 @@ long evtchn_bind_vcpu(unsigned int port, chn = evtchn_from_port(d, port); switch ( chn->state ) { + case ECS_VIRQ: + if ( virq_is_global(chn->u.virq) ) + chn->notify_vcpu_id = vcpu_id; + else + rc = -EINVAL; + break; case ECS_UNBOUND: case ECS_INTERDOMAIN: case ECS_PIRQ: @@ -584,6 +645,7 @@ long evtchn_bind_vcpu(unsigned int port, spin_unlock(&d->evtchn_lock); return rc; } + static long evtchn_unmask(evtchn_unmask_t *unmask) { @@ -620,6 +682,7 @@ static long evtchn_unmask(evtchn_unmask_ return 0; } + long do_event_channel_op(GUEST_HANDLE(evtchn_op_t) uop) { long rc; @@ -691,6 +754,13 @@ long do_event_channel_op(GUEST_HANDLE(ev } return rc; +} + + +void evtchn_notify_reserved_port(struct domain *d, int port) +{ + struct evtchn *chn = evtchn_from_port(d, port); + evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); } diff -r 5719550652a1 -r 5cc367720223 xen/common/grant_table.c --- a/xen/common/grant_table.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/grant_table.c Tue Apr 11 18:54:18 2006 -0600 @@ -618,8 +618,10 @@ gnttab_transfer( return -EFAULT; } + mfn = gmfn_to_mfn(d, gop.mfn); + /* Check the passed page frame for basic validity. 
*/ - if ( unlikely(!mfn_valid(gop.mfn)) ) + if ( unlikely(!mfn_valid(mfn)) ) { DPRINTK("gnttab_transfer: out-of-range %lx\n", (unsigned long)gop.mfn); @@ -627,7 +629,6 @@ gnttab_transfer( goto copyback; } - mfn = gmfn_to_mfn(d, gop.mfn); page = mfn_to_page(mfn); if ( unlikely(IS_XEN_HEAP_FRAME(page)) ) { diff -r 5719550652a1 -r 5cc367720223 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/keyhandler.c Tue Apr 11 18:54:18 2006 -0600 @@ -162,7 +162,7 @@ static void dump_domains(unsigned char k &d->shared_info->evtchn_mask[0]), test_bit(v->virq_to_evtchn[VIRQ_DEBUG]/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel)); - send_guest_virq(v, VIRQ_DEBUG); + send_guest_vcpu_virq(v, VIRQ_DEBUG); } } diff -r 5719550652a1 -r 5cc367720223 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/page_alloc.c Tue Apr 11 18:54:18 2006 -0600 @@ -42,6 +42,20 @@ static char opt_badpage[100] = ""; static char opt_badpage[100] = ""; string_param("badpage", opt_badpage); +/* + * Amount of memory to reserve in a low-memory (<4GB) pool for specific + * allocation requests. Ordinary requests will not fall back to the + * lowmem emergency pool. + */ +static unsigned long lowmem_emergency_pool_pages; +static void parse_lowmem_emergency_pool(char *s) +{ + unsigned long long bytes; + bytes = parse_size_and_unit(s); + lowmem_emergency_pool_pages = bytes >> PAGE_SHIFT; +} +custom_param("lowmem_emergency_pool", parse_lowmem_emergency_pool); + #define round_pgdown(_p) ((_p)&PAGE_MASK) #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) @@ -156,7 +170,7 @@ paddr_t init_boot_allocator(paddr_t bitm void init_boot_pages(paddr_t ps, paddr_t pe) { - unsigned long bad_pfn; + unsigned long bad_spfn, bad_epfn, i; char *p; ps = round_pgup(ps); @@ -170,18 +184,31 @@ void init_boot_pages(paddr_t ps, paddr_t p = opt_badpage; while ( *p != '\0' ) { - bad_pfn = simple_strtoul(p, &p, 0); + bad_spfn = simple_strtoul(p, &p, 0); + bad_epfn = bad_spfn; + + if ( *p == '-' ) + { + p++; + bad_epfn = simple_strtoul(p, &p, 0); + if ( bad_epfn < bad_spfn ) + bad_epfn = bad_spfn; + } if ( *p == ',' ) p++; else if ( *p != '\0' ) break; - if ( (bad_pfn < max_page) && !allocated_in_map(bad_pfn) ) - { - printk("Marking page %lx as bad\n", bad_pfn); - map_alloc(bad_pfn, 1); - } + if ( bad_epfn == bad_spfn ) + printk("Marking page %lx as bad\n", bad_spfn); + else + printk("Marking pages %lx through %lx as bad\n", + bad_spfn, bad_epfn); + + for ( i = bad_spfn; i <= bad_epfn; i++ ) + if ( (i < max_page) && !allocated_in_map(i) ) + map_alloc(i, 1); } } @@ -514,7 +541,15 @@ struct page_info *alloc_domheap_pages( ASSERT(!in_irq()); if ( !(flags & ALLOC_DOM_DMA) ) + { pg = alloc_heap_pages(MEMZONE_DOM, order); + /* Failure? Then check if we can fall back to the DMA pool. 
*/ + if ( unlikely(pg == NULL) && + ((order > MAX_ORDER) || + (avail[MEMZONE_DMADOM] < + (lowmem_emergency_pool_pages + (1UL << order)))) ) + return NULL; + } if ( pg == NULL ) if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL ) @@ -657,7 +692,17 @@ void free_domheap_pages(struct page_info unsigned long avail_domheap_pages(void) { - return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM]; + unsigned long avail_nrm, avail_dma; + + avail_nrm = avail[MEMZONE_DOM]; + + avail_dma = avail[MEMZONE_DMADOM]; + if ( avail_dma > lowmem_emergency_pool_pages ) + avail_dma -= lowmem_emergency_pool_pages; + else + avail_dma = 0; + + return avail_nrm + avail_dma; } diff -r 5719550652a1 -r 5cc367720223 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/sched_sedf.c Tue Apr 11 18:54:18 2006 -0600 @@ -15,34 +15,23 @@ /*verbosity settings*/ #define SEDFLEVEL 0 -#define PRINT(_f, _a...) \ - if ((_f)<=SEDFLEVEL) printk(_a ); +#define PRINT(_f, _a...) \ + do { \ + if ( (_f) <= SEDFLEVEL ) \ + printk(_a ); \ + } while ( 0 ) #ifndef NDEBUG #define SEDF_STATS -#define CHECK(_p) if ( !(_p) ) \ - { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\ - __FILE__);} +#define CHECK(_p) \ + do { \ + if ( !(_p) ) \ + printk("Check '%s' failed, line %d, file %s\n", \ + #_p , __LINE__, __FILE__); \ + } while ( 0 ) #else #define CHECK(_p) ((void)0) #endif - -/*various ways of unblocking domains*/ -#define UNBLOCK_ISOCHRONOUS_EDF 1 -#define UNBLOCK_EDF 2 -#define UNBLOCK_ATROPOS 3 -#define UNBLOCK_SHORT_RESUME 4 -#define UNBLOCK_BURST 5 -#define UNBLOCK_EXTRA_SUPPORT 6 -#define UNBLOCK UNBLOCK_EXTRA_SUPPORT - -/*various ways of treating extra-time*/ -#define EXTRA_OFF 1 -#define EXTRA_ROUNDR 2 -#define EXTRA_SLICE_WEIGHT 3 -#define EXTRA_BLOCK_WEIGHT 4 - -#define EXTRA EXTRA_BLOCK_WEIGHT #define EXTRA_NONE (0) #define EXTRA_AWARE (1) @@ -68,8 +57,8 @@ struct sedf_dom_info { struct sedf_dom_info { struct domain *domain; }; -struct sedf_vcpu_info -{ + +struct sedf_vcpu_info { struct vcpu *vcpu; struct list_head list; struct list_head extralist[2]; @@ -85,10 +74,10 @@ struct sedf_vcpu_info s_time_t latency; /*status of domain*/ - int status; + int status; /*weights for "Scheduling for beginners/ lazy/ etc." 
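/*
 * Illustrative, simplified sketch (not part of the changeset above; the
 * helper name and the numbers are made up).  It restates the fallback
 * rule added to alloc_domheap_pages(): an ordinary (non-DMA) allocation
 * may spill into the low-memory zone only if doing so leaves at least the
 * reserved lowmem emergency pool behind.
 */
#include <stdio.h>

#define MAX_ORDER 20

static unsigned long lowmem_emergency_pool_pages = 256;  /* e.g. parsed from "1M" */

static int may_fall_back_to_dma(unsigned long avail_dma_pages,
                                unsigned int order)
{
    if (order > MAX_ORDER)
        return 0;
    /* Keep the emergency reserve intact after the allocation. */
    return avail_dma_pages >= lowmem_emergency_pool_pages + (1UL << order);
}

int main(void)
{
    printf("order 4, 300 DMA pages free -> fallback? %d\n",
           may_fall_back_to_dma(300, 4));   /* 300 >= 256+16  -> yes */
    printf("order 8, 300 DMA pages free -> fallback? %d\n",
           may_fall_back_to_dma(300, 8));   /* 300 <  256+256 -> no  */
    return 0;
}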
;)*/ - short weight; - short extraweight; + short weight; + short extraweight; /*Bookkeeping*/ s_time_t deadl_abs; s_time_t sched_start_abs; @@ -123,28 +112,29 @@ struct sedf_cpu_info { s_time_t current_slice_expires; }; -#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv)) -#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv) -#define LIST(d) (&EDOM_INFO(d)->list) -#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i])) -#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq) +#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv)) +#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv) +#define LIST(d) (&EDOM_INFO(d)->list) +#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i])) +#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq) #define WAITQ(cpu) (&CPU_INFO(cpu)->waitq) -#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i])) +#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i])) #define IDLETASK(cpu) ((struct vcpu *)schedule_data[cpu].idle) #define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period) -#define MIN(x,y) (((x)<(y))?(x):(y)) +#define MIN(x,y) (((x)<(y))?(x):(y)) #define DIV_UP(x,y) (((x) + (y) - 1) / y) -#define extra_runs(inf) ((inf->status) & 6) +#define extra_runs(inf) ((inf->status) & 6) #define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1) -#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP)) +#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP)) static void sedf_dump_cpu_state(int i); -static inline int extraq_on(struct vcpu *d, int i) { +static inline int extraq_on(struct vcpu *d, int i) +{ return ((EXTRALIST(d,i)->next != NULL) && (EXTRALIST(d,i)->next != EXTRALIST(d,i))); } @@ -165,8 +155,8 @@ static inline void extraq_del(struct vcp { struct list_head *list = EXTRALIST(d,i); ASSERT(extraq_on(d,i)); - PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id, - d->vcpu_id, i); + PRINT(3, "Removing domain %i.%i from L%i extraq\n", + d->domain->domain_id, d->vcpu_id, i); list_del(list); list->next = NULL; ASSERT(!extraq_on(d, i)); @@ -178,94 +168,96 @@ static inline void extraq_del(struct vcp each entry, in order to avoid overflow. The algorithm works by simply charging each domain that recieved extratime with an inverse of its weight. */ -static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub) { +static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub) +{ struct list_head *cur; struct sedf_vcpu_info *curinf; ASSERT(!extraq_on(d,i)); + PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")" " to L%i extraq\n", d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i], EDOM_INFO(d)->short_block_lost_tot, i); - /*iterate through all elements to find our "hole" and on our way - update all the other scores*/ - list_for_each(cur,EXTRAQ(d->processor,i)){ + + /* + * Iterate through all elements to find our "hole" and on our way + * update all the other scores. 
+ */ + list_for_each ( cur, EXTRAQ(d->processor, i) ) + { curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]); curinf->score[i] -= sub; - if (EDOM_INFO(d)->score[i] < curinf->score[i]) + if ( EDOM_INFO(d)->score[i] < curinf->score[i] ) break; - else - PRINT(4,"\tbehind domain %i.%i (score= %i)\n", - curinf->vcpu->domain->domain_id, - curinf->vcpu->vcpu_id, curinf->score[i]); - } - /*cur now contains the element, before which we'll enqueue*/ + PRINT(4,"\tbehind domain %i.%i (score= %i)\n", + curinf->vcpu->domain->domain_id, + curinf->vcpu->vcpu_id, curinf->score[i]); + } + + /* cur now contains the element, before which we'll enqueue. */ PRINT(3, "\tlist_add to %p\n", cur->prev); list_add(EXTRALIST(d,i),cur->prev); - /*continue updating the extraq*/ - if ((cur != EXTRAQ(d->processor,i)) && sub) - for (cur = cur->next; cur != EXTRAQ(d->processor,i); - cur = cur-> next) { - curinf = list_entry(cur,struct sedf_vcpu_info, - extralist[i]); + /* Continue updating the extraq. */ + if ( (cur != EXTRAQ(d->processor,i)) && sub ) + { + for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next ) + { + curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]); curinf->score[i] -= sub; PRINT(4, "\tupdating domain %i.%i (score= %u)\n", curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id, curinf->score[i]); } + } + ASSERT(extraq_on(d,i)); } -static inline void extraq_check(struct vcpu *d) { - if (extraq_on(d, EXTRA_UTIL_Q)) { - PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id); - if (!(EDOM_INFO(d)->status & EXTRA_AWARE) && - !extra_runs(EDOM_INFO(d))) { +static inline void extraq_check(struct vcpu *d) +{ + if ( extraq_on(d, EXTRA_UTIL_Q) ) + { + PRINT(2,"Dom %i.%i is on L1 extraQ\n", + d->domain->domain_id, d->vcpu_id); + + if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) && + !extra_runs(EDOM_INFO(d)) ) + { extraq_del(d, EXTRA_UTIL_Q); PRINT(2,"Removed dom %i.%i from L1 extraQ\n", d->domain->domain_id, d->vcpu_id); } - } else { - PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id, + } + else + { + PRINT(2, "Dom %i.%i is NOT on L1 extraQ\n", + d->domain->domain_id, d->vcpu_id); - if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d)) - { -#if (EXTRA == EXTRA_ROUNDR) - extraq_add_tail(d, EXTRA_UTIL_Q); -#elif (EXTRA == EXTRA_SLICE_WEIGHT || \ - EXTRA == EXTRA_BLOCK_WEIGHT) + + if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) ) + { extraq_add_sort_update(d, EXTRA_UTIL_Q, 0); -#elif - ; -#endif - PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id, - d->vcpu_id); - } - } -} - -static inline void extraq_check_add_unblocked(struct vcpu *d, - int priority) { + PRINT(2,"Added dom %i.%i to L1 extraQ\n", + d->domain->domain_id, d->vcpu_id); + } + } +} + +static inline void extraq_check_add_unblocked(struct vcpu *d, int priority) +{ struct sedf_vcpu_info *inf = EDOM_INFO(d); - if (inf->status & EXTRA_AWARE) -#if (EXTRA == EXTRA_ROUNDR) - if (priority) - extraq_add_head(d,EXTRA_UTIL_Q); - else - extraq_add_tail(d,EXTRA_UTIL_Q); -#elif (EXTRA == EXTRA_SLICE_WEIGHT \ - || EXTRA == EXTRA_BLOCK_WEIGHT) - /*put in on the weighted extraq, - without updating any scores*/ - extraq_add_sort_update(d, EXTRA_UTIL_Q, 0); -#else - ; -#endif -} - -static inline int __task_on_queue(struct vcpu *d) { + + if ( inf->status & EXTRA_AWARE ) + /* Put on the weighted extraq without updating any scores. 
*/ + extraq_add_sort_update(d, EXTRA_UTIL_Q, 0); +} + +static inline int __task_on_queue(struct vcpu *d) +{ return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d))); } + static inline void __del_from_queue(struct vcpu *d) { struct list_head *list = LIST(d); @@ -279,42 +271,47 @@ static inline void __del_from_queue(stru typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2); -static inline void list_insert_sort(struct list_head *list, - struct list_head *element, list_comparer comp) { +static inline void list_insert_sort( + struct list_head *list, struct list_head *element, list_comparer comp) +{ struct list_head *cur; - /*iterate through all elements to find our "hole"*/ - list_for_each(cur,list){ - if (comp(element, cur) < 0) + + /* Iterate through all elements to find our "hole". */ + list_for_each( cur, list ) + if ( comp(element, cur) < 0 ) break; - } - /*cur now contains the element, before which we'll enqueue*/ + + /* cur now contains the element, before which we'll enqueue. */ PRINT(3,"\tlist_add to %p\n",cur->prev); list_add(element, cur->prev); -} +} + #define DOMAIN_COMPARER(name, field, comp1, comp2) \ int name##_comp(struct list_head* el1, struct list_head* el2) \ { \ - struct sedf_vcpu_info *d1, *d2; \ - d1 = list_entry(el1,struct sedf_vcpu_info, field); \ - d2 = list_entry(el2,struct sedf_vcpu_info, field); \ - if ((comp1) == (comp2)) \ - return 0; \ - if ((comp1) < (comp2)) \ - return -1; \ - else \ - return 1; \ -} + struct sedf_vcpu_info *d1, *d2; \ + d1 = list_entry(el1,struct sedf_vcpu_info, field); \ + d2 = list_entry(el2,struct sedf_vcpu_info, field); \ + if ( (comp1) == (comp2) ) \ + return 0; \ + if ( (comp1) < (comp2) ) \ + return -1; \ + else \ + return 1; \ +} + /* adds a domain to the queue of processes which wait for the beginning of the next period; this list is therefore sortet by this time, which is simply absol. deadline - period */ -DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2)) - static inline void __add_to_waitqueue_sort(struct vcpu *d) { - ASSERT(!__task_on_queue(d)); +DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2)); +static inline void __add_to_waitqueue_sort(struct vcpu *v) +{ + ASSERT(!__task_on_queue(v)); PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n", - d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d))); - list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp); - ASSERT(__task_on_queue(d)); + v->domain->domain_id, v->vcpu_id, PERIOD_BEGIN(EDOM_INFO(v))); + list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp); + ASSERT(__task_on_queue(v)); } /* adds a domain to the queue of processes which have started their current @@ -322,60 +319,62 @@ DOMAIN_COMPARER(waitq, list, PERIOD_BEGI on this list is running on the processor, if the list is empty the idle task will run. As we are implementing EDF, this list is sorted by deadlines. 
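/*
 * Illustrative, simplified sketch (not Xen's list_head code; plain singly
 * linked list instead).  It shows the generic pattern behind
 * list_insert_sort() and DOMAIN_COMPARER above: walk the queue with a
 * caller-supplied comparer and insert in front of the first element that
 * sorts after the new one, keeping the queue ordered (by period start for
 * the waitqueue, by absolute deadline for the runqueue).
 */
#include <stdio.h>

struct node { long key; struct node *next; };

typedef int (*list_comparer)(const struct node *a, const struct node *b);

static int by_key(const struct node *a, const struct node *b)
{
    return (a->key > b->key) - (a->key < b->key);
}

static void list_insert_sort(struct node **head, struct node *elem,
                             list_comparer comp)
{
    struct node **pos = head;

    /* Find the "hole": the first element the new one must precede. */
    while (*pos != NULL && comp(elem, *pos) >= 0)
        pos = &(*pos)->next;

    elem->next = *pos;
    *pos = elem;
}

int main(void)
{
    struct node a = { 30, NULL }, b = { 10, NULL }, c = { 20, NULL };
    struct node *q = NULL, *n;

    list_insert_sort(&q, &a, by_key);
    list_insert_sort(&q, &b, by_key);
    list_insert_sort(&q, &c, by_key);

    for (n = q; n != NULL; n = n->next)
        printf("%ld ", n->key);            /* prints: 10 20 30 */
    printf("\n");
    return 0;
}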
*/ -DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs) - static inline void __add_to_runqueue_sort(struct vcpu *d) { +DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs); +static inline void __add_to_runqueue_sort(struct vcpu *v) +{ PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n", - d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs); - list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp); + v->domain->domain_id, v->vcpu_id, EDOM_INFO(v)->deadl_abs); + list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp); } /* Allocates memory for per domain private scheduling data*/ -static int sedf_alloc_task(struct vcpu *d) +static int sedf_alloc_task(struct vcpu *v) { PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n", - d->domain->domain_id, d->vcpu_id); - - if ( d->domain->sched_priv == NULL ) - { - d->domain->sched_priv = xmalloc(struct sedf_dom_info); - if ( d->domain->sched_priv == NULL ) + v->domain->domain_id, v->vcpu_id); + + if ( v->domain->sched_priv == NULL ) + { + v->domain->sched_priv = xmalloc(struct sedf_dom_info); + if ( v->domain->sched_priv == NULL ) return -1; - memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info)); - } - - if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL ) + memset(v->domain->sched_priv, 0, sizeof(struct sedf_dom_info)); + } + + if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL ) return -1; - memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info)); + memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info)); return 0; } /* Setup the sedf_dom_info */ -static void sedf_add_task(struct vcpu *d) -{ - struct sedf_vcpu_info *inf = EDOM_INFO(d); - inf->vcpu = d; - - PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id, - d->vcpu_id); +static void sedf_add_task(struct vcpu *v) +{ + struct sedf_vcpu_info *inf = EDOM_INFO(v); + + inf->vcpu = v; + + PRINT(2,"sedf_add_task was called, domain-id %i.%i\n", + v->domain->domain_id, v->vcpu_id); /* Allocate per-CPU context if this is the first domain to be added. 
*/ - if ( unlikely(schedule_data[d->processor].sched_priv == NULL) ) - { - schedule_data[d->processor].sched_priv = + if ( unlikely(schedule_data[v->processor].sched_priv == NULL) ) + { + schedule_data[v->processor].sched_priv = xmalloc(struct sedf_cpu_info); - BUG_ON(schedule_data[d->processor].sched_priv == NULL); - memset(CPU_INFO(d->processor), 0, sizeof(*CPU_INFO(d->processor))); - INIT_LIST_HEAD(WAITQ(d->processor)); - INIT_LIST_HEAD(RUNQ(d->processor)); - INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q)); - INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q)); + BUG_ON(schedule_data[v->processor].sched_priv == NULL); + memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor))); + INIT_LIST_HEAD(WAITQ(v->processor)); + INIT_LIST_HEAD(RUNQ(v->processor)); + INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q)); + INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q)); } - if ( d->domain->domain_id == 0 ) + if ( v->domain->domain_id == 0 ) { /*set dom0 to something useful to boot the machine*/ inf->period = MILLISECS(20); @@ -400,14 +399,14 @@ static void sedf_add_task(struct vcpu *d INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q])); INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q])); - if ( !is_idle_vcpu(d) ) - { - extraq_check(d); + if ( !is_idle_vcpu(v) ) + { + extraq_check(v); } else { - EDOM_INFO(d)->deadl_abs = 0; - EDOM_INFO(d)->status &= ~SEDF_ASLEEP; + EDOM_INFO(v)->deadl_abs = 0; + EDOM_INFO(v)->status &= ~SEDF_ASLEEP; } } @@ -418,17 +417,11 @@ static void sedf_free_task(struct domain PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id); - ASSERT(d->sched_priv != NULL); xfree(d->sched_priv); for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - { if ( d->vcpu[i] ) - { - ASSERT(d->vcpu[i]->sched_priv != NULL); xfree(d->vcpu[i]->sched_priv); - } - } } /* @@ -438,64 +431,60 @@ static void desched_edf_dom(s_time_t now static void desched_edf_dom(s_time_t now, struct vcpu* d) { struct sedf_vcpu_info* inf = EDOM_INFO(d); - /*current domain is running in real time mode*/ - + + /* Current domain is running in real time mode. */ ASSERT(__task_on_queue(d)); - /*update the domains cputime*/ + + /* Update the domain's cputime. */ inf->cputime += now - inf->sched_start_abs; - /*scheduling decisions, which don't remove the running domain - from the runq*/ + /* + * Scheduling decisions which don't remove the running domain from the + * runq. + */ if ( (inf->cputime < inf->slice) && sedf_runnable(d) ) return; __del_from_queue(d); - /*manage bookkeeping (i.e. calculate next deadline, - memorize overun-time of slice) of finished domains*/ + /* + * Manage bookkeeping (i.e. calculate next deadline, memorise + * overrun-time of slice) of finished domains. + */ if ( inf->cputime >= inf->slice ) { inf->cputime -= inf->slice; if ( inf->period < inf->period_orig ) { - /*this domain runs in latency scaling or burst mode*/ -#if (UNBLOCK == UNBLOCK_BURST) - /*if we are runnig in burst scaling wait for two periods - before scaling periods up again*/ - if ( (now - inf->unblock_abs) >= (2 * inf->period) ) -#endif + /* This domain runs in latency scaling or burst mode. */ + inf->period *= 2; + inf->slice *= 2; + if ( (inf->period > inf->period_orig) || + (inf->slice > inf->slice_orig) ) { - inf->period *= 2; inf->slice *= 2; - if ( (inf->period > inf->period_orig) || - (inf->slice > inf->slice_orig) ) - { - /*reset slice & period*/ - inf->period = inf->period_orig; - inf->slice = inf->slice_orig; - } + /* Reset slice and period. 
*/ + inf->period = inf->period_orig; + inf->slice = inf->slice_orig; } } - /*set next deadline*/ + + /* Set next deadline. */ inf->deadl_abs += inf->period; } - /*add a runnable domain to the waitqueue*/ + /* Add a runnable domain to the waitqueue. */ if ( sedf_runnable(d) ) { __add_to_waitqueue_sort(d); } else { - /*we have a blocked realtime task -> remove it from exqs too*/ -#if (EXTRA > EXTRA_OFF) -#if (EXTRA == EXTRA_BLOCK_WEIGHT) + /* We have a blocked realtime task -> remove it from exqs too. */ if ( extraq_on(d, EXTRA_PEN_Q) ) extraq_del(d, EXTRA_PEN_Q); -#endif if ( extraq_on(d, EXTRA_UTIL_Q) ) extraq_del(d, EXTRA_UTIL_Q); -#endif } ASSERT(EQ(sedf_runnable(d), __task_on_queue(d))); @@ -513,58 +502,57 @@ static void update_queues( PRINT(3,"Updating waitq..\n"); - /*check for the first elements of the waitqueue, whether their - next period has already started*/ - list_for_each_safe(cur, tmp, waitq) { + /* + * Check for the first elements of the waitqueue, whether their + * next period has already started. + */ + list_for_each_safe ( cur, tmp, waitq ) + { curinf = list_entry(cur, struct sedf_vcpu_info, list); PRINT(4,"\tLooking @ dom %i.%i\n", curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id); - if ( PERIOD_BEGIN(curinf) <= now ) - { - __del_from_queue(curinf->vcpu); - __add_to_runqueue_sort(curinf->vcpu); - } - else + if ( PERIOD_BEGIN(curinf) > now ) break; + __del_from_queue(curinf->vcpu); + __add_to_runqueue_sort(curinf->vcpu); } PRINT(3,"Updating runq..\n"); - /*process the runq, find domains that are on - the runqueue which shouldn't be there*/ - list_for_each_safe(cur, tmp, runq) { + /* Process the runq, find domains that are on the runq that shouldn't. */ + list_for_each_safe ( cur, tmp, runq ) + { curinf = list_entry(cur,struct sedf_vcpu_info,list); PRINT(4,"\tLooking @ dom %i.%i\n", curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id); if ( unlikely(curinf->slice == 0) ) { - /*ignore domains with empty slice*/ + /* Ignore domains with empty slice. */ PRINT(4,"\tUpdating zero-slice domain %i.%i\n", curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id); __del_from_queue(curinf->vcpu); - /*move them to their next period*/ + /* Move them to their next period. */ curinf->deadl_abs += curinf->period; - /*ensure that the start of the next period is in the future*/ + + /* Ensure that the start of the next period is in the future. */ if ( unlikely(PERIOD_BEGIN(curinf) < now) ) - { curinf->deadl_abs += (DIV_UP(now - PERIOD_BEGIN(curinf), - curinf->period)) * curinf->period; - } - /*and put them back into the queue*/ + curinf->period)) * curinf->period; + + /* Put them back into the queue. */ __add_to_waitqueue_sort(curinf->vcpu); - continue; - } - - if ( unlikely((curinf->deadl_abs < now) || - (curinf->cputime > curinf->slice)) ) - { - /*we missed the deadline or the slice was - already finished... might hapen because - of dom_adj.*/ + } + else if ( unlikely((curinf->deadl_abs < now) || + (curinf->cputime > curinf->slice)) ) + { + /* + * We missed the deadline or the slice was already finished. + * Might hapen because of dom_adj. + */ PRINT(4,"\tDomain %i.%i exceeded it's deadline/" "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64 " cputime: %"PRIu64"\n", @@ -573,20 +561,23 @@ static void update_queues( curinf->deadl_abs, curinf->slice, now, curinf->cputime); __del_from_queue(curinf->vcpu); - /*common case: we miss one period!*/ + + /* Common case: we miss one period. 
*/ curinf->deadl_abs += curinf->period; - /*if we are still behind: modulo arithmetic, - force deadline to be in future and - aligned to period borders!*/ - if (unlikely(curinf->deadl_abs < now)) + /* + * If we are still behind: modulo arithmetic, force deadline + * to be in future and aligned to period borders. + */ + if ( unlikely(curinf->deadl_abs < now) ) curinf->deadl_abs += DIV_UP(now - curinf->deadl_abs, curinf->period) * curinf->period; ASSERT(curinf->deadl_abs >= now); - /*give a fresh slice*/ + + /* Give a fresh slice. */ curinf->cputime = 0; - if (PERIOD_BEGIN(curinf) > now) + if ( PERIOD_BEGIN(curinf) > now ) __add_to_waitqueue_sort(curinf->vcpu); else __add_to_runqueue_sort(curinf->vcpu); @@ -594,43 +585,36 @@ static void update_queues( else break; } + PRINT(3,"done updating the queues\n"); } -#if (EXTRA > EXTRA_OFF) /* removes a domain from the head of the according extraQ and requeues it at a specified position: round-robin extratime: end of extraQ weighted ext.: insert in sorted list by score if the domain is blocked / has regained its short-block-loss time it is not put on any queue */ -static void desched_extra_dom(s_time_t now, struct vcpu* d) +static void desched_extra_dom(s_time_t now, struct vcpu *d) { struct sedf_vcpu_info *inf = EDOM_INFO(d); int i = extra_get_cur_q(inf); - -#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT) - unsigned long oldscore; -#endif + unsigned long oldscore; + ASSERT(extraq_on(d, i)); - /*unset all running flags*/ + + /* Unset all running flags. */ inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL); - /*fresh slice for the next run*/ + /* Fresh slice for the next run. */ inf->cputime = 0; - /*accumulate total extratime*/ + /* Accumulate total extratime. */ inf->extra_time_tot += now - inf->sched_start_abs; - /*remove extradomain from head of the queue*/ + /* Remove extradomain from head of the queue. */ extraq_del(d, i); -#if (EXTRA == EXTRA_ROUNDR) - if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) ) - /*add to the tail if it is runnable => round-robin*/ - extraq_add_tail(d, EXTRA_UTIL_Q); -#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT) - /*update the score*/ + /* Update the score. 
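/*
 * Illustrative, simplified sketch (not part of the changeset above; the
 * helper name is made up).  It isolates the catch-up arithmetic used in
 * update_queues(): a missed deadline is first advanced by one period (the
 * common case); if it is still in the past, modulo arithmetic jumps it
 * forward to the next period boundary after 'now'.
 */
#include <assert.h>
#include <stdio.h>

typedef long long s_time_t;

#define DIV_UP(x, y) (((x) + (y) - 1) / (y))

static s_time_t catch_up_deadline(s_time_t deadl_abs, s_time_t period,
                                  s_time_t now)
{
    /* Common case: we missed exactly one period. */
    deadl_abs += period;

    /* Still behind?  Force the deadline into the future, period-aligned. */
    if (deadl_abs < now)
        deadl_abs += DIV_UP(now - deadl_abs, period) * period;

    assert(deadl_abs >= now);
    return deadl_abs;
}

int main(void)
{
    /* Deadline was 100, period is 20, and we only noticed at time 173. */
    printf("new deadline: %lld\n", catch_up_deadline(100, 20, 173));  /* 180 */
    return 0;
}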
*/ oldscore = inf->score[i]; -#if (EXTRA == EXTRA_BLOCK_WEIGHT) if ( i == EXTRA_PEN_Q ) { /*domain was running in L0 extraq*/ @@ -640,7 +624,8 @@ static void desched_extra_dom(s_time_t n PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id, inf->short_block_lost_tot); - if (inf->short_block_lost_tot <= 0) { + if ( inf->short_block_lost_tot <= 0 ) + { PRINT(4,"Domain %i.%i compensated short block loss!\n", inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id); /*we have (over-)compensated our block penalty*/ @@ -649,6 +634,7 @@ static void desched_extra_dom(s_time_t n inf->status &= ~EXTRA_WANT_PEN_Q; goto check_extra_queues; } + /*we have to go again for another try in the block-extraq, the score is not used incremantally here, as this is already done by recalculating the block_lost*/ @@ -657,7 +643,6 @@ static void desched_extra_dom(s_time_t n oldscore = 0; } else -#endif { /*domain was running in L1 extraq => score is inverse of utilization and is used somewhat incremental!*/ @@ -684,7 +669,6 @@ static void desched_extra_dom(s_time_t n { /*remove this blocked domain from the waitq!*/ __del_from_queue(d); -#if (EXTRA == EXTRA_BLOCK_WEIGHT) /*make sure that we remove a blocked domain from the other extraq too*/ if ( i == EXTRA_PEN_Q ) @@ -697,14 +681,12 @@ static void desched_extra_dom(s_time_t n if ( extraq_on(d, EXTRA_PEN_Q) ) extraq_del(d, EXTRA_PEN_Q); } -#endif - } -#endif + } + ASSERT(EQ(sedf_runnable(d), __task_on_queue(d))); ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), sedf_runnable(d))); } -#endif static struct task_slice sedf_do_extra_schedule( @@ -718,7 +700,6 @@ static struct task_slice sedf_do_extra_s if ( end_xt - now < EXTRA_QUANTUM ) goto return_idle; -#if (EXTRA == EXTRA_BLOCK_WEIGHT) if ( !list_empty(extraq[EXTRA_PEN_Q]) ) { /*we still have elements on the level 0 extraq @@ -733,7 +714,6 @@ static struct task_slice sedf_do_extra_s #endif } else -#endif { if ( !list_empty(extraq[EXTRA_UTIL_Q]) ) { @@ -772,11 +752,9 @@ static struct task_slice sedf_do_schedul int cpu = smp_processor_id(); struct list_head *runq = RUNQ(cpu); struct list_head *waitq = WAITQ(cpu); -#if (EXTRA > EXTRA_OFF) struct sedf_vcpu_info *inf = EDOM_INFO(current); struct list_head *extraq[] = { EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)}; -#endif struct sedf_vcpu_info *runinf, *waitinf; struct task_slice ret; @@ -793,14 +771,12 @@ static struct task_slice sedf_do_schedul if ( inf->status & SEDF_ASLEEP ) inf->block_abs = now; -#if (EXTRA > EXTRA_OFF) if ( unlikely(extra_runs(inf)) ) { /*special treatment of domains running in extra time*/ desched_extra_dom(now, current); } else -#endif { desched_edf_dom(now, current); } @@ -837,13 +813,8 @@ static struct task_slice sedf_do_schedul waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list); /*we could not find any suitable domain => look for domains that are aware of extratime*/ -#if (EXTRA > EXTRA_OFF) ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf), extraq, cpu); -#else - ret.task = IDLETASK(cpu); - ret.time = PERIOD_BEGIN(waitinf) - now; -#endif CHECK(ret.time > 0); } else @@ -891,14 +862,10 @@ static void sedf_sleep(struct vcpu *d) { if ( __task_on_queue(d) ) __del_from_queue(d); -#if (EXTRA > EXTRA_OFF) if ( extraq_on(d, EXTRA_UTIL_Q) ) extraq_del(d, EXTRA_UTIL_Q); -#endif -#if (EXTRA == EXTRA_BLOCK_WEIGHT) if ( extraq_on(d, EXTRA_PEN_Q) ) extraq_del(d, EXTRA_PEN_Q); -#endif } } @@ -939,7 +906,7 @@ static void sedf_sleep(struct vcpu *d) * -addition: experiments 
have shown that this may have a HUGE impact on * performance of other domains, becaus it can lead to excessive context * switches - + * * Part2: Long Unblocking * Part 2a * -it is obvious that such accounting of block time, applied when @@ -974,32 +941,6 @@ static void sedf_sleep(struct vcpu *d) * -either behaviour can lead to missed deadlines in other domains as * opposed to approaches 1,2a,2b */ -#if (UNBLOCK <= UNBLOCK_SHORT_RESUME) -static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now) -{ - inf->deadl_abs += inf->period; - inf->cputime = 0; -} -#endif - -#if (UNBLOCK == UNBLOCK_SHORT_RESUME) -static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now) -{ - /*treat blocked time as consumed by the domain*/ - inf->cputime += now - inf->block_abs; - if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice ) - { - /*we don't have a reasonable amount of time in - our slice left :( => start in next period!*/ - unblock_short_vcons(inf, now); - } -#ifdef SEDF_STATS - else - inf->short_cont++; -#endif -} -#endif - static void unblock_short_extra_support( struct sedf_vcpu_info* inf, s_time_t now) { @@ -1051,33 +992,6 @@ static void unblock_short_extra_support( } -#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF) -static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now) -{ - /* align to next future period */ - inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1) - * inf->period; - inf->cputime = 0; -} -#endif - - -#if 0 -static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now) -{ - /*treat the time the domain was blocked in the - CURRENT period as consumed by the domain*/ - inf->cputime = (now - inf->deadl_abs) % inf->period; - if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice ) - { - /*we don't have a reasonable amount of time in our slice - left :( => start in next period!*/ - unblock_long_vcons(inf, now); - } -} -#endif - - static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) { /*Conservative 2b*/ @@ -1085,110 +999,6 @@ static void unblock_long_cons_b(struct s inf->deadl_abs = now + inf->period; inf->cputime = 0; } - - -#if (UNBLOCK == UNBLOCK_ATROPOS) -static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) -{ - if ( likely(inf->latency) ) - { - /*scale the slice and period accordingly to the latency hint*/ - /*reduce period temporarily to the latency hint*/ - inf->period = inf->latency; - /*this results in max. 4s slice/period length*/ - ASSERT((inf->period < ULONG_MAX) - && (inf->slice_orig < ULONG_MAX)); - /*scale slice accordingly, so that utilisation stays the same*/ - inf->slice = (inf->period * inf->slice_orig) - / inf->period_orig; - inf->deadl_abs = now + inf->period; - inf->cputime = 0; - } - else - { - /*we don't have a latency hint.. 
use some other technique*/ - unblock_long_cons_b(inf, now); - } -} -#endif - - -#if (UNBLOCK == UNBLOCK_BURST) -/*a new idea of dealing with short blocks: burst period scaling*/ -static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now) -{ - /*treat blocked time as consumed by the domain*/ - inf->cputime += now - inf->block_abs; - - if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice ) - { - /*if we can still use some time in the current slice - then use it!*/ -#ifdef SEDF_STATS - /*we let the domain run in the current period*/ - inf->short_cont++; -#endif - } - else - { - /*we don't have a reasonable amount of time in - our slice left => switch to burst mode*/ - if ( likely(inf->unblock_abs) ) - { - /*set the period-length to the current blocking - interval, possible enhancements: average over last - blocking intervals, user-specified minimum,...*/ - inf->period = now - inf->unblock_abs; - /*check for overflow on multiplication*/ - ASSERT((inf->period < ULONG_MAX) - && (inf->slice_orig < ULONG_MAX)); - /*scale slice accordingly, so that utilisation - stays the same*/ - inf->slice = (inf->period * inf->slice_orig) - / inf->period_orig; - /*set new (shorter) deadline*/ - inf->deadl_abs += inf->period; - } - else - { - /*in case we haven't unblocked before - start in next period!*/ - inf->cputime=0; - inf->deadl_abs += inf->period; - } - } - - inf->unblock_abs = now; -} - - -static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) -{ - if ( unlikely(inf->latency && (inf->period > inf->latency)) ) - { - /*scale the slice and period accordingly to the latency hint*/ - inf->period = inf->latency; - /*check for overflows on multiplication*/ - ASSERT((inf->period < ULONG_MAX) - && (inf->slice_orig < ULONG_MAX)); - /*scale slice accordingly, so that utilisation stays the same*/ - inf->slice = (inf->period * inf->slice_orig) - / inf->period_orig; - inf->deadl_abs = now + inf->period; - inf->cputime = 0; - } - else - { - /*we don't have a latency hint.. or we are currently in - "burst mode": use some other technique - NB: this should be in fact the normal way of operation, - when we are in sync with the device!*/ - unblock_long_cons_b(inf, now); - } - - inf->unblock_abs = now; -} -#endif /* UNBLOCK == UNBLOCK_BURST */ #define DOMAIN_EDF 1 @@ -1225,32 +1035,31 @@ static inline int should_switch(struct v cur_inf = EDOM_INFO(cur); other_inf = EDOM_INFO(other); - /*check whether we need to make an earlier sched-decision*/ - if (PERIOD_BEGIN(other_inf) < - CPU_INFO(other->processor)->current_slice_expires) + /* Check whether we need to make an earlier scheduling decision. */ + if ( PERIOD_BEGIN(other_inf) < + CPU_INFO(other->processor)->current_slice_expires ) return 1; - /*no timing-based switches need to be taken into account here*/ - switch (get_run_type(cur)) { + + /* No timing-based switches need to be taken into account here. */ + switch ( get_run_type(cur) ) + { case DOMAIN_EDF: - /* do not interrupt a running EDF domain */ + /* Do not interrupt a running EDF domain. */ return 0; case DOMAIN_EXTRA_PEN: - /*check whether we also want - the L0 ex-q with lower score*/ - if ((other_inf->status & EXTRA_WANT_PEN_Q) - && (other_inf->score[EXTRA_PEN_Q] < - cur_inf->score[EXTRA_PEN_Q])) - return 1; - else return 0; + /* Check whether we also want the L0 ex-q with lower score. 
*/ + return ((other_inf->status & EXTRA_WANT_PEN_Q) && + (other_inf->score[EXTRA_PEN_Q] < + cur_inf->score[EXTRA_PEN_Q])); case DOMAIN_EXTRA_UTIL: - /*check whether we want the L0 extraq, don't - switch if both domains want L1 extraq */ - if (other_inf->status & EXTRA_WANT_PEN_Q) - return 1; - else return 0; + /* Check whether we want the L0 extraq. Don't + * switch if both domains want L1 extraq. + */ + return !!(other_inf->status & EXTRA_WANT_PEN_Q); case DOMAIN_IDLE: return 1; } + return 1; } @@ -1295,7 +1104,6 @@ void sedf_wake(struct vcpu *d) { PRINT(4,"extratime unblock\n"); /* unblocking in extra-time! */ -#if (EXTRA == EXTRA_BLOCK_WEIGHT) if ( inf->status & EXTRA_WANT_PEN_Q ) { /*we have a domain that wants compensation @@ -1304,7 +1112,6 @@ void sedf_wake(struct vcpu *d) chance!*/ extraq_add_sort_update(d, EXTRA_PEN_Q, 0); } -#endif extraq_check_add_unblocked(d, 0); } else @@ -1316,15 +1123,7 @@ void sedf_wake(struct vcpu *d) #ifdef SEDF_STATS inf->short_block_tot++; #endif -#if (UNBLOCK <= UNBLOCK_ATROPOS) - unblock_short_vcons(inf, now); -#elif (UNBLOCK == UNBLOCK_SHORT_RESUME) - unblock_short_cons(inf, now); -#elif (UNBLOCK == UNBLOCK_BURST) - unblock_short_burst(inf, now); -#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT) unblock_short_extra_support(inf, now); -#endif extraq_check_add_unblocked(d, 1); } @@ -1335,18 +1134,7 @@ void sedf_wake(struct vcpu *d) #ifdef SEDF_STATS inf->long_block_tot++; #endif -#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF) - unblock_long_vcons(inf, now); -#elif (UNBLOCK == UNBLOCK_EDF \ - || UNBLOCK == UNBLOCK_EXTRA_SUPPORT) unblock_long_cons_b(inf, now); -#elif (UNBLOCK == UNBLOCK_ATROPOS) - unblock_long_cons_c(inf, now); -#elif (UNBLOCK == UNBLOCK_SHORT_RESUME) - unblock_long_cons_b(inf, now); -#elif (UNBLOCK == UNBLOCK_BURST) - unblock_long_burst(inf, now); -#endif extraq_check_add_unblocked(d, 1); } @@ -1528,7 +1316,7 @@ static int sedf_adjust_weights(struct sc sumt[cpu] = 0; } - /* sum up all weights */ + /* Sum across all weights. */ for_each_domain( d ) { for_each_vcpu( d, p ) @@ -1553,7 +1341,7 @@ static int sedf_adjust_weights(struct sc } } - /* adjust all slices (and periods) to the new weight */ + /* Adjust all slices (and periods) to the new weight. */ for_each_domain( d ) { for_each_vcpu ( d, p ) @@ -1580,35 +1368,42 @@ static int sedf_adjdom(struct domain *p, { struct vcpu *v; - PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\ + PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" " "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n", p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice, cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no"); if ( cmd->direction == SCHED_INFO_PUT ) { - /*check for sane parameters*/ - if (!cmd->u.sedf.period && !cmd->u.sedf.weight) + /* Check for sane parameters. */ + if ( !cmd->u.sedf.period && !cmd->u.sedf.weight ) return -EINVAL; - if (cmd->u.sedf.weight) { - if ((cmd->u.sedf.extratime & EXTRA_AWARE) && - (! cmd->u.sedf.period)) { - /*weight driven domains with xtime ONLY!*/ - for_each_vcpu(p, v) { + if ( cmd->u.sedf.weight ) + { + if ( (cmd->u.sedf.extratime & EXTRA_AWARE) && + (!cmd->u.sedf.period) ) + { + /* Weight-driven domains with extratime only. */ + for_each_vcpu ( p, v ) + { EDOM_INFO(v)->extraweight = cmd->u.sedf.weight; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->slice = 0; EDOM_INFO(v)->period = WEIGHT_PERIOD; } - } else { - /*weight driven domains with real-time execution*/ - for_each_vcpu(p, v) + } + else + { + /* Weight-driven domains with real-time execution. 
*/ + for_each_vcpu ( p, v ) EDOM_INFO(v)->weight = cmd->u.sedf.weight; } } - else { - /*time driven domains*/ - for_each_vcpu(p, v) { + else + { + /* Time-driven domains. */ + for_each_vcpu ( p, v ) + { /* * Sanity checking: note that disabling extra weight requires * that we set a non-zero slice. @@ -1626,10 +1421,12 @@ static int sedf_adjdom(struct domain *p, EDOM_INFO(v)->slice = cmd->u.sedf.slice; } } - if (sedf_adjust_weights(cmd)) + + if ( sedf_adjust_weights(cmd) ) return -EINVAL; - - for_each_vcpu(p, v) { + + for_each_vcpu ( p, v ) + { EDOM_INFO(v)->status = (EDOM_INFO(v)->status & ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE); @@ -1641,11 +1438,11 @@ static int sedf_adjdom(struct domain *p, { cmd->u.sedf.period = EDOM_INFO(p->vcpu[0])->period; cmd->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice; - cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status - & EXTRA_AWARE; + cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE; cmd->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency; cmd->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight; } + PRINT(2,"sedf_adjdom_finished\n"); return 0; } diff -r 5719550652a1 -r 5cc367720223 xen/common/schedule.c --- a/xen/common/schedule.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/schedule.c Tue Apr 11 18:54:18 2006 -0600 @@ -332,7 +332,7 @@ static long do_yield(void) return 0; } -long do_sched_op(int cmd, unsigned long arg) +long do_sched_op_compat(int cmd, unsigned long arg) { long ret = 0; @@ -365,7 +365,7 @@ long do_sched_op(int cmd, unsigned long return ret; } -long do_sched_op_new(int cmd, GUEST_HANDLE(void) arg) +long do_sched_op(int cmd, GUEST_HANDLE(void) arg) { long ret = 0; @@ -409,6 +409,30 @@ long do_sched_op_new(int cmd, GUEST_HAND break; ret = do_poll(&sched_poll); + + break; + } + + case SCHEDOP_remote_shutdown: + { + struct domain *d; + struct sched_remote_shutdown sched_remote_shutdown; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + ret = -EFAULT; + if ( copy_from_guest(&sched_remote_shutdown, arg, 1) ) + break; + + ret = -ESRCH; + d = find_domain_by_id(sched_remote_shutdown.domain_id); + if ( d == NULL ) + break; + + domain_shutdown(d, (u8)sched_remote_shutdown.reason); + put_domain(d); + ret = 0; break; } @@ -572,7 +596,7 @@ static void __enter_scheduler(void) /* Ensure that the domain has an up-to-date time base. 
*/ if ( !is_idle_vcpu(next) ) { - update_dom_time(next); + update_vcpu_system_time(next); if ( next->sleep_tick != schedule_data[cpu].tick ) send_timer_event(next); } @@ -609,7 +633,7 @@ static void t_timer_fn(void *unused) if ( !is_idle_vcpu(v) ) { - update_dom_time(v); + update_vcpu_system_time(v); send_timer_event(v); } @@ -623,7 +647,7 @@ static void dom_timer_fn(void *data) { struct vcpu *v = data; - update_dom_time(v); + update_vcpu_system_time(v); send_timer_event(v); } diff -r 5719550652a1 -r 5cc367720223 xen/common/softirq.c --- a/xen/common/softirq.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/softirq.c Tue Apr 11 18:54:18 2006 -0600 @@ -21,9 +21,10 @@ irq_cpustat_t irq_stat[NR_CPUS]; static softirq_handler softirq_handlers[NR_SOFTIRQS]; -asmlinkage void do_softirq() +asmlinkage void do_softirq(void) { - unsigned int i, pending, cpu = smp_processor_id(); + unsigned int i, cpu = smp_processor_id(); + unsigned long pending; pending = softirq_pending(cpu); ASSERT(pending != 0); diff -r 5719550652a1 -r 5cc367720223 xen/common/trace.c --- a/xen/common/trace.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/common/trace.c Tue Apr 11 18:54:18 2006 -0600 @@ -27,6 +27,8 @@ #include <xen/smp.h> #include <xen/trace.h> #include <xen/errno.h> +#include <xen/event.h> +#include <xen/softirq.h> #include <xen/init.h> #include <asm/atomic.h> #include <public/dom0_ops.h> @@ -40,6 +42,11 @@ static struct t_rec *t_recs[NR_CPUS]; static struct t_rec *t_recs[NR_CPUS]; static int nr_recs; +/* High water mark for trace buffers; */ +/* Send virtual interrupt when buffer level reaches this point */ +static int t_buf_highwater; + + /* a flag recording whether initialization has been done */ /* or more properly, if the tbuf subsystem is enabled right now */ int tb_init_done; @@ -49,6 +56,12 @@ static unsigned long tb_cpu_mask = (~0UL /* which tracing events are enabled */ static u32 tb_event_mask = TRC_ALL; + +static void trace_notify_guest(void) +{ + send_guest_global_virq(dom0, VIRQ_TBUF); +} + /** * alloc_trace_bufs - performs initialization of the per-cpu trace buffers. @@ -92,6 +105,9 @@ static int alloc_trace_bufs(void) buf->cons = buf->prod = 0; t_recs[i] = (struct t_rec *)(buf + 1); } + + t_buf_highwater = nr_recs >> 1; /* 50% high water */ + open_softirq(TRACE_SOFTIRQ, trace_notify_guest); return 0; } @@ -272,6 +288,13 @@ void trace(u32 event, unsigned long d1, buf->prod++; local_irq_restore(flags); + + /* + * Notify trace buffer consumer that we've reached the high water mark. 
+ * + */ + if ( (buf->prod - buf->cons) == t_buf_highwater ) + raise_softirq(TRACE_SOFTIRQ); } /* diff -r 5719550652a1 -r 5cc367720223 xen/drivers/Makefile --- a/xen/drivers/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/drivers/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,6 +1,2 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - subdir-y += char subdir-$(HAS_ACPI) += acpi - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/drivers/acpi/Makefile --- a/xen/drivers/acpi/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/drivers/acpi/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,5 +1,1 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += tables.o - -include $(BASEDIR)/Post.mk diff -r 5719550652a1 -r 5cc367720223 xen/drivers/char/Makefile --- a/xen/drivers/char/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/drivers/char/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,6 @@ include $(BASEDIR)/Rules.mk -include $(BASEDIR)/Rules.mk - obj-y += console.o obj-y += ns16550.o obj-y += serial.o -include $(BASEDIR)/Post.mk - # Object file contains changeset and compiler information. console.o: $(BASEDIR)/include/xen/compile.h diff -r 5719550652a1 -r 5cc367720223 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/drivers/char/console.c Tue Apr 11 18:54:18 2006 -0600 @@ -200,10 +200,11 @@ static void putchar_console(int c) } else { + if ( xpos >= COLUMNS ) + put_newline(); video[(xpos + ypos * COLUMNS) * 2] = c & 0xFF; video[(xpos + ypos * COLUMNS) * 2 + 1] = ATTRIBUTE; - if ( ++xpos >= COLUMNS ) - put_newline(); + ++xpos; } } @@ -293,7 +294,7 @@ static void __serial_rx(char c, struct c if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE ) serial_rx_ring[SERIAL_RX_MASK(serial_rx_prod++)] = c; /* Always notify the guest: prevents receive path from getting stuck. */ - send_guest_virq(dom0->vcpu[0], VIRQ_CONSOLE); + send_guest_global_virq(dom0, VIRQ_CONSOLE); } static void serial_rx(char c, struct cpu_user_regs *regs) @@ -519,6 +520,7 @@ void console_force_unlock(void) { console_lock = SPIN_LOCK_UNLOCKED; serial_force_unlock(sercon_handle); + console_start_sync(); } void console_force_lock(void) @@ -684,6 +686,7 @@ void panic(const char *fmt, ...) va_end(args); /* Spit out multiline message in one go. */ + console_start_sync(); spin_lock_irqsave(&lock, flags); printk("\n****************************************\n"); printk("Panic on CPU %d:\n", smp_processor_id()); diff -r 5719550652a1 -r 5cc367720223 xen/drivers/char/ns16550.c --- a/xen/drivers/char/ns16550.c Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/drivers/char/ns16550.c Tue Apr 11 18:54:18 2006 -0600 @@ -99,6 +99,9 @@ static struct ns16550 { #define PARITY_MARK (5<<3) #define PARITY_SPACE (7<<3) +/* Frequency of external clock source. This definition assumes PC platform. */ +#define UART_CLOCK_HZ 1843200 + static char ns_read_reg(struct ns16550 *uart, int reg) { if ( uart->remapped_io_base == NULL ) @@ -171,6 +174,7 @@ static void ns16550_init_preirq(struct s { struct ns16550 *uart = port->uart; unsigned char lcr; + unsigned int divisor; /* I/O ports are distinguished by their size (16 bits). */ if ( uart->io_base >= 0x10000 ) @@ -182,13 +186,22 @@ static void ns16550_init_preirq(struct s ns_write_reg(uart, IER, 0); /* Line control and baud-rate generator. 
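/*
 * Illustrative, simplified sketch (not part of the changeset above; the
 * ring size is made up and a direct function call stands in for
 * raise_softirq()/VIRQ_TBUF).  It shows the high-water behaviour the
 * trace.c change adds: the consumer is poked exactly once, when the fill
 * level reaches half of the ring, rather than on every record.
 */
#include <stdio.h>

#define NR_RECS 8

struct t_buf {
    unsigned int cons, prod;
    int rec[NR_RECS];
};

static unsigned int t_buf_highwater = NR_RECS >> 1;   /* 50% full */

static void notify_consumer(void) { printf("  -> notify consumer\n"); }

static void trace(struct t_buf *buf, int event)
{
    if (buf->prod - buf->cons == NR_RECS)
        return;                                    /* ring full: drop record */

    buf->rec[buf->prod % NR_RECS] = event;
    buf->prod++;

    /* Notify only when we reach the high-water mark. */
    if (buf->prod - buf->cons == t_buf_highwater)
        notify_consumer();
}

int main(void)
{
    struct t_buf buf = { 0, 0, { 0 } };
    int i;

    for (i = 0; i < 6; i++) {
        printf("trace(%d)\n", i);
        trace(&buf, i);                            /* notifies once, at record 4 */
    }
    return 0;
}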
*/ + ns_write_reg(uart, LCR, lcr | LCR_DLAB); if ( uart->baud != BAUD_AUTO ) { - ns_write_reg(uart, LCR, lcr | LCR_DLAB); - ns_write_reg(uart, DLL, 115200/uart->baud); /* baud lo */ - ns_write_reg(uart, DLM, 0); /* baud hi */ - } - ns_write_reg(uart, LCR, lcr); /* parity, data, stop */ + /* Baud rate specified: program it into the divisor latch. */ + divisor = UART_CLOCK_HZ / (uart->baud * 16); + ns_write_reg(uart, DLL, (char)divisor); + ns_write_reg(uart, DLM, (char)(divisor >> 8)); + } + else + { + /* Baud rate already set: read it out from the divisor latch. */ + divisor = ns_read_reg(uart, DLL); + divisor |= ns_read_reg(uart, DLM) << 8; + uart->baud = UART_CLOCK_HZ / (divisor * 16); + } + ns_write_reg(uart, LCR, lcr); /* No flow ctrl: DTR and RTS are both wedged high to keep remote happy. */ ns_write_reg(uart, MCR, MCR_DTR | MCR_RTS); diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-ia64/event.h --- a/xen/include/asm-ia64/event.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-ia64/event.h Tue Apr 11 18:54:18 2006 -0600 @@ -32,4 +32,9 @@ static inline void evtchn_notify(struct vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector); } +/* Note: Bitwise operations result in fast code with no branches. */ +#define event_pending(v) \ + (!!(v)->vcpu_info->evtchn_upcall_pending & \ + !(v)->vcpu_info->evtchn_upcall_mask) + #endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/apicdef.h --- a/xen/include/asm-x86/apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/apicdef.h Tue Apr 11 18:54:18 2006 -0600 @@ -11,6 +11,9 @@ #define APIC_DEFAULT_PHYS_BASE 0xfee00000 #define APIC_ID 0x20 +#define APIC_ID_MASK (0xFFu<<24) +#define GET_APIC_ID(x) (((x)>>24)&0xFFu) +#define SET_APIC_ID(x) (((x)<<24)) #define APIC_LVR 0x30 #define APIC_LVR_MASK 0xFF00FF #define GET_APIC_VERSION(x) ((x)&0xFF) @@ -59,6 +62,7 @@ #define APIC_INT_ASSERT 0x04000 #define APIC_ICR_BUSY 0x01000 #define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 #define APIC_DM_FIXED 0x00000 #define APIC_DM_LOWEST 0x00100 #define APIC_DM_SMI 0x00200 diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/config.h --- a/xen/include/asm-x86/config.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/config.h Tue Apr 11 18:54:18 2006 -0600 @@ -35,7 +35,15 @@ #define OPT_CONSOLE_STR "com1,vga" +#ifdef MAX_PHYS_CPUS +#define NR_CPUS MAX_PHYS_CPUS +#else #define NR_CPUS 32 +#endif + +#if defined(__i386__) && (NR_CPUS > 32) +#error "Maximum of 32 physical processors supported by Xen on x86_32" +#endif #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL # define supervisor_mode_kernel (1) @@ -57,7 +65,12 @@ #define barrier() __asm__ __volatile__("": : :"memory") +/* A power-of-two value greater than or equal to number of hypercalls. */ #define NR_hypercalls 32 + +#if NR_hypercalls & (NR_hypercalls - 1) +#error "NR_hypercalls must be a power-of-two value" +#endif #ifndef NDEBUG #define MEMORY_GUARD @@ -204,13 +217,13 @@ extern unsigned long _end; /* standard E * ------ ------ * I/O remapping area ( 4MB) * Direct-map (1:1) area [Xen code/data/heap] (12MB) - * Per-domain mappings (inc. 4MB map_domain_page cache) ( 4MB) + * Per-domain mappings (inc. 
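/*
 * Illustrative, simplified sketch (no hardware access; standalone
 * arithmetic only).  It restates the divisor calculation the ns16550
 * change above relies on: with the usual 1.8432 MHz UART clock, the baud
 * rate and the 16-bit divisor latch convert both ways through the same
 * clock / (16 * x) relation, which is how BAUD_AUTO can read the rate back.
 */
#include <stdio.h>

#define UART_CLOCK_HZ 1843200   /* external clock on PC-style ports */

static unsigned int baud_to_divisor(unsigned int baud)
{
    return UART_CLOCK_HZ / (baud * 16);
}

static unsigned int divisor_to_baud(unsigned char dll, unsigned char dlm)
{
    unsigned int divisor = dll | (dlm << 8);
    return UART_CLOCK_HZ / (divisor * 16);
}

int main(void)
{
    unsigned int div = baud_to_divisor(115200);
    printf("115200 baud -> divisor %u (DLL=%u, DLM=%u)\n",
           div, div & 0xff, div >> 8);
    printf("divisor %u -> %u baud\n",
           div, divisor_to_baud(div & 0xff, (unsigned char)(div >> 8)));
    return 0;
}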
4MB map_domain_page cache) ( 8MB) * Shadow linear pagetable ( 4MB) ( 8MB) * Guest linear pagetable ( 4MB) ( 8MB) * Machine-to-physical translation table [writable] ( 4MB) (16MB) * Frame-info table (24MB) (96MB) * * Start of guest inaccessible area - * Machine-to-physical translation table [read-only] ( 4MB) + * Machine-to-physical translation table [read-only] ( 4MB) (16MB) * * Start of guest unmodifiable area */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/current.h --- a/xen/include/asm-x86/current.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/current.h Tue Apr 11 18:54:18 2006 -0600 @@ -53,4 +53,6 @@ static inline struct cpu_info *get_cpu_i #define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed)) +extern void set_current_execstate(struct vcpu *v); + #endif /* __X86_CURRENT_H__ */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/event.h --- a/xen/include/asm-x86/event.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/event.h Tue Apr 11 18:54:18 2006 -0600 @@ -26,4 +26,9 @@ static inline void evtchn_notify(struct smp_send_event_check_cpu(v->processor); } +/* Note: Bitwise operations result in fast code with no branches. */ +#define event_pending(v) \ + (!!(v)->vcpu_info->evtchn_upcall_pending & \ + !(v)->vcpu_info->evtchn_upcall_mask) + #endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/genapic.h --- a/xen/include/asm-x86/genapic.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/genapic.h Tue Apr 11 18:54:18 2006 -0600 @@ -21,27 +21,6 @@ struct genapic { char *name; int (*probe)(void); - int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - int int_delivery_mode; - int int_dest_mode; - int ESR_DISABLE; - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - int no_balance_irq; - void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*clustered_apic_check)(void); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); - /* When one of the next two hooks returns 1 the genapic is switched to this. Essentially they are additional probe functions. */ @@ -49,48 +28,52 @@ struct genapic { char *productid); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - unsigned (*get_apic_id)(unsigned long x); + /* Interrupt delivery parameters ('physical' vs. 'logical flat'). 
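/*
 * Illustrative, simplified sketch (not part of the changeset above; the
 * struct is a stand-in).  It demonstrates the branch-free test behind the
 * new event_pending() macro in the event.h headers: normalising both
 * flags to 0/1 with '!' and '!!' lets a bitwise AND replace the
 * short-circuiting '&&', which a compiler might otherwise compile into
 * conditional branches.
 */
#include <stdio.h>

struct vcpu_info {
    unsigned char evtchn_upcall_pending;
    unsigned char evtchn_upcall_mask;
};

/* Pending and not masked -- evaluated with a single bitwise AND. */
#define event_pending(vi) \
    (!!(vi)->evtchn_upcall_pending & !(vi)->evtchn_upcall_mask)

int main(void)
{
    struct vcpu_info a = { 1, 0 };   /* pending, unmasked -> 1 */
    struct vcpu_info b = { 1, 1 };   /* pending, masked   -> 0 */
    struct vcpu_info c = { 0, 0 };   /* idle              -> 0 */

    printf("%d %d %d\n", event_pending(&a), event_pending(&b),
           event_pending(&c));
    return 0;
}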
*/ + int int_delivery_mode; + int int_dest_mode; + void (*init_apic_ldr)(void); + void (*clustered_apic_check)(void); + cpumask_t (*target_cpus)(void); unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - - /* ipi */ void (*send_IPI_mask)(cpumask_t mask, int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); }; #define APICFUNC(x) .x = x -#define APIC_INIT(aname, aprobe) { \ +#define APIC_INIT(aname, aprobe) \ .name = aname, \ .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered), \ - APICFUNC(target_cpus), \ - APICFUNC(check_apicid_used), \ - APICFUNC(check_apicid_present), \ - APICFUNC(init_apic_ldr), \ - APICFUNC(ioapic_phys_id_map), \ - APICFUNC(clustered_apic_check), \ - APICFUNC(apicid_to_node), \ - APICFUNC(cpu_to_logical_apicid), \ - APICFUNC(cpu_present_to_apicid), \ - APICFUNC(apicid_to_cpu_present), \ - APICFUNC(check_phys_apicid_present), \ APICFUNC(mps_oem_check), \ - APICFUNC(get_apic_id), \ - APICFUNC(cpu_mask_to_apicid), \ - APICFUNC(acpi_madt_oem_check), \ - APICFUNC(send_IPI_mask), \ - APICFUNC(send_IPI_allbutself), \ - APICFUNC(send_IPI_all), \ - APICFUNC(enable_apic_mode), \ - APICFUNC(phys_pkg_id), \ - } + APICFUNC(acpi_madt_oem_check) extern struct genapic *genapic; +void init_apic_ldr_flat(void); +void clustered_apic_check_flat(void); +cpumask_t target_cpus_flat(void); +unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask); +void send_IPI_mask_flat(cpumask_t mask, int vector); +#define GENAPIC_FLAT \ + .int_delivery_mode = dest_LowestPrio, \ + .int_dest_mode = 1 /* logical delivery */, \ + .init_apic_ldr = init_apic_ldr_flat, \ + .clustered_apic_check = clustered_apic_check_flat, \ + .target_cpus = target_cpus_flat, \ + .cpu_mask_to_apicid = cpu_mask_to_apicid_flat, \ + .send_IPI_mask = send_IPI_mask_flat + +void init_apic_ldr_phys(void); +void clustered_apic_check_phys(void); +cpumask_t target_cpus_phys(void); +unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask); +void send_IPI_mask_phys(cpumask_t mask, int vector); +#define GENAPIC_PHYS \ + .int_delivery_mode = dest_Fixed, \ + .int_dest_mode = 0 /* physical delivery */, \ + .init_apic_ldr = init_apic_ldr_phys, \ + .clustered_apic_check = clustered_apic_check_phys, \ + .target_cpus = target_cpus_phys, \ + .cpu_mask_to_apicid = cpu_mask_to_apicid_phys, \ + .send_IPI_mask = send_IPI_mask_phys + #endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hardirq.h --- a/xen/include/asm-x86/hardirq.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hardirq.h Tue Apr 11 18:54:18 2006 -0600 @@ -5,10 +5,9 @@ #include <xen/cache.h> typedef struct { - unsigned int __softirq_pending; + unsigned long __softirq_pending; unsigned int __local_irq_count; unsigned int __nmi_count; - unsigned long idle_timestamp; } __cacheline_aligned irq_cpustat_t; #include <xen/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Tue Apr 11 18:54:18 2006 -0600 @@ -47,8 +47,6 @@ struct hvm_function_table { struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs); void (*load_cpu_guest_regs)( struct vcpu *v, struct cpu_user_regs *r); - void (*modify_guest_state)(struct vcpu *v); - /* * Examine specifics of the guest 
state: * 1) determine whether the guest is in real or vm8086 mode, @@ -105,12 +103,6 @@ hvm_load_cpu_guest_regs(struct vcpu *v, hvm_funcs.load_cpu_guest_regs(v, r); } -static inline void -hvm_modify_guest_state(struct vcpu *v) -{ - hvm_funcs.modify_guest_state(v); -} - static inline int hvm_realmode(struct vcpu *v) { diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hvm/io.h Tue Apr 11 18:54:18 2006 -0600 @@ -66,6 +66,7 @@ #define INSTR_STOS 10 #define INSTR_TEST 11 #define INSTR_BT 12 +#define INSTR_XCHG 13 struct instruction { __s8 instr; /* instruction type */ @@ -76,7 +77,7 @@ struct instruction { __u32 flags; }; -#define MAX_INST_LEN 32 +#define MAX_INST_LEN 15 /* Maximum instruction length = 15 bytes */ struct mmio_op { int flags; diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hvm/support.h Tue Apr 11 18:54:18 2006 -0600 @@ -94,6 +94,7 @@ enum hval_bitmaps { #else #define MONITOR_DEFAULT_EXCEPTION_BITMAP \ ( EXCEPTION_BITMAP_PG | \ + EXCEPTION_BITMAP_BP | \ EXCEPTION_BITMAP_GP ) #endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hvm/svm/svm.h --- a/xen/include/asm-x86/hvm/svm/svm.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hvm/svm/svm.h Tue Apr 11 18:54:18 2006 -0600 @@ -39,7 +39,6 @@ extern void svm_stop(void); extern void svm_stop(void); extern void svm_save_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs); extern void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs); -extern int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs); extern void svm_vmread(struct vcpu *v, int index, unsigned long *value); extern void svm_vmwrite(struct vcpu *v, int index, unsigned long value); extern void svm_final_setup_guest(struct vcpu *v); diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Apr 11 18:54:18 2006 -0600 @@ -89,8 +89,6 @@ struct arch_vmx_struct { void vmx_do_resume(struct vcpu *); struct vmcs_struct *alloc_vmcs(void); -int modify_vmcs(struct arch_vmx_struct *arch_vmx, - struct cpu_user_regs *regs); void destroy_vmcs(struct arch_vmx_struct *arch_vmx); extern void vmx_request_clear_vmcs(struct vcpu *v); diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-generic/mach_apic.h --- a/xen/include/asm-x86/mach-generic/mach_apic.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/mach-generic/mach_apic.h Tue Apr 11 18:54:18 2006 -0600 @@ -2,28 +2,40 @@ #define __ASM_MACH_APIC_H #include <asm/genapic.h> +#include <asm/smp.h> -#define esr_disable (genapic->ESR_DISABLE) -#define NO_BALANCE_IRQ (genapic->no_balance_irq) +/* ESR was originally disabled in Linux for NUMA-Q. Do we really need to? */ +#define esr_disable (0) + +/* The following are dependent on APIC delivery mode (logical vs. physical). 
*/ #define INT_DELIVERY_MODE (genapic->int_delivery_mode) #define INT_DEST_MODE (genapic->int_dest_mode) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) #define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) -#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) #define clustered_apic_check (genapic->clustered_apic_check) -#define apicid_to_node (genapic->apicid_to_node) -#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) -#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) -#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) -#define check_apicid_present (genapic->check_apicid_present) -#define check_phys_apicid_present (genapic->check_phys_apicid_present) -#define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define enable_apic_mode (genapic->enable_apic_mode) -#define phys_pkg_id (genapic->phys_pkg_id) + +extern void es7000_sw_apic(void); +static inline void enable_apic_mode(void) +{ + es7000_sw_apic(); + return; +} + +/* No sane NUMA support right now. We should parse ACPI SRAT. */ +static inline int apicid_to_node(int logical_apicid) +{ + return 0; +} + +extern u8 bios_cpu_apicid[]; +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS) + return (int)bios_cpu_apicid[mps_cpu]; + else + return BAD_APICID; +} static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *translation_record) @@ -47,4 +59,41 @@ static inline int multi_timer_check(int extern void generic_bigsmp_probe(void); +/* + * The following functions based around phys_cpu_present_map are disabled in + * some i386 Linux subarchitectures, and in x86_64 'cluster' genapic mode. I'm + * really not sure why, since all local APICs should have distinct physical + * IDs, and we need to know what they are. 
+ */ +static inline int apic_id_registered(void) +{ + return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), + phys_cpu_present_map); +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long check_apicid_present(int apicid) +{ + return physid_isset(apicid, phys_cpu_present_map); +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + #endif /* __ASM_MACH_APIC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-summit/mach_mpparse.h --- a/xen/include/asm-x86/mach-summit/mach_mpparse.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/mach-summit/mach_mpparse.h Tue Apr 11 18:54:18 2006 -0600 @@ -1,7 +1,5 @@ #ifndef __ASM_MACH_MPPARSE_H #define __ASM_MACH_MPPARSE_H - -#include <mach_apic.h> extern int use_cyclone; diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/shadow.h Tue Apr 11 18:54:18 2006 -0600 @@ -135,6 +135,8 @@ extern int set_p2m_entry( struct domain_mmap_cache *l1cache); extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype); +extern void free_shadow_page(unsigned long smfn); + extern void shadow_l1_normal_pt_update(struct domain *d, paddr_t pa, l1_pgentry_t l1e, struct domain_mmap_cache *cache); @@ -660,54 +662,12 @@ static inline void shadow_sync_and_drop_ if ( likely(!shadow_mode_refcounts(d)) ) return; - shadow_lock(d); - if ( page_out_of_sync(page) ) __shadow_sync_mfn(d, page_to_mfn(page)); shadow_remove_all_access(d, page_to_mfn(page)); - - shadow_unlock(d); -} -#endif - -static inline void guest_physmap_add_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) -{ - struct domain_mmap_cache c1, c2; - - if ( likely(!shadow_mode_translate(d)) ) - return; - - domain_mmap_cache_init(&c1); - domain_mmap_cache_init(&c2); - shadow_lock(d); - shadow_sync_and_drop_references(d, mfn_to_page(mfn)); - set_p2m_entry(d, gpfn, mfn, &c1, &c2); - set_gpfn_from_mfn(mfn, gpfn); - shadow_unlock(d); - domain_mmap_cache_destroy(&c1); - domain_mmap_cache_destroy(&c2); -} - -static inline void guest_physmap_remove_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) -{ - struct domain_mmap_cache c1, c2; - - if ( likely(!shadow_mode_translate(d)) ) - return; - - domain_mmap_cache_init(&c1); - domain_mmap_cache_init(&c2); - shadow_lock(d); - shadow_sync_and_drop_references(d, mfn_to_page(mfn)); - set_p2m_entry(d, gpfn, -1, &c1, &c2); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); - shadow_unlock(d); - domain_mmap_cache_destroy(&c1); - domain_mmap_cache_destroy(&c2); -} +} +#endif /************************************************************************/ @@ -738,8 +698,6 @@ get_shadow_ref(unsigned long smfn) return 1; } - -extern void free_shadow_page(unsigned long smfn); /* * Drop a shadow reference to smfn. 
@@ -1525,6 +1483,49 @@ static inline void set_shadow_status( /************************************************************************/ +static inline void guest_physmap_add_page( + struct domain *d, unsigned long gpfn, unsigned long mfn) +{ + struct domain_mmap_cache c1, c2; + + if ( likely(!shadow_mode_translate(d)) ) + return; + + domain_mmap_cache_init(&c1); + domain_mmap_cache_init(&c2); + shadow_lock(d); + shadow_sync_and_drop_references(d, mfn_to_page(mfn)); + set_p2m_entry(d, gpfn, mfn, &c1, &c2); + set_gpfn_from_mfn(mfn, gpfn); + shadow_unlock(d); + domain_mmap_cache_destroy(&c1); + domain_mmap_cache_destroy(&c2); +} + +static inline void guest_physmap_remove_page( + struct domain *d, unsigned long gpfn, unsigned long mfn) +{ + struct domain_mmap_cache c1, c2; + unsigned long type; + + if ( likely(!shadow_mode_translate(d)) ) + return; + + domain_mmap_cache_init(&c1); + domain_mmap_cache_init(&c2); + shadow_lock(d); + shadow_sync_and_drop_references(d, mfn_to_page(mfn)); + while ( (type = shadow_max_pgtable_type(d, gpfn, NULL)) != PGT_none ) + free_shadow_page(__shadow_status(d, gpfn, type)); + set_p2m_entry(d, gpfn, -1, &c1, &c2); + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + shadow_unlock(d); + domain_mmap_cache_destroy(&c1); + domain_mmap_cache_destroy(&c2); +} + +/************************************************************************/ + void static inline shadow_update_min_max(unsigned long smfn, int index) { diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/smp.h --- a/xen/include/asm-x86/smp.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/smp.h Tue Apr 11 18:54:18 2006 -0600 @@ -74,16 +74,11 @@ static inline int num_booting_cpus(void) #ifdef CONFIG_X86_LOCAL_APIC -#ifdef APIC_DEFINITION -extern int hard_smp_processor_id(void); -#else -#include <mach_apicdef.h> static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); } -#endif static __inline int logical_smp_processor_id(void) { diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/time.h --- a/xen/include/asm-x86/time.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/asm-x86/time.h Tue Apr 11 18:54:18 2006 -0600 @@ -6,9 +6,6 @@ extern void calibrate_tsc_bp(void); extern void calibrate_tsc_ap(void); - -struct domain; -extern void init_domain_time(struct domain *d); typedef u64 cycles_t; diff -r 5719550652a1 -r 5cc367720223 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/arch-x86_32.h Tue Apr 11 18:54:18 2006 -0600 @@ -168,6 +168,11 @@ typedef struct { unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ } arch_vcpu_info_t; +typedef struct { + unsigned long cs; + unsigned long eip; +} xen_callback_t; + #endif /* !__ASSEMBLY__ */ /* diff -r 5719550652a1 -r 5cc367720223 xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/arch-x86_64.h Tue Apr 11 18:54:18 2006 -0600 @@ -244,6 +244,8 @@ typedef struct { unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ } arch_vcpu_info_t; +typedef unsigned long xen_callback_t; + #endif /* !__ASSEMBLY__ */ /* diff -r 5719550652a1 -r 5cc367720223 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/dom0_ops.h Tue Apr 11 18:54:18 2006 -0600 @@ -140,15 +140,16 @@ DEFINE_GUEST_HANDLE(dom0_settime_t); DEFINE_GUEST_HANDLE(dom0_settime_t); 
#define DOM0_GETPAGEFRAMEINFO 18 +#define LTAB_SHIFT 28 #define NOTAB 0 /* normal page */ -#define L1TAB (1<<28) -#define L2TAB (2<<28) -#define L3TAB (3<<28) -#define L4TAB (4<<28) +#define L1TAB (1<<LTAB_SHIFT) +#define L2TAB (2<<LTAB_SHIFT) +#define L3TAB (3<<LTAB_SHIFT) +#define L4TAB (4<<LTAB_SHIFT) #define LPINTAB (1<<31) -#define XTAB (0xf<<28) /* invalid page */ +#define XTAB (0xf<<LTAB_SHIFT) /* invalid page */ #define LTAB_MASK XTAB -#define LTABTYPE_MASK (0x7<<28) +#define LTABTYPE_MASK (0x7<<LTAB_SHIFT) typedef struct dom0_getpageframeinfo { /* IN variables. */ diff -r 5719550652a1 -r 5cc367720223 xen/include/public/event_channel.h --- a/xen/include/public/event_channel.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/event_channel.h Tue Apr 11 18:54:18 2006 -0600 @@ -50,9 +50,13 @@ typedef struct evtchn_bind_interdomain { * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified * vcpu. * NOTES: - * 1. A virtual IRQ may be bound to at most one event channel per vcpu. - * 2. The allocated event channel is bound to the specified vcpu. The binding - * may not be changed. + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. */ #define EVTCHNOP_bind_virq 1 typedef struct evtchn_bind_virq { @@ -152,9 +156,11 @@ typedef struct evtchn_status { * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: - * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised - * the binding. This binding cannot be changed. - * 2. All other channels notify vcpu0 by default. This default is set when + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ diff -r 5719550652a1 -r 5cc367720223 xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/hvm/ioreq.h Tue Apr 11 18:54:18 2006 -0600 @@ -34,6 +34,7 @@ #define IOREQ_TYPE_AND 2 #define IOREQ_TYPE_OR 3 #define IOREQ_TYPE_XOR 4 +#define IOREQ_TYPE_XCHG 5 /* * VMExit dispatcher should cooperate with instruction decoder to diff -r 5719550652a1 -r 5cc367720223 xen/include/public/sched.h --- a/xen/include/public/sched.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/sched.h Tue Apr 11 18:54:18 2006 -0600 @@ -13,17 +13,17 @@ /* * The prototype for this hypercall is: - * long sched_op_new(int cmd, void *arg) + * long sched_op(int cmd, void *arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == Operation-specific extra argument(s), as described below. * - * **NOTE**: - * Versions of Xen prior to 3.0.2 provide only the following legacy version + * Versions of Xen prior to 3.0.2 provided only the following legacy version * of this hypercall, supporting only the commands yield, block and shutdown: * long sched_op(int cmd, unsigned long arg) * @cmd == SCHEDOP_??? (scheduler operation). 
* @arg == 0 (SCHEDOP_yield and SCHEDOP_block) * == SHUTDOWN_* code (SCHEDOP_shutdown) + * This legacy version is available to new guests as sched_op_compat(). */ /* @@ -65,6 +65,19 @@ DEFINE_GUEST_HANDLE(sched_poll_t); DEFINE_GUEST_HANDLE(sched_poll_t); /* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 +typedef struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_xxx reason */ +} sched_remote_shutdown_t; +DEFINE_GUEST_HANDLE(sched_remote_shutdown_t); + +/* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. diff -r 5719550652a1 -r 5cc367720223 xen/include/public/xen.h --- a/xen/include/public/xen.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/public/xen.h Tue Apr 11 18:54:18 2006 -0600 @@ -37,7 +37,7 @@ #define __HYPERVISOR_stack_switch 3 #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op 6 +#define __HYPERVISOR_sched_op_compat 6 /* compat as of 0x00030101 */ #define __HYPERVISOR_dom0_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 @@ -59,18 +59,27 @@ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 #define __HYPERVISOR_nmi_op 28 -#define __HYPERVISOR_sched_op_new 29 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 /* * VIRTUAL INTERRUPTS * * Virtual interrupts that a guest OS may receive from Xen. - */ -#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ -#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */ -#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ -#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ -#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ #define NR_VIRQS 8 /* @@ -436,6 +445,8 @@ typedef uint8_t xen_domain_handle_t[16]; #endif /* !__ASSEMBLY__ */ +#include "xen-compat.h" + #endif /* __XEN_PUBLIC_XEN_H__ */ /* diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/event.h --- a/xen/include/xen/event.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/xen/event.h Tue Apr 11 18:54:18 2006 -0600 @@ -3,7 +3,7 @@ * * A nice interface for passing asynchronous events to guest OSes. 
* - * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2002-2006, K A Fraser */ #ifndef __XEN_EVENT_H__ @@ -18,11 +18,18 @@ extern void evtchn_set_pending(struct vc extern void evtchn_set_pending(struct vcpu *v, int port); /* - * send_guest_virq: + * send_guest_vcpu_virq: Notify guest via a per-VCPU VIRQ. * @v: VCPU to which virtual IRQ should be sent * @virq: Virtual IRQ number (VIRQ_*) */ -extern void send_guest_virq(struct vcpu *v, int virq); +extern void send_guest_vcpu_virq(struct vcpu *v, int virq); + +/* + * send_guest_global_virq: Notify guest via a global VIRQ. + * @d: Domain to which virtual IRQ should be sent + * @virq: Virtual IRQ number (VIRQ_*) + */ +extern void send_guest_global_virq(struct domain *d, int virq); /* * send_guest_pirq: @@ -30,11 +37,6 @@ extern void send_guest_virq(struct vcpu * @pirq: Physical IRQ number */ extern void send_guest_pirq(struct domain *d, int pirq); - -/* Note: Bitwise operations result in fast code with no branches. */ -#define event_pending(v) \ - (!!(v)->vcpu_info->evtchn_upcall_pending & \ - !(v)->vcpu_info->evtchn_upcall_mask) #define evtchn_pending(d, p) \ (test_bit((p), &(d)->shared_info->evtchn_pending[0])) diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/hypercall.h --- a/xen/include/xen/hypercall.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/xen/hypercall.h Tue Apr 11 18:54:18 2006 -0600 @@ -18,9 +18,14 @@ do_ni_hypercall( void); extern long +do_sched_op_compat( + int cmd, + unsigned long arg); + +extern long do_sched_op( int cmd, - unsigned long arg); + GUEST_HANDLE(void) arg); extern long do_dom0_op( diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/xen/sched.h Tue Apr 11 18:54:18 2006 -0600 @@ -14,6 +14,7 @@ #include <xen/grant_table.h> #include <xen/rangeset.h> #include <asm/domain.h> +#include <xen/xenoprof.h> extern unsigned long volatile jiffies; extern rwlock_t domlist_lock; @@ -133,7 +134,7 @@ struct domain */ #define NR_PIRQS 256 /* Put this somewhere sane! */ u16 pirq_to_evtchn[NR_PIRQS]; - u32 pirq_mask[NR_PIRQS/32]; + DECLARE_BITMAP(pirq_mask, NR_PIRQS); /* I/O capabilities (access to IRQs and memory-mapped I/O). */ struct rangeset *iomem_caps; @@ -155,6 +156,9 @@ struct domain /* Control-plane tools handle for this domain. */ xen_domain_handle_t handle; + + /* OProfile support. 
*/ + struct xenoprof *xenoprof; }; struct domain_setup_info diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/softirq.h --- a/xen/include/xen/softirq.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/xen/softirq.h Tue Apr 11 18:54:18 2006 -0600 @@ -9,7 +9,8 @@ #define NMI_SOFTIRQ 4 #define PAGE_SCRUB_SOFTIRQ 5 #define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ 6 -#define NR_SOFTIRQS 7 +#define TRACE_SOFTIRQ 7 +#define NR_SOFTIRQS 8 #ifndef __ASSEMBLY__ diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/time.h --- a/xen/include/xen/time.h Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/include/xen/time.h Tue Apr 11 18:54:18 2006 -0600 @@ -55,7 +55,9 @@ s_time_t get_s_time(void); #define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL)) #define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL)) -extern void update_dom_time(struct vcpu *v); +extern void update_vcpu_system_time(struct vcpu *v); +extern void update_domain_wallclock_time(struct domain *d); + extern void do_settime( unsigned long secs, unsigned long nsecs, u64 system_time_base); diff -r 5719550652a1 -r 5cc367720223 xen/tools/Makefile --- a/xen/tools/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/tools/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -1,10 +1,12 @@ include $(BASEDIR)/../Config.mk +.PHONY: default default: $(MAKE) -C figlet $(MAKE) symbols +.PHONY: clean clean: $(MAKE) -C figlet clean rm -f *.o symbols diff -r 5719550652a1 -r 5cc367720223 xen/tools/figlet/Makefile --- a/xen/tools/figlet/Makefile Tue Apr 11 13:55:47 2006 -0600 +++ b/xen/tools/figlet/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -4,5 +4,6 @@ figlet: figlet.c figlet: figlet.c $(HOSTCC) -o $@ $< +.PHONY: clean clean: rm -f *.o figlet diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/oprofile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,16 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +ifdef CONFIG_XEN +oprofile-y := $(DRIVER_OBJS) xenoprof.o +else +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ + op_model_ppro.o op_model_p4.o +oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +endif diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,395 @@ +/** + * @file xenoprof.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon and Jose Renato Santos for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <linux/oprofile.h> +#include <linux/sysdev.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/vmalloc.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/pgtable.h> +#include <xen/evtchn.h> +#include "op_counter.h" + +#include <xen/interface/xen.h> +#include <xen/interface/xenoprof.h> + +static int xenoprof_start(void); +static void xenoprof_stop(void); + +void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot); + +static int xenoprof_enabled = 0; +static int num_events = 0; +static int is_primary = 0; + +/* sample buffers shared with Xen */ +xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS]; +/* Shared buffer area */ +char * shared_buffer; +/* Number of buffers in shared area (one per VCPU) */ +int nbuf; +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ +int ovf_irq[NR_CPUS]; +/* cpu model type string - copied from Xen memory space on XENOPROF_init command */ +char cpu_type[XENOPROF_CPU_TYPE_SIZE]; + +#ifdef CONFIG_PM + +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state) +{ + if (xenoprof_enabled == 1) + xenoprof_stop(); + return 0; +} + + +static int xenoprof_resume(struct sys_device * dev) +{ + if (xenoprof_enabled == 1) + xenoprof_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + set_kset_name("oprofile"), + .resume = xenoprof_resume, + .suspend = xenoprof_suspend +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_driverfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void __exit exit_driverfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_driverfs() do { } while (0) +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ + +unsigned long long oprofile_samples = 0; + +static irqreturn_t +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) +{ + int head, tail, size; + xenoprof_buf_t * buf; + int cpu; + + cpu = smp_processor_id(); + buf = xenoprof_buf[cpu]; + + head = buf->event_head; + tail = buf->event_tail; + size = buf->event_size; + + if (tail > head) { + while (tail < size) { + oprofile_add_pc(buf->event_log[tail].eip, + buf->event_log[tail].mode, + buf->event_log[tail].event); + oprofile_samples++; + tail++; + } + tail = 0; + } + while (tail < head) { + oprofile_add_pc(buf->event_log[tail].eip, + buf->event_log[tail].mode, + buf->event_log[tail].event); + oprofile_samples++; + tail++; + } + + buf->event_tail = tail; + + return IRQ_HANDLED; +} + + +static void unbind_virq_cpu(void * info) +{ + int cpu = smp_processor_id(); + if (ovf_irq[cpu] >= 0) { + unbind_from_irqhandler(ovf_irq[cpu], NULL); + ovf_irq[cpu] = -1; + } +} + + +static void unbind_virq(void) +{ + on_each_cpu(unbind_virq_cpu, NULL, 0, 1); +} + + +int bind_virq_error; + +static void bind_virq_cpu(void * info) +{ + int result; + int cpu = smp_processor_id(); + + result = bind_virq_to_irqhandler(VIRQ_XENOPROF, + cpu, + xenoprof_ovf_interrupt, + SA_INTERRUPT, + "xenoprof", + NULL); + + if (result<0) { + bind_virq_error = result; + printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU " + "%d\n", cpu); + } else { + ovf_irq[cpu] = result; + } +} + + +static int bind_virq(void) +{ + bind_virq_error = 0; + 
on_each_cpu(bind_virq_cpu, NULL, 0, 1); + if (bind_virq_error) { + unbind_virq(); + return bind_virq_error; + } else { + return 0; + } +} + + +static int xenoprof_setup(void) +{ + int ret; + + ret = bind_virq(); + if (ret) + return ret; + + if (is_primary) { + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, + (unsigned long)NULL, + (unsigned long)NULL); + if (ret) + goto err; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, + (unsigned long)&counter_config, + (unsigned long)num_events); + if (ret) + goto err; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, + (unsigned long)NULL, + (unsigned long)NULL); + if (ret) + goto err; + + xenoprof_enabled = 1; + return 0; + err: + unbind_virq(); + return ret; +} + + +static void xenoprof_shutdown(void) +{ + xenoprof_enabled = 0; + + HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, + (unsigned long)NULL, + (unsigned long)NULL); + + if (is_primary) { + HYPERVISOR_xenoprof_op(XENOPROF_release_counters, + (unsigned long)NULL, + (unsigned long)NULL); + } + + unbind_virq(); +} + + +static int xenoprof_start(void) +{ + int ret = 0; + + if (is_primary) + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, + (unsigned long)NULL, + (unsigned long)NULL); + return ret; +} + + +static void xenoprof_stop(void) +{ + if (is_primary) + HYPERVISOR_xenoprof_op(XENOPROF_stop, + (unsigned long)NULL, + (unsigned long)NULL); +} + + +static int xenoprof_set_active(int * active_domains, + unsigned int adomains) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, + (unsigned long)active_domains, + (unsigned long)adomains); + return ret; +} + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int xenoprof_create_files(struct super_block * sb, struct dentry * root) +{ + unsigned int i; + + for (i = 0; i < num_events; ++i) { + struct dentry * dir; + char buf[2]; + + snprintf(buf, 2, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", + &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", + &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", + &counter_config[i].count); + oprofilefs_create_ulong(sb, dir, "unit_mask", + &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", + &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", + &counter_config[i].user); + } + + return 0; +} + + +struct oprofile_operations xenoprof_ops = { + .create_files = xenoprof_create_files, + .set_active = xenoprof_set_active, + .setup = xenoprof_setup, + .shutdown = xenoprof_shutdown, + .start = xenoprof_start, + .stop = xenoprof_stop +}; + + +/* in order to get driverfs right */ +static int using_xenoprof; + +int __init oprofile_arch_init(struct oprofile_operations * ops) +{ + xenoprof_init_result_t result; + xenoprof_buf_t * buf; + int max_samples = 16; + int vm_size; + int npages; + int i; + + int ret = HYPERVISOR_xenoprof_op(XENOPROF_init, + (unsigned long)max_samples, + (unsigned long)&result); + + if (!ret) { + pgprot_t prot = __pgprot(_KERNPG_TABLE); + + num_events = result.num_events; + is_primary = result.is_primary; + nbuf = result.nbuf; + + npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1; + vm_size = npages * PAGE_SIZE; + + shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr, + vm_size, prot); + if (!shared_buffer) { + ret = -ENOMEM; + goto out; + } + + for (i=0; i< nbuf; i++) { + buf = (xenoprof_buf_t*) + &shared_buffer[i * result.bufsize]; + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); + 
xenoprof_buf[buf->vcpu_id] = buf; + } + + /* cpu_type is detected by Xen */ + cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; + strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); + xenoprof_ops.cpu_type = cpu_type; + + init_driverfs(); + using_xenoprof = 1; + *ops = xenoprof_ops; + + for (i=0; i<NR_CPUS; i++) + ovf_irq[i] = -1; + } + out: + printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, " + "is_primary %d\n", ret, num_events, is_primary); + return ret; +} + + +void __exit oprofile_arch_exit(void) +{ + if (using_xenoprof) + exit_driverfs(); + + if (shared_buffer) { + vunmap(shared_buffer); + shared_buffer = NULL; + } + if (is_primary) + HYPERVISOR_xenoprof_op(XENOPROF_shutdown, + (unsigned long)NULL, + (unsigned long)NULL); +} diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,22 @@ +# +# oprofile for x86-64. +# Just reuse the one from i386. +# + +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +ifdef CONFIG_XEN +OPROFILE-y := xenoprof.o +else +OPROFILE-y := init.o backtrace.o +OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \ + op_model_ppro.o +OPROFILE-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +endif +oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y)) diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,173 @@ +/****************************************************************************** + * Backend-client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code in the backend + * driver. + * + * Copyright (C) 2005-2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <xen/gnttab.h> +#include <xen/xenbus.h> +#include <xen/driver_util.h> + +/* Based on Rusty Russell's skeleton driver's map_page */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + struct gnttab_map_grant_ref op = { + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + struct vm_struct *area; + + *vaddr = NULL; + + area = alloc_vm_area(PAGE_SIZE); + if (!area) + return -ENOMEM; + + op.host_addr = (unsigned long)area->addr; + + lock_vm_area(area); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + unlock_vm_area(area); + + if (op.status != GNTST_okay) { + free_vm_area(area); + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + return op.status; + } + + /* Stuff the handle in an unused field */ + area->phys_addr = (unsigned long)op.handle; + + *vaddr = area->addr; + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + + +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr) +{ + struct gnttab_map_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + + if (op.status != GNTST_okay) { + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + } else + *handle = op.handle; + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/* Based on Rusty Russell's skeleton driver's unmap_page */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + struct vm_struct *area; + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + }; + + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) + * method so that we don't have to muck with vmalloc internals here. + * We could force the user to hang on to their struct vm_struct from + * xenbus_map_ring_valloc, but these 6 lines considerably simplify + * this API. 
+ */ + read_lock(&vmlist_lock); + for (area = vmlist; area != NULL; area = area->next) { + if (area->addr == vaddr) + break; + } + read_unlock(&vmlist_lock); + + if (!area) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + op.handle = (grant_handle_t)area->phys_addr; + + lock_vm_area(area); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + unlock_vm_area(area); + + if (op.status == GNTST_okay) + free_vm_area(area); + else + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + (int16_t)area->phys_addr, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + + +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr) +{ + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .handle = handle, + }; + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + handle, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); + + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 5719550652a1 -r 5cc367720223 patches/linux-2.6.16/x86-increase-interrupt-vector-range.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16/x86-increase-interrupt-vector-range.patch Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,107 @@ +Subject: Increase x86 interrupt vector range + +Remove the limit of 256 interrupt vectors by changing the value +stored in orig_{e,r}ax to be the negated interrupt vector. +The orig_{e,r}ax needs to be < 0 to allow the signal code to +distinguish between return from interrupt and return from syscall. +With this change applied, NR_IRQS can be > 256. 
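As an aside for readers following that description, here is a minimal standalone sketch (not part of the patch or of any Xen/Linux source) of the arithmetic it relies on: pushing ~vector keeps orig_{e,r}ax negative for every vector, so the signal code's sign test still distinguishes interrupt entry from syscall return, while ~orig_eax recovers vectors above 255 intact. The helper names encode_vector/decode_vector are illustrative only.

#include <assert.h>
#include <stdio.h>

/* Illustrative only: mirrors the "push $~(vector)" encoding in entry.S and
 * the "irq = ~regs->orig_eax" decode in do_IRQ(), outside any kernel context. */
static long encode_vector(unsigned int vector) { return ~(long)vector; }
static unsigned int decode_vector(long orig_eax) { return (unsigned int)~orig_eax; }

int main(void)
{
	unsigned int v;
	for (v = 0; v < 1024; v++) {
		long orig = encode_vector(v);
		assert(orig < 0);                 /* still reads as "not a syscall" */
		assert(decode_vector(orig) == v); /* round-trips for vectors >= 256 too */
	}
	printf("~vector encoding round-trips for %u vectors\n", v);
	return 0;
}

The earlier "$vector-256" scheme only stayed negative for vectors below 256, which is exactly the limit this patch removes.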
+ +Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> +--- + arch/i386/kernel/entry.S | 4 ++-- + arch/i386/kernel/irq.c | 4 ++-- + arch/x86_64/kernel/entry.S | 2 +- + arch/x86_64/kernel/irq.c | 4 ++-- + arch/x86_64/kernel/smp.c | 4 ++-- + include/asm-x86_64/hw_irq.h | 2 +- + 6 files changed, 10 insertions(+), 10 deletions(-) + +diff -r 7d239c83edea arch/i386/kernel/entry.S +--- a/arch/i386/kernel/entry.S Mon Mar 20 06:00:20 2006 +0000 ++++ b/arch/i386/kernel/entry.S Fri Mar 31 17:01:35 2006 +0100 +@@ -406,7 +406,7 @@ ENTRY(irq_entries_start) + ENTRY(irq_entries_start) + .rept NR_IRQS + ALIGN +-1: pushl $vector-256 ++1: pushl $~(vector) + jmp common_interrupt + .data + .long 1b +@@ -423,7 +423,7 @@ common_interrupt: + + #define BUILD_INTERRUPT(name, nr) \ + ENTRY(name) \ +- pushl $nr-256; \ ++ pushl $~(nr); \ + SAVE_ALL \ + movl %esp,%eax; \ + call smp_/**/name; \ +diff -r 7d239c83edea arch/i386/kernel/irq.c +--- a/arch/i386/kernel/irq.c Mon Mar 20 06:00:20 2006 +0000 ++++ b/arch/i386/kernel/irq.c Fri Mar 31 17:01:35 2006 +0100 +@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPU + */ + fastcall unsigned int do_IRQ(struct pt_regs *regs) + { +- /* high bits used in ret_from_ code */ +- int irq = regs->orig_eax & 0xff; ++ /* high bit used in ret_from_ code */ ++ int irq = ~regs->orig_eax; + #ifdef CONFIG_4KSTACKS + union irq_ctx *curctx, *irqctx; + u32 *isp; +diff -r 7d239c83edea arch/x86_64/kernel/entry.S +--- a/arch/x86_64/kernel/entry.S Mon Mar 20 06:00:20 2006 +0000 ++++ b/arch/x86_64/kernel/entry.S Fri Mar 31 17:01:35 2006 +0100 +@@ -609,7 +609,7 @@ retint_kernel: + */ + .macro apicinterrupt num,func + INTR_FRAME +- pushq $\num-256 ++ pushq $~(\num) + CFI_ADJUST_CFA_OFFSET 8 + interrupt \func + jmp ret_from_intr +diff -r 7d239c83edea arch/x86_64/kernel/irq.c +--- a/arch/x86_64/kernel/irq.c Mon Mar 20 06:00:20 2006 +0000 ++++ b/arch/x86_64/kernel/irq.c Fri Mar 31 17:01:35 2006 +0100 +@@ -96,8 +96,8 @@ skip: + */ + asmlinkage unsigned int do_IRQ(struct pt_regs *regs) + { +- /* high bits used in ret_from_ code */ +- unsigned irq = regs->orig_rax & 0xff; ++ /* high bit used in ret_from_ code */ ++ unsigned irq = ~regs->orig_rax; + + exit_idle(); + irq_enter(); +diff -r 7d239c83edea arch/x86_64/kernel/smp.c +--- a/arch/x86_64/kernel/smp.c Mon Mar 20 06:00:20 2006 +0000 ++++ b/arch/x86_64/kernel/smp.c Fri Mar 31 17:01:35 2006 +0100 +@@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt + + cpu = smp_processor_id(); + /* +- * orig_rax contains the interrupt vector - 256. ++ * orig_rax contains the negated interrupt vector. + * Use that to determine where the sender put the data. 
+ */ +- sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START; ++ sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); + + if (!cpu_isset(cpu, f->flush_cpumask)) +diff -r 7d239c83edea include/asm-x86_64/hw_irq.h +--- a/include/asm-x86_64/hw_irq.h Mon Mar 20 06:00:20 2006 +0000 ++++ b/include/asm-x86_64/hw_irq.h Fri Mar 31 17:01:35 2006 +0100 +@@ -127,7 +127,7 @@ __asm__( \ + __asm__( \ + "\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ +- "push $" #nr "-256 ; " \ ++ "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + + #if defined(CONFIG_X86_IO_APIC) diff -r 5719550652a1 -r 5cc367720223 patches/linux-2.6.16/xenoprof-generic.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16/xenoprof-generic.patch Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,384 @@ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c +--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/buffer_sync.c 2006-04-03 15:53:05.000000000 +0100 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -275,15 +279,24 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace. 
+ */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -496,7 +509,7 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; +@@ -513,12 +526,12 @@ void sync_buffer(int cpu) + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + + if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { ++ if (s->event <= CPU_MODE_XEN) { + /* kernel/userspace switch */ +- in_kernel = s->event; ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); +@@ -536,7 +549,7 @@ void sync_buffer(int cpu) + } + } else { + if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { ++ !add_sample(mm, s, cpu_mode)) { + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/cpu_buffer.c 2006-04-03 15:53:05.000000000 +0100 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. + * Eventually each CPU's buffer is processed into the global +@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. 
We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; ++ WARN_ON(cpu_mode > CPU_MODE_XEN); + + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/cpu_buffer.h 2006-04-03 15:53:05.000000000 +0100 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 + + #endif /* OPROFILE_CPU_BUFFER_H */ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h +--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/event_buffer.h 2006-04-03 15:53:05.000000000 +0100 +@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void); + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 + #define TRACE_END_CODE 9 ++#define XEN_ENTER_SWITCH_CODE 10 + + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprof.c 2006-04-03 15:53:05.000000000 +0100 +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + + #include <linux/kernel.h> +@@ -19,7 +23,7 @@ + #include "cpu_buffer.h" + #include "buffer_sync.h" + #include "oprofile_stats.h" +- ++ + struct oprofile_operations oprofile_ops; + + unsigned long oprofile_started; +@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem); + */ + static int timer = 0; + ++extern unsigned int adomains; ++extern int active_domains[MAX_OPROF_DOMAINS]; ++ ++int oprofile_set_active(void) ++{ ++ if (oprofile_ops.set_active) ++ return oprofile_ops.set_active(active_domains, adomains); ++ ++ return -EINVAL; ++} ++ + int oprofile_setup(void) + { + int err; +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprof.h 2006-04-03 15:53:05.000000000 +0100 +@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(void); + + #endif /* OPROF_H */ +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c +--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c 2006-03-20 05:53:29.000000000 +0000 ++++ ./drivers/oprofile/oprofile_files.c 2006-04-03 15:53:05.000000000 +0100 +@@ -5,15 +5,21 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include <linux/fs.h> + #include <linux/oprofile.h> ++#include <asm/uaccess.h> ++#include <linux/ctype.h> + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * + static struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++unsigned int adomains = 0; ++long active_domains[MAX_OPROF_DOMAINS]; ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char tmpbuf[TMPBUFSIZE]; ++ char * startp = tmpbuf; ++ char * endp = tmpbuf; ++ int i; ++ unsigned long val; ++ ++ if (*offset) ++ return -EINVAL; ++ if (!count) ++ return 0; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ memset(tmpbuf, 0x0, TMPBUFSIZE); ++ ++ if (copy_from_user(tmpbuf, buf, count)) ++ return -EFAULT; ++ ++ for (i = 0; i < MAX_OPROF_DOMAINS; i++) ++ active_domains[i] = -1; ++ adomains = 0; ++ ++ while (1) { ++ val = simple_strtol(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp)) ++ endp++; ++ active_domains[adomains++] = val; ++ if (adomains >= MAX_OPROF_DOMAINS) ++ break; ++ startp = endp; ++ } ++ if (oprofile_set_active()) ++ return -EINVAL; ++ return count; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char tmpbuf[TMPBUFSIZE]; ++ size_t len = 0; ++ int i; ++ /* This is all screwed up if we run out of space */ ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, TMPBUFSIZE - len, ++ "%u ", (unsigned int)active_domains[i]); ++ len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n"); ++ return simple_read_from_buffer((void __user *)buf, count, ++ offset, tmpbuf, len); ++} ++ ++ ++static struct 
file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h +--- ../pristine-linux-2.6.16/include/linux/oprofile.h 2006-03-20 05:53:29.000000000 +0000 ++++ ./include/linux/oprofile.h 2006-04-03 15:53:05.000000000 +0100 +@@ -16,6 +16,8 @@ + #include <linux/types.h> + #include <linux/spinlock.h> + #include <asm/atomic.h> ++ ++#include <xen/interface/xenoprof.h> + + struct super_block; + struct dentry; +@@ -27,6 +29,8 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ diff -r 5719550652a1 -r 5cc367720223 tools/xm-test/grouptest/medium --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/grouptest/medium Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,25 @@ +block-create 09_block_attach_and_dettach_device_check_data_pos.test +block-list +console +destroy 01_destroy_basic_pos.test 02_destroy_noparm_neg.test 03_destroy_nonexist_neg.test 04_destroy_badparm_neg.test 05_destroy_byid_pos.test 06_destroy_dom0_neg.test +dmesg +domid +domname +enforce_dom0_cpus +help +info +list +memmax +memset 01_memset_basic_pos.test 02_memset_badparm_neg.test 04_memset_smallmem_pos.test +migrate +network-attach 03_network_attach_detach_multiple_pos.test +reboot +restore 02_restore_badparm_neg.test 03_restore_badfilename_neg.test 04_restore_withdevices_pos.test +save +sedf +shutdown +sysrq 01_sysrq_basic_neg.test 02_sysrq_sync_pos.test +unpause +vcpu-disable +vcpu-pin +vtpm diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/genapic/delivery.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/genapic/delivery.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,68 @@ +#include <xen/config.h> +#include <xen/irq.h> +#include <xen/sched.h> +#include <asm/current.h> +#include <asm/smp.h> +#include <asm/hardirq.h> +#include <mach_apic.h> + + +/* + * LOGICAL FLAT DELIVERY MODE (multicast via bitmask to <= 8 logical APIC IDs). + */ + +void init_apic_ldr_flat(void) +{ + unsigned long val; + + apic_write_around(APIC_DFR, APIC_DFR_FLAT); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write_around(APIC_LDR, val); +} + +void clustered_apic_check_flat(void) +{ + printk("Enabling APIC mode: Flat. Using %d I/O APICs\n", nr_ioapics); +} + +cpumask_t target_cpus_flat(void) +{ + return cpu_online_map; +} + +unsigned int cpu_mask_to_apicid_flat(cpumask_t cpumask) +{ + return cpus_addr(cpumask)[0]; +} + + +/* + * PHYSICAL DELIVERY MODE (unicast to physical APIC IDs). 
+ */ + +void init_apic_ldr_phys(void) +{ + unsigned long val; + apic_write_around(APIC_DFR, APIC_DFR_FLAT); + /* A dummy logical ID should be fine. We only deliver in phys mode. */ + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + apic_write_around(APIC_LDR, val); +} + +void clustered_apic_check_phys(void) +{ + printk("Enabling APIC mode: Phys. Using %d I/O APICs\n", nr_ioapics); +} + +cpumask_t target_cpus_phys(void) +{ + /* IRQs will get bound more accurately later. */ + return cpumask_of_cpu(0); +} + +unsigned int cpu_mask_to_apicid_phys(cpumask_t cpumask) +{ + /* As we are using single CPU as destination, pick only one CPU here */ + return cpu_physical_id(first_cpu(cpumask)); +} diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/Makefile Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,5 @@ +obj-y += xenoprof.o +obj-y += nmi_int.o +obj-y += op_model_p4.o +obj-y += op_model_ppro.o +obj-y += op_model_athlon.o diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/nmi_int.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/nmi_int.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,391 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified for Xen: by Aravind Menon & Jose Renato Santos + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include <xen/event.h> +#include <xen/types.h> +#include <xen/errno.h> +#include <xen/init.h> +#include <public/xen.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/regs.h> +#include <asm/current.h> +#include <xen/delay.h> + +#include "op_counter.h" +#include "op_x86_model.h" + +static struct op_x86_model_spec const * model; +static struct op_msrs cpu_msrs[NR_CPUS]; +static unsigned long saved_lvtpc[NR_CPUS]; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) +extern int active_domains[MAX_OPROF_DOMAINS]; +extern unsigned int adomains; +extern struct domain *primary_profiler; +extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS]; +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE]; +extern int is_active(struct domain *d); +extern int active_id(struct domain *d); +extern int is_profiled(struct domain *d); + +extern size_t strlcpy(char *dest, const char *src, size_t size); + + +int nmi_callback(struct cpu_user_regs *regs, int cpu) +{ + int xen_mode, ovf; + + ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs); + xen_mode = ring_0(regs); + if ( ovf && is_active(current->domain) && !xen_mode ) + send_guest_vcpu_virq(current, VIRQ_XENOPROF); + + return 1; +} + + +static void nmi_cpu_save_registers(struct op_msrs *msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr *counters = msrs->counters; + struct op_msr *controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + + for (i = 0; i < nr_ctrls; ++i) { + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } +} + + +static void nmi_save_registers(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->fill_in_addresses(msrs); + nmi_cpu_save_registers(msrs); +} + + +static void free_msrs(void) +{ + int i; + for (i = 0; i < NR_CPUS; ++i) { + xfree(cpu_msrs[i].counters); + 
cpu_msrs[i].counters = NULL; + xfree(cpu_msrs[i].controls); + cpu_msrs[i].controls = NULL; + } +} + + +static int allocate_msrs(void) +{ + int success = 1; + size_t controls_size = sizeof(struct op_msr) * model->num_controls; + size_t counters_size = sizeof(struct op_msr) * model->num_counters; + + int i; + for (i = 0; i < NR_CPUS; ++i) { + if (!test_bit(i, &cpu_online_map)) + continue; + + cpu_msrs[i].counters = xmalloc_bytes(counters_size); + if (!cpu_msrs[i].counters) { + success = 0; + break; + } + cpu_msrs[i].controls = xmalloc_bytes(controls_size); + if (!cpu_msrs[i].controls) { + success = 0; + break; + } + } + + if (!success) + free_msrs(); + + return success; +} + + +static void nmi_cpu_setup(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->setup_ctrs(msrs); +} + + +int nmi_setup_events(void) +{ + on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + return 0; +} + +int nmi_reserve_counters(void) +{ + if (!allocate_msrs()) + return -ENOMEM; + + /* We walk a thin line between law and rape here. + * We need to be careful to install our NMI handler + * without actually triggering any NMIs as this will + * break the core code horrifically. + */ + if (reserve_lapic_nmi() < 0) { + free_msrs(); + return -EBUSY; + } + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + on_each_cpu(nmi_save_registers, NULL, 0, 1); + return 0; +} + +int nmi_enable_virq(void) +{ + set_nmi_callback(nmi_callback); + return 0; +} + + +void nmi_disable_virq(void) +{ + unset_nmi_callback(); +} + + +static void nmi_restore_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrls; ++i) { + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + + for (i = 0; i < nr_ctrs; ++i) { + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } +} + + +static void nmi_cpu_shutdown(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + nmi_restore_registers(msrs); +} + + +void nmi_release_counters(void) +{ + on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + release_lapic_nmi(); + free_msrs(); +} + + +static void nmi_cpu_start(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, APIC_DM_NMI); + model->start(msrs); +} + + +int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 0, 1); + return 0; +} + + +static void nmi_cpu_stop(void * dummy) +{ + unsigned int v; + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + model->stop(msrs); + + /* restoring APIC_LVTPC can trigger an apic error because the delivery + * mode and vector nr combination can be illegal. That's by design: on + * power on apic lvt contain a zero vector nr which are legal only for + * NMI delivery mode. 
So inhibit apic err before restoring lvtpc + */ + if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI) + || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) ) + { + printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC)); + mdelay(5000); + } + v = apic_read(APIC_LVTERR); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTERR, v); +} + + +void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 0, 1); +} + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int __init p4_init(char * cpu_type) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 4) + return 0; + +#ifndef CONFIG_SMP + strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_spec; + return 1; +#else + switch (smp_num_siblings) { + case 1: + strncpy (cpu_type, "i386/p4", + XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_spec; + return 1; + + case 2: + strncpy (cpu_type, "i386/p4-ht", + XENOPROF_CPU_TYPE_SIZE - 1); + model = &op_p4_ht2_spec; + return 1; + } +#endif + printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n"); + + return 0; +} + + +static int __init ppro_init(char *cpu_type) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + return 0; + + if (cpu_model == 9) { + strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1); + } else if (cpu_model > 5) { + strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1); + } else if (cpu_model > 2) { + strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1); + } else { + strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1); + } + + model = &op_ppro_spec; + return 1; +} + +int nmi_init(int *num_events, int *is_primary, char *cpu_type) +{ + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + int prim = 0; + + if (!cpu_has_apic) + return -ENODEV; + + if (primary_profiler == NULL) { + /* For now, only dom0 can be the primary profiler */ + if (current->domain->domain_id == 0) { + primary_profiler = current->domain; + prim = 1; + } + } + + /* Make sure string is NULL terminated */ + cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0; + + switch (vendor) { + case X86_VENDOR_AMD: + /* Needs to be at least an Athlon (or hammer in 32bit mode) */ + + switch (family) { + default: + return -ENODEV; + case 6: + model = &op_athlon_spec; + strncpy (cpu_type, "i386/athlon", + XENOPROF_CPU_TYPE_SIZE - 1); + break; + case 0xf: + model = &op_athlon_spec; + /* Actually it could be i386/hammer too, but give + user space an consistent name. */ + strncpy (cpu_type, "x86-64/hammer", + XENOPROF_CPU_TYPE_SIZE - 1); + break; + } + break; + + case X86_VENDOR_INTEL: + switch (family) { + /* Pentium IV */ + case 0xf: + if (!p4_init(cpu_type)) + return -ENODEV; + break; + + /* A P6-class processor */ + case 6: + if (!ppro_init(cpu_type)) + return -ENODEV; + break; + + default: + return -ENODEV; + } + break; + + default: + return -ENODEV; + } + + *num_events = model->num_counters; + *is_primary = prim; + + return 0; +} + diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/op_counter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/op_counter.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,29 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. 
+ */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/op_model_athlon.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/op_model_athlon.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,168 @@ +/** + * @file op_model_athlon.h + * athlon / K7 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 4 +#define NUM_CONTROLS 4 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static void athlon_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_K7_PERFCTR0; + msrs->counters[1].addr = MSR_K7_PERFCTR1; + msrs->counters[2].addr = MSR_K7_PERFCTR2; + msrs->counters[3].addr = MSR_K7_PERFCTR3; + + msrs->controls[0].addr = MSR_K7_EVNTSEL0; + msrs->controls[1].addr = MSR_K7_EVNTSEL1; + msrs->controls[2].addr = MSR_K7_EVNTSEL2; + msrs->controls[3].addr = MSR_K7_EVNTSEL3; +} + + +static void athlon_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; + } + } +} + + +static int athlon_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) + +{ + unsigned int low, high; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + 
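Each counter_config[] entry above is what a model's setup_ctrs() turns into an event-select MSR. For example, asking counter 0 to sample roughly every 100000 occurrences of an event, in both guest kernel and user mode (the event number is illustrative, K7 encoding):

    counter_config[0].enabled   = 1;
    counter_config[0].event     = 0x76;      /* e.g. CPU_CLK_UNHALTED on K7  */
    counter_config[0].unit_mask = 0;
    counter_config[0].count     = 100000;    /* events between samples       */
    counter_config[0].kernel    = 1;
    counter_config[0].user      = 1;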
int mode = 0; + + if (guest_kernel_mode(current, regs)) + mode = 1; + else if (ring_0(regs)) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* See op_model_ppro.c */ + return ovf; +} + + +static void athlon_start(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +static void athlon_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + int i; + + /* Subtle: stop on all counters to avoid race with + * setting our pm callback */ + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } +} + + +struct op_x86_model_spec const op_athlon_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &athlon_fill_in_addresses, + .setup_ctrs = &athlon_setup_ctrs, + .check_ctrs = &athlon_check_ctrs, + .start = &athlon_start, + .stop = &athlon_stop +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/op_model_p4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/op_model_p4.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,739 @@ +/** + * @file op_model_p4.c + * P4 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_EVENTS 39 + +#define NUM_COUNTERS_NON_HT 8 +#define NUM_ESCRS_NON_HT 45 +#define NUM_CCCRS_NON_HT 18 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) + +#define NUM_COUNTERS_HT2 4 +#define NUM_ESCRS_HT2 23 +#define NUM_CCCRS_HT2 9 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) + +static unsigned int num_counters = NUM_COUNTERS_NON_HT; + + +/* this has to be checked dynamically since the + hyper-threadedness of a chip is discovered at + kernel boot-time. */ +static inline void setup_num_counters(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + num_counters = NUM_COUNTERS_HT2; +#endif +} + +static int inline addr_increment(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings == 2 ? 2 : 1; +#else + return 1; +#endif +} + + +/* tables to simulate simplified hardware view of p4 registers */ +struct p4_counter_binding { + int virt_counter; + int counter_address; + int cccr_address; +}; + +struct p4_event_binding { + int escr_select; /* value to put in CCCR */ + int event_select; /* value to put in ESCR */ + struct { + int virt_counter; /* for this counter... */ + int escr_address; /* use this ESCR */ + } bindings[2]; +}; + +/* nb: these CTR_* defines are a duplicate of defines in + event/i386.p4*events. 
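The CTR_WRITE()/CTR_OVERFLOWED() pair above encodes the usual perfctr idiom: the counter counts upward and the NMI fires on overflow, so to take a sample every `count` events the negated value is written and the top bit is tested afterwards. Worked through for one counter:

    CTR_WRITE(100000, msrs, 0);        /* PERFCTR0 <- -(100000): bit 31 set           */
    /* 100000 events later the counter wraps past zero and bit 31 clears, so          */
    /* CTR_OVERFLOWED(low) is true; check_ctrs() then logs a sample and re-arms with: */
    CTR_WRITE(reset_value[0], msrs, 0);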
*/ + + +#define CTR_BPU_0 (1 << 0) +#define CTR_MS_0 (1 << 1) +#define CTR_FLAME_0 (1 << 2) +#define CTR_IQ_4 (1 << 3) +#define CTR_BPU_2 (1 << 4) +#define CTR_MS_2 (1 << 5) +#define CTR_FLAME_2 (1 << 6) +#define CTR_IQ_5 (1 << 7) + +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { + { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, + { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, + { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, + { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, + { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, + { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, + { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, + { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } +}; + +#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT + +/* All cccr we don't use. */ +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { + MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 +}; + +/* p4 event codes in libop/op_event.h are indices into this table. */ + +static struct p4_event_binding p4_events[NUM_EVENTS] = { + + { /* BRANCH_RETIRED */ + 0x05, 0x06, + { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, + {CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* MISPRED_BRANCH_RETIRED */ + 0x04, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* TC_DELIVER_MODE */ + 0x01, 0x01, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { CTR_MS_2, MSR_P4_TC_ESCR1} } + }, + + { /* BPU_FETCH_REQUEST */ + 0x00, 0x03, + { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, + { CTR_BPU_2, MSR_P4_BPU_ESCR1} } + }, + + { /* ITLB_REFERENCE */ + 0x03, 0x18, + { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, + { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } + }, + + { /* MEMORY_CANCEL */ + 0x05, 0x02, + { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, + { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } + }, + + { /* MEMORY_COMPLETE */ + 0x02, 0x08, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* LOAD_PORT_REPLAY */ + 0x02, 0x04, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* STORE_PORT_REPLAY */ + 0x02, 0x05, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* MOB_LOAD_REPLAY */ + 0x02, 0x03, + { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, + { CTR_BPU_2, MSR_P4_MOB_ESCR1} } + }, + + { /* PAGE_WALK_TYPE */ + 0x04, 0x01, + { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, + { CTR_BPU_2, MSR_P4_PMH_ESCR1} } + }, + + { /* BSQ_CACHE_REFERENCE */ + 0x07, 0x0c, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { CTR_BPU_2, MSR_P4_BSU_ESCR1} } + }, + + { /* IOQ_ALLOCATION */ + 0x06, 0x03, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { 0, 0 } } + }, + + { /* IOQ_ACTIVE_ENTRIES */ + 0x06, 0x1a, + { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, + { 0, 0 } } + }, + + { /* FSB_DATA_ACTIVITY */ + 0x06, 0x17, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* BSQ_ALLOCATION */ + 0x07, 0x05, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { 0, 0 } } + }, + + { /* BSQ_ACTIVE_ENTRIES */ + 0x07, 0x06, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { 0, 0 } } + }, + + { /* X87_ASSIST */ + 0x05, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* SSE_INPUT_ASSIST */ + 0x01, 0x34, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_SP_UOP */ + 0x01, 0x08, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { 
/* PACKED_DP_UOP */ + 0x01, 0x0c, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_SP_UOP */ + 0x01, 0x0a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_DP_UOP */ + 0x01, 0x0e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 64BIT_MMX_UOP */ + 0x01, 0x02, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 128BIT_MMX_UOP */ + 0x01, 0x1a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_FP_UOP */ + 0x01, 0x04, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_SIMD_MOVES_UOP */ + 0x01, 0x2e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* MACHINE_CLEAR */ + 0x05, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* GLOBAL_POWER_EVENTS */ + 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* TC_MS_XFER */ + 0x00, 0x05, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* UOP_QUEUE_WRITES */ + 0x00, 0x09, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* FRONT_END_EVENT */ + 0x05, 0x08, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* EXECUTION_EVENT */ + 0x05, 0x0c, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* REPLAY_EVENT */ + 0x05, 0x09, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* INSTR_RETIRED */ + 0x04, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOPS_RETIRED */ + 0x04, 0x01, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOP_TYPE */ + 0x02, 0x02, + { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, + { CTR_IQ_5, MSR_P4_RAT_ESCR1} } + }, + + { /* RETIRED_MISPRED_BRANCH_TYPE */ + 0x02, 0x05, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + }, + + { /* RETIRED_BRANCH_TYPE */ + 0x02, 0x04, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + } +}; + + +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) + +#define ESCR_RESERVED_BITS 0x80000003 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) + +#define CCCR_RESERVED_BITS 0x38030FFF +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) +#define CCCR_READ(low, high, i) do 
{rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) + +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) + + +/* this assigns a "stagger" to the current CPU, which is used throughout + the code in this module as an extra array offset, to select the "even" + or "odd" part of all the divided resources. */ +static unsigned int get_stagger(void) +{ +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); +#endif + return 0; +} + + +/* finally, mediate access to a real hardware counter + by passing a "virtual" counter numer to this macro, + along with your stagger setting. */ +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) + +static unsigned long reset_value[NUM_COUNTERS_NON_HT]; + + +static void p4_fill_in_addresses(struct op_msrs * const msrs) +{ + unsigned int i; + unsigned int addr, stag; + + setup_num_counters(); + stag = get_stagger(); + + /* the counter registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + msrs->counters[i].addr = + p4_counters[VIRT_CTR(stag, i)].counter_address; + } + + /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ + + /* 18 CCCR registers */ + for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; + addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* 43 ESCR registers in three or four discontiguous group */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 + * to avoid special case in nmi_{save|restore}_registers() */ + if (boot_cpu_data.x86_model >= 0x3) { + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } else { + for (addr = MSR_P4_IQ_ESCR0 + stag; + addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* there are 2 remaining non-contiguously located ESCRs */ + + if (num_counters == NUM_COUNTERS_NON_HT) { + /* standard non-HT CPUs handle both remaining ESCRs*/ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else if (stag == 0) { + /* HT CPUs give the first remainder to the even thread, as + the 32nd control register */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else { + /* and two copies of the second to the odd thread, + for the 22st and 23nd control registers */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } +} + + +static void pmc_setup_one_p4_counter(unsigned int ctr) +{ + int i; + int const maxbind = 2; + 
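get_stagger() and VIRT_CTR() above split the P4's physical counters between the two hyperthreads: with smp_num_siblings == 2, num_counters drops to 4 and each sibling addresses every other physical slot (on non-HT parts the stagger is always 0 and VIRT_CTR(0, i) == i). For the odd sibling:

    stag = get_stagger();        /* 1 on the second thread of the core          */
    VIRT_CTR(stag, 0);           /* -> 4: CTR_BPU_2 / MSR_P4_BPU_PERFCTR2       */
    VIRT_CTR(stag, 1);           /* -> 5: CTR_MS_2  / MSR_P4_MS_PERFCTR2        */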
unsigned int cccr = 0; + unsigned int escr = 0; + unsigned int high = 0; + unsigned int counter_bit; + struct p4_event_binding *ev = NULL; + unsigned int stag; + + stag = get_stagger(); + + /* convert from counter *number* to counter *bit* */ + counter_bit = 1 << VIRT_CTR(stag, ctr); + + /* find our event binding structure. */ + if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", + counter_config[ctr].event); + return; + } + + ev = &(p4_events[counter_config[ctr].event - 1]); + + for (i = 0; i < maxbind; i++) { + if (ev->bindings[i].virt_counter & counter_bit) { + + /* modify ESCR */ + ESCR_READ(escr, high, ev, i); + ESCR_CLEAR(escr); + if (stag == 0) { + ESCR_SET_USR_0(escr, counter_config[ctr].user); + ESCR_SET_OS_0(escr, counter_config[ctr].kernel); + } else { + ESCR_SET_USR_1(escr, counter_config[ctr].user); + ESCR_SET_OS_1(escr, counter_config[ctr].kernel); + } + ESCR_SET_EVENT_SELECT(escr, ev->event_select); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_WRITE(escr, high, ev, i); + + /* modify CCCR */ + CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + CCCR_CLEAR(cccr); + CCCR_SET_REQUIRED_BITS(cccr); + CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); + if (stag == 0) { + CCCR_SET_PMI_OVF_0(cccr); + } else { + CCCR_SET_PMI_OVF_1(cccr); + } + CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + return; + } + } + + printk(KERN_ERR + "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", + counter_config[ctr].event, stag, ctr); +} + + +static void p4_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int i; + unsigned int low, high; + unsigned int addr; + unsigned int stag; + + stag = get_stagger(); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (! 
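To make pmc_setup_one_p4_counter() concrete: with counter_config[ctr].event == 1 the binding chosen is p4_events[0], BRANCH_RETIRED, and on a non-HT part (stag == 0) programming counter CTR_IQ_4 comes down to roughly:

    /* ESCR side (MSR_P4_CRU_ESCR2 for CTR_IQ_4): */
    ESCR_SET_EVENT_SELECT(escr, 0x06);                       /* event_select    */
    ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
    ESCR_SET_USR_0(escr, counter_config[ctr].user);
    ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
    /* CCCR side (MSR_P4_IQ_CCCR4 for CTR_IQ_4): */
    CCCR_SET_REQUIRED_BITS(cccr);
    CCCR_SET_ESCR_SELECT(cccr, 0x05);                        /* escr_select     */
    CCCR_SET_PMI_OVF_0(cccr);                                /* PMI to thread 0 */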
MISC_PMC_ENABLED_P(low)) { + printk(KERN_ERR "oprofile: P4 PMC not available\n"); + return; + } + + /* clear the cccrs we will use */ + for (i = 0 ; i < num_counters ; i++) { + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + } + + /* clear cccrs outside our concern */ + for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { + rdmsr(p4_unused_cccr[i], low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_unused_cccr[i], low, high); + } + + /* clear all escrs (including those outside our concern) */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + /* On older models clear also MSR_P4_IQ_ESCR0/1 */ + if (boot_cpu_data.x86_model < 0x3) { + wrmsr(MSR_P4_IQ_ESCR0, 0, 0); + wrmsr(MSR_P4_IQ_ESCR1, 0, 0); + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + if (num_counters == NUM_COUNTERS_NON_HT) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } else if (stag == 0) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + } else { + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } + + /* setup all counters */ + for (i = 0 ; i < num_counters ; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + pmc_setup_one_p4_counter(i); + CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + } else { + reset_value[i] = 0; + } + } +} + + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static int p4_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned long ctr, low, high, stag, real; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if (guest_kernel_mode(current, regs)) + mode = 1; + else if (ring_0(regs)) + mode = 2; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + + if (!reset_value[i]) + continue; + + /* + * there is some eccentricity in the hardware which + * requires that we perform 2 extra corrections: + * + * - check both the CCCR:OVF flag for overflow and the + * counter high bit for un-flagged overflows. + * + * - write the counter back twice to ensure it gets + * updated properly. + * + * the former seems to be related to extra NMIs happening + * during the current NMI; the latter is reported as errata + * N15 in intel doc 249199-029, pentium 4 specification + * update, though their suggested work-around does not + * appear to solve the problem. 
+ */ + + real = VIRT_CTR(stag, i); + + CCCR_READ(low, high, real); + CTR_READ(ctr, high, real); + if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], real); + CCCR_CLEAR_OVF(low); + CCCR_WRITE(low, high, real); + CTR_WRITE(reset_value[i], real); + ovf = 1; + } + } + + /* P4 quirk: you have to re-unmask the apic vector */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + return ovf; +} + + +static void p4_start(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_ENABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +static void p4_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_DISABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +#ifdef CONFIG_SMP +struct op_x86_model_spec const op_p4_ht2_spec = { + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; +#endif + +struct op_x86_model_spec const op_p4_spec = { + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/op_model_ppro.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/op_model_ppro.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,153 @@ +/** + * @file op_model_ppro.h + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 2 +#define NUM_CONTROLS 2 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_P6_PERFCTR0; + msrs->counters[1].addr = MSR_P6_PERFCTR1; + + msrs->controls[0].addr = MSR_P6_EVNTSEL0; + msrs->controls[1].addr = MSR_P6_EVNTSEL1; +} + + +static void 
ppro_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip, + int mode, int event); + +static int ppro_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned int low, high; + int i; + int ovf = 0; + unsigned long eip = regs->eip; + int mode = 0; + + if ( guest_kernel_mode(current, regs) ) + mode = 1; + else if ( ring_0(regs) ) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + xenoprof_log_event(current, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + return ovf; +} + + +static void ppro_start(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + + +static void ppro_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + + +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop +}; diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/op_x86_model.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/op_x86_model.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,51 @@ +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +struct op_saved_msr { + unsigned int high; + unsigned int low; +}; + +struct op_msr { + unsigned long addr; + struct op_saved_msr saved; +}; + +struct op_msrs { + struct op_msr * counters; + struct op_msr * controls; +}; + +struct pt_regs; + +/* The model vtable abstracts the differences between + * various x86 CPU model's perfctr support. 
+ */ +struct op_x86_model_spec { + unsigned int const num_counters; + unsigned int const num_controls; + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_p4_spec; +extern struct op_x86_model_spec const op_p4_ht2_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/oprofile/xenoprof.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/oprofile/xenoprof.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2005 Hewlett-Packard Co. + * written by Aravind Menon & Jose Renato Santos + * (email: xenoprof@xxxxxxxxxxxxx) + */ + +#include <xen/sched.h> +#include <public/xenoprof.h> + +#include "op_counter.h" + +/* Limit amount of pages used for shared buffer (per domain) */ +#define MAX_OPROF_SHARED_PAGES 32 + +int active_domains[MAX_OPROF_DOMAINS]; +int active_ready[MAX_OPROF_DOMAINS]; +unsigned int adomains; +unsigned int activated; +struct domain *primary_profiler; +int xenoprof_state = XENOPROF_IDLE; + +u64 total_samples; +u64 invalid_buffer_samples; +u64 corrupted_buffer_samples; +u64 lost_samples; +u64 active_samples; +u64 idle_samples; +u64 others_samples; + + +extern int nmi_init(int *num_events, int *is_primary, char *cpu_type); +extern int nmi_reserve_counters(void); +extern int nmi_setup_events(void); +extern int nmi_enable_virq(void); +extern int nmi_start(void); +extern void nmi_stop(void); +extern void nmi_disable_virq(void); +extern void nmi_release_counters(void); + +int is_active(struct domain *d) +{ + struct xenoprof *x = d->xenoprof; + return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE)); +} + +int is_profiled(struct domain *d) +{ + return is_active(d); +} + +static void xenoprof_reset_stat(void) +{ + total_samples = 0; + invalid_buffer_samples = 0; + corrupted_buffer_samples = 0; + lost_samples = 0; + active_samples = 0; + idle_samples = 0; + others_samples = 0; +} + +static void xenoprof_reset_buf(struct domain *d) +{ + int j; + struct xenoprof_buf *buf; + + if ( d->xenoprof == NULL ) + { + printk("xenoprof_reset_buf: ERROR - Unexpected " + "Xenoprof NULL pointer \n"); + return; + } + + for ( j = 0; j < MAX_VIRT_CPUS; j++ ) + { + buf = d->xenoprof->vcpu[j].buffer; + if ( buf != NULL ) + { + buf->event_head = 0; + buf->event_tail = 0; + } + } +} + +int active_index(struct domain *d) +{ + int i, id = d->domain_id; + + for ( i = 0; i < adomains; i++ ) + if ( active_domains[i] == id ) + return i; + + return -1; +} + +int set_active(struct domain *d) +{ + int ind; + struct xenoprof *x; + + ind = active_index(d); + if ( ind < 0 ) + return -EPERM; + + x = d->xenoprof; + if ( x == NULL ) + return -EPERM; + + x->domain_ready = 1; + x->domain_type = XENOPROF_DOMAIN_ACTIVE; + active_ready[ind] = 1; + activated++; + + return 0; +} + +int reset_active(struct domain *d) +{ + int ind; + struct xenoprof *x; + + ind = active_index(d); + if ( ind < 0 ) + return -EPERM; + + x = d->xenoprof; + if ( x == NULL ) + return -EPERM; + + x->domain_ready = 0; + x->domain_type = XENOPROF_DOMAIN_IGNORED; + active_ready[ind] = 0; + activated--; + if ( activated <= 0 ) + adomains = 0; + + return 
0; +} + +int set_active_domains(int num) +{ + int primary; + int i; + struct domain *d; + + /* Reset any existing active domains from previous runs. */ + for ( i = 0; i < adomains; i++ ) + { + if ( active_ready[i] ) + { + d = find_domain_by_id(active_domains[i]); + if ( d != NULL ) + { + reset_active(d); + put_domain(d); + } + } + } + + adomains = num; + + /* Add primary profiler to list of active domains if not there yet */ + primary = active_index(primary_profiler); + if ( primary == -1 ) + { + /* Return if there is no space left on list. */ + if ( num >= MAX_OPROF_DOMAINS ) + return -E2BIG; + active_domains[num] = primary_profiler->domain_id; + num++; + } + + adomains = num; + activated = 0; + + for ( i = 0; i < adomains; i++ ) + active_ready[i] = 0; + + return 0; +} + +void xenoprof_log_event( + struct vcpu *vcpu, unsigned long eip, int mode, int event) +{ + struct xenoprof_vcpu *v; + struct xenoprof_buf *buf; + int head; + int tail; + int size; + + + total_samples++; + + /* ignore samples of un-monitored domains */ + /* Count samples in idle separate from other unmonitored domains */ + if ( !is_profiled(vcpu->domain) ) + { + others_samples++; + return; + } + + v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id]; + + /* Sanity check. Should never happen */ + if ( v->buffer == NULL ) + { + invalid_buffer_samples++; + return; + } + + buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer; + + head = buf->event_head; + tail = buf->event_tail; + size = v->event_size; + + /* make sure indexes in shared buffer are sane */ + if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) ) + { + corrupted_buffer_samples++; + return; + } + + if ( (head == tail - 1) || (head == size - 1 && tail == 0) ) + { + buf->lost_samples++; + lost_samples++; + } + else + { + buf->event_log[head].eip = eip; + buf->event_log[head].mode = mode; + buf->event_log[head].event = event; + head++; + if ( head >= size ) + head = 0; + buf->event_head = head; + active_samples++; + if ( mode == 0 ) + buf->user_samples++; + else if ( mode == 1 ) + buf->kernel_samples++; + else + buf->xen_samples++; + } +} + +char *alloc_xenoprof_buf(struct domain *d, int npages) +{ + char *rawbuf; + int i, order; + + /* allocate pages to store sample buffer shared with domain */ + order = get_order_from_pages(npages); + rawbuf = alloc_xenheap_pages(order); + if ( rawbuf == NULL ) + { + printk("alloc_xenoprof_buf(): memory allocation failed\n"); + return 0; + } + + /* Share pages so that kernel can map it */ + for ( i = 0; i < npages; i++ ) + share_xen_page_with_guest( + virt_to_page(rawbuf + i * PAGE_SIZE), + d, XENSHARE_writable); + + return rawbuf; +} + +int alloc_xenoprof_struct(struct domain *d, int max_samples) +{ + struct vcpu *v; + int nvcpu, npages, bufsize, max_bufsize; + int i; + + d->xenoprof = xmalloc(struct xenoprof); + + if ( d->xenoprof == NULL ) + { + printk ("alloc_xenoprof_struct(): memory " + "allocation (xmalloc) failed\n"); + return -ENOMEM; + } + + memset(d->xenoprof, 0, sizeof(*d->xenoprof)); + + nvcpu = 0; + for_each_vcpu ( d, v ) + nvcpu++; + + /* reduce buffer size if necessary to limit pages allocated */ + bufsize = sizeof(struct xenoprof_buf) + + (max_samples - 1) * sizeof(struct event_log); + max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu; + if ( bufsize > max_bufsize ) + { + bufsize = max_bufsize; + max_samples = ( (max_bufsize - sizeof(struct xenoprof_buf)) / + sizeof(struct event_log) ) + 1; + } + + npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1; + d->xenoprof->rawbuf = alloc_xenoprof_buf(d, 
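xenoprof_log_event() above is the producer half of a simple single-producer/single-consumer ring: Xen advances event_head, the guest advances event_tail, and the buffer is treated as full when head is one slot behind tail (modulo event_size). A sketch of the consumer side, which lives in the guest's oprofile driver rather than in this file and is shown only to illustrate the protocol:

    while ( buf->event_tail != buf->event_head )
    {
        struct event_log *e = &buf->event_log[buf->event_tail];
        /* hand e->eip, e->mode and e->event to the profiler's sample layer */
        buf->event_tail = (buf->event_tail + 1) % buf->event_size;
    }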
npages); + if ( d->xenoprof->rawbuf == NULL ) + { + xfree(d->xenoprof); + d->xenoprof = NULL; + return -ENOMEM; + } + + d->xenoprof->npages = npages; + d->xenoprof->nbuf = nvcpu; + d->xenoprof->bufsize = bufsize; + d->xenoprof->domain_ready = 0; + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + + /* Update buffer pointers for active vcpus */ + i = 0; + for_each_vcpu ( d, v ) + { + d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer = + (struct xenoprof_buf *)&d->xenoprof->rawbuf[i * bufsize]; + d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples; + d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id; + + i++; + /* in the unlikely case that the number of active vcpus changes */ + if ( i >= nvcpu ) + break; + } + + return 0; +} + +void free_xenoprof_pages(struct domain *d) +{ + struct xenoprof *x; + int order; + + x = d->xenoprof; + if ( x == NULL ) + return; + + if ( x->rawbuf != NULL ) + { + order = get_order_from_pages(x->npages); + free_xenheap_pages(x->rawbuf, order); + } + + xfree(x); + d->xenoprof = NULL; +} + +int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result) +{ + xenoprof_init_result_t result; + int is_primary, num_events; + struct domain *d = current->domain; + int ret; + + ret = nmi_init(&num_events, &is_primary, result.cpu_type); + if ( is_primary ) + primary_profiler = current->domain; + + if ( ret < 0 ) + goto err; + + /* + * We allocate xenoprof struct and buffers only at first time xenoprof_init + * is called. Memory is then kept until domain is destroyed. + */ + if ( (d->xenoprof == NULL) && + ((ret = alloc_xenoprof_struct(d, max_samples)) < 0) ) + goto err; + + xenoprof_reset_buf(d); + + d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED; + d->xenoprof->domain_ready = 0; + d->xenoprof->is_primary = is_primary; + + result.is_primary = is_primary; + result.num_events = num_events; + result.nbuf = d->xenoprof->nbuf; + result.bufsize = d->xenoprof->bufsize; + result.buf_maddr = __pa(d->xenoprof->rawbuf); + + if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) ) + { + ret = -EFAULT; + goto err; + } + + return ret; + + err: + if ( primary_profiler == current->domain ) + primary_profiler = NULL; + return ret; +} + +#define PRIV_OP(op) ( (op == XENOPROF_set_active) \ + || (op == XENOPROF_reserve_counters) \ + || (op == XENOPROF_setup_events) \ + || (op == XENOPROF_start) \ + || (op == XENOPROF_stop) \ + || (op == XENOPROF_release_counters) \ + || (op == XENOPROF_shutdown)) + +int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2) +{ + int ret = 0; + + if ( PRIV_OP(op) && (current->domain != primary_profiler) ) + { + printk("xenoprof: dom %d denied privileged operation %d\n", + current->domain->domain_id, op); + return -EPERM; + } + + switch ( op ) + { + case XENOPROF_init: + ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2); + break; + + case XENOPROF_set_active: + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + if ( arg2 > MAX_OPROF_DOMAINS ) + return -E2BIG; + if ( copy_from_user((void *)&active_domains, + (void *)arg1, arg2*sizeof(int)) ) + return -EFAULT; + ret = set_active_domains(arg2); + break; + + case XENOPROF_reserve_counters: + if ( xenoprof_state != XENOPROF_IDLE ) + return -EPERM; + ret = nmi_reserve_counters(); + if ( !ret ) + xenoprof_state = XENOPROF_COUNTERS_RESERVED; + break; + + case XENOPROF_setup_events: + if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED ) + return -EPERM; + if ( adomains == 0 ) + 
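The do_xenoprof_op() dispatcher gates the privileged commands on a small state machine; the sequence the primary profiler (currently dom0 only) is expected to drive is, in order:

    XENOPROF_init              learn cpu_type / num_events / nbuf / bufsize / buf_maddr
    XENOPROF_set_active        choose which domains are profiled        (state: IDLE)
    XENOPROF_reserve_counters  IDLE -> COUNTERS_RESERVED
    XENOPROF_setup_events      COUNTERS_RESERVED -> READY   (counter_config copied in)
    XENOPROF_enable_virq       per-domain: reset buffers, mark the domain active
    XENOPROF_start             READY -> PROFILING   (all active domains ready)
    XENOPROF_stop              PROFILING -> READY
    XENOPROF_release_counters  READY or COUNTERS_RESERVED -> IDLE
    XENOPROF_shutdown          forget the primary profiler              (state: IDLE)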
set_active_domains(0); + + if ( copy_from_user((void *)&counter_config, (void *)arg1, + arg2 * sizeof(struct op_counter_config)) ) + return -EFAULT; + ret = nmi_setup_events(); + if ( !ret ) + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_enable_virq: + if ( current->domain == primary_profiler ) + { + nmi_enable_virq(); + xenoprof_reset_stat(); + } + xenoprof_reset_buf(current->domain); + ret = set_active(current->domain); + break; + + case XENOPROF_start: + ret = -EPERM; + if ( (xenoprof_state == XENOPROF_READY) && + (activated == adomains) ) + ret = nmi_start(); + + if ( ret == 0 ) + xenoprof_state = XENOPROF_PROFILING; + break; + + case XENOPROF_stop: + if ( xenoprof_state != XENOPROF_PROFILING ) + return -EPERM; + nmi_stop(); + xenoprof_state = XENOPROF_READY; + break; + + case XENOPROF_disable_virq: + if ( (xenoprof_state == XENOPROF_PROFILING) && + (is_active(current->domain)) ) + return -EPERM; + ret = reset_active(current->domain); + break; + + case XENOPROF_release_counters: + ret = -EPERM; + if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) || + (xenoprof_state == XENOPROF_READY) ) + { + xenoprof_state = XENOPROF_IDLE; + nmi_release_counters(); + nmi_disable_virq(); + ret = 0; + } + break; + + case XENOPROF_shutdown: + ret = -EPERM; + if ( xenoprof_state == XENOPROF_IDLE ) + { + activated = 0; + adomains=0; + primary_profiler = NULL; + ret = 0; + } + break; + + default: + ret = -EINVAL; + } + + if ( ret < 0 ) + printk("xenoprof: operation %d failed for dom %d (status : %d)\n", + op, current->domain->domain_id, ret); + + return ret; +} diff -r 5719550652a1 -r 5cc367720223 xen/arch/x86/shutdown.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/shutdown.c Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,342 @@ +/****************************************************************************** + * arch/x86/shutdown.c + * + * x86-specific shutdown handling. + */ + +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <xen/delay.h> +#include <xen/dmi.h> +#include <asm/regs.h> +#include <asm/mc146818rtc.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/mpspec.h> +#include <xen/irq.h> +#include <xen/console.h> +#include <asm/msr.h> + +/* opt_noreboot: If true, machine will need manual reset on error. */ +static int opt_noreboot = 0; +boolean_param("noreboot", opt_noreboot); + +/* reboot_str: comma-separated list of reboot options. */ +static char __initdata reboot_str[10] = ""; +string_param("reboot", reboot_str); + +static long no_idt[2]; +static int reboot_mode; + +static inline void kb_wait(void) +{ + int i; + + for ( i = 0; i < 0x10000; i++ ) + if ( (inb_p(0x64) & 0x02) == 0 ) + break; +} + +void __attribute__((noreturn)) __machine_halt(void *unused) +{ + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); +} + +void machine_halt(void) +{ + watchdog_disable(); + console_start_sync(); + smp_call_function(__machine_halt, NULL, 1, 0); + __machine_halt(NULL); +} + +#ifdef __i386__ + +static int reboot_thru_bios; + +/* The following code and data reboots the machine by switching to real + mode and jumping to the BIOS reset entry point, as if the CPU has + really been reset. The previous version asked the keyboard + controller to pulse the CPU reset line, which is more thorough, but + doesn't work with at least one type of 486 motherboard. It is easy + to stop this code working; hence the copious comments. 
*/ + +static unsigned long long +real_mode_gdt_entries [3] = +{ + 0x0000000000000000ULL, /* Null descriptor */ + 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ +}; + +static const struct +{ + unsigned short size __attribute__ ((packed)); + unsigned long long * base __attribute__ ((packed)); +} +real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, +real_mode_idt = { 0x3ff, NULL }; + + +/* This is 16-bit protected mode code to disable paging and the cache, + switch to real mode and jump to the BIOS reset code. + + The instruction that switches to real mode by writing to CR0 must be + followed immediately by a far jump instruction, which set CS to a + valid value for real mode, and flushes the prefetch queue to avoid + running instructions that have already been decoded in protected + mode. + + Clears all the flags except ET, especially PG (paging), PE + (protected-mode enable) and TS (task switch for coprocessor state + save). Flushes the TLB after paging has been disabled. Sets CD and + NW, to disable the cache on a 486, and invalidates the cache. This + is more like the state of a 486 after reset. I don't know if + something else should be done for other chips. + + More could be done here to set up the registers as if a CPU reset had + occurred; hopefully real BIOSs don't assume much. */ + +static const unsigned char real_mode_switch [] = +{ + 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ + 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ + 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ + 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ + 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ + 0x0f, 0x20, 0xc2, /* movl %cr0,%edx */ + 0x66, 0x81, 0xe2, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%edx */ + 0x74, 0x02, /* jz f */ + 0x0f, 0x09, /* wbinvd */ + 0x24, 0x10, /* f: andb $0x10,al */ + 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ +}; +#define MAX_LENGTH 0x40 +static const unsigned char jump_to_bios [] = +{ + 0xea, 0xf0, 0xff, 0x00, 0xf0 /* ljmp $0xf000,$0xfff0 */ +}; + +/* + * Switch to real mode and then execute the code + * specified by the code and length parameters. + * We assume that length will aways be less that MAX_LENGTH! + */ +void machine_real_restart(const unsigned char *code, unsigned length) +{ + local_irq_disable(); + + /* Write zero to CMOS register number 0x0f, which the BIOS POST + routine will recognize as telling it to do a proper reboot. (Well + that's what this book in front of me says -- it may only apply to + the Phoenix BIOS though, it's not clear). At the same time, + disable NMIs by setting the top bit in the CMOS address register, + as we're about to do peculiar things to the CPU. */ + + spin_lock(&rtc_lock); + CMOS_WRITE(0x00, 0x8f); + spin_unlock(&rtc_lock); + + /* Identity-map virtual address zero. */ + + map_pages_to_xen(0, 0, 1, __PAGE_HYPERVISOR|MAP_SMALL_PAGES); + set_current(idle_vcpu[0]); + write_ptbase(idle_vcpu[0]); + + /* For the switch to real mode, copy some code to low memory. It has + to be in the first 64k because it is running in 16-bit mode, and it + has to have the same physical and virtual address, because it turns + off paging. Copy it near the end of the first page, out of the way + of BIOS variables. */ + + memcpy((void *)(PAGE_SIZE - sizeof(real_mode_switch) - MAX_LENGTH), + real_mode_switch, sizeof(real_mode_switch)); + memcpy((void *)(PAGE_SIZE - MAX_LENGTH), code, length); + + /* Set up the IDT for real mode. 
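machine_real_restart() relies on everything it needs living in the first 64k and being identity-mapped; the two memcpy() calls above lay the low page out as follows (MAX_LENGTH caps the caller-supplied code at 0x40 bytes):

    PAGE_SIZE - sizeof(real_mode_switch) - MAX_LENGTH :  real_mode_switch (16-bit pmode -> real mode)
    PAGE_SIZE - MAX_LENGTH                            :  caller-supplied code, e.g. jump_to_bios

The ljmp $0x0008 issued at the end of the function lands on real_mode_switch, which simply falls off its end into the copied caller code.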
*/ + + __asm__ __volatile__("lidt %0": : "m" (real_mode_idt)); + + /* Set up a GDT from which we can load segment descriptors for real + mode. The GDT is not used in real mode; it is just needed here to + prepare the descriptors. */ + + __asm__ __volatile__("lgdt %0": : "m" (real_mode_gdt)); + + /* Load the data segment registers, and thus the descriptors ready for + real mode. The base address of each segment is 0x100, 16 times the + selector value being loaded here. This is so that the segment + registers don't have to be reloaded after switching to real mode: + the values are consistent for real mode operation already. */ + + __asm__ __volatile__ ("\tmov %0,%%ds\n" + "\tmov %0,%%es\n" + "\tmov %0,%%fs\n" + "\tmov %0,%%gs\n" + "\tmov %0,%%ss" + : + : "r" (0x0010)); + + /* Jump to the 16-bit code that we copied earlier. It disables paging + and the cache, switches to real mode, and jumps to the BIOS reset + entry point. */ + + __asm__ __volatile__ ("ljmp $0x0008,%0" + : + : "i" ((void *)(PAGE_SIZE - + sizeof(real_mode_switch) - + MAX_LENGTH))); +} + +#else /* __x86_64__ */ + +#define machine_real_restart(x, y) +#define reboot_thru_bios 0 + +#endif + +void machine_restart(char * __unused) +{ + int i; + + if ( opt_noreboot ) + { + printk("Reboot disabled on cmdline: require manual reset\n"); + machine_halt(); + } + + watchdog_disable(); + console_start_sync(); + + local_irq_enable(); + + /* Ensure we are the boot CPU. */ + if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid ) + { + smp_call_function((void *)machine_restart, NULL, 1, 0); + for ( ; ; ) + safe_halt(); + } + + /* + * Stop all CPUs and turn off local APICs and the IO-APIC, so + * other OSs see a clean IRQ state. + */ + smp_send_stop(); + disable_IO_APIC(); + hvm_disable(); + + /* Rebooting needs to touch the page at absolute address 0. */ + *((unsigned short *)__va(0x472)) = reboot_mode; + + if (reboot_thru_bios <= 0) + { + for ( ; ; ) + { + /* Pulse the keyboard reset line. */ + for ( i = 0; i < 100; i++ ) + { + kb_wait(); + udelay(50); + outb(0xfe,0x64); /* pulse reset low */ + udelay(50); + } + + /* That didn't work - force a triple fault.. */ + __asm__ __volatile__("lidt %0": "=m" (no_idt)); + __asm__ __volatile__("int3"); + } + } + machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); +} + +#ifndef reboot_thru_bios +static int __init set_bios_reboot(struct dmi_system_id *d) +{ + if ( !reboot_thru_bios ) + { + reboot_thru_bios = 1; + printk("%s series board detected. 
" + "Selecting BIOS-method for reboots.\n", d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata reboot_dmi_table[] = { + { /* Handle problems with rebooting on Dell 1300's */ + .callback = set_bios_reboot, + .ident = "Dell PowerEdge 1300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), + }, + }, + { /* Handle problems with rebooting on Dell 300's */ + .callback = set_bios_reboot, + .ident = "Dell PowerEdge 300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), + }, + }, + { /* Handle problems with rebooting on Dell 2400's */ + .callback = set_bios_reboot, + .ident = "Dell PowerEdge 2400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), + }, + }, + { /* Handle problems with rebooting on HP laptops */ + .callback = set_bios_reboot, + .ident = "HP Compaq Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), + }, + }, + { } +}; +#endif + +static int __init reboot_init(void) +{ + const char *str; + + for ( str = reboot_str; *str != '\0'; str++ ) + { + switch ( *str ) + { + case 'n': /* no reboot */ + opt_noreboot = 1; + break; + case 'w': /* "warm" reboot (no memory testing etc) */ + reboot_mode = 0x1234; + break; + case 'c': /* "cold" reboot (with memory testing etc) */ + reboot_mode = 0x0; + break; +#ifndef reboot_thru_bios + case 'b': /* "bios" reboot by jumping through the BIOS */ + reboot_thru_bios = 1; + break; + case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ + reboot_thru_bios = -1; + break; +#endif + } + if ( (str = strchr(str, ',')) == NULL ) + break; + } + +#ifndef reboot_thru_bios + dmi_check_system(reboot_dmi_table); +#endif + return 0; +} +__initcall(reboot_init); diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/ipi.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/ipi.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,8 @@ +#ifndef __ASM_IPI_H +#define __ASM_IPI_H + +#include <asm/genapic.h> + +#define send_IPI_mask (genapic->send_IPI_mask) + +#endif /* __ASM_IPI_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-default/mach_mpspec.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/mach-default/mach_mpspec.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,10 @@ +#ifndef __ASM_MACH_MPSPEC_H +#define __ASM_MACH_MPSPEC_H + +#define MAX_IRQ_SOURCES 256 + +/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ +/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ +#define MAX_MP_BUSSES 260 + +#endif /* __ASM_MACH_MPSPEC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/public/callback.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/public/callback.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,57 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). + * @extra_args == Operation-specific extra arguments (NULL if none). 
+ */ + +#define CALLBACKTYPE_event 0 +#define CALLBACKTYPE_failsafe 1 +#define CALLBACKTYPE_syscall 2 /* x86_64 only */ + +/* + * Register a callback. + */ +#define CALLBACKOP_register 0 +typedef struct callback_register { + int type; + xen_callback_t address; +} callback_register_t; +DEFINE_GUEST_HANDLE(callback_register_t); + +/* + * Unregister a callback. + * + * Not all callbacks can be unregistered. -EINVAL will be returned if + * you attempt to unregister such a callback. + */ +#define CALLBACKOP_unregister 1 +typedef struct callback_unregister { + int type; +} callback_unregister_t; +DEFINE_GUEST_HANDLE(callback_unregister_t); + +#endif /* __XEN_PUBLIC_CALLBACK_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 5719550652a1 -r 5cc367720223 xen/include/public/xen-compat.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/public/xen-compat.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,31 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030101 + +#if defined(__XEN__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff -r 5719550652a1 -r 5cc367720223 xen/include/public/xenoprof.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/public/xenoprof.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,83 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +/* + * Commands to HYPERVISOR_pmc_op(). 
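xen-compat.h above makes the interface version an explicit contract: Xen itself always builds against the latest version, while a guest opts in by defining __XEN_INTERFACE_VERSION__ before pulling in the public headers. A sketch of the two guest-side choices:

    /* new-style guest: requests the 3.0.1 interface and so the new sched_op */
    #define __XEN_INTERFACE_VERSION__ 0x00030101

    /* legacy guest: leaves the version undefined (treated as 0x00000000), so
     * __HYPERVISOR_sched_op is quietly remapped to __HYPERVISOR_sched_op_compat */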
+ */ +#define XENOPROF_init 0 +#define XENOPROF_set_active 1 +#define XENOPROF_reserve_counters 3 +#define XENOPROF_setup_events 4 +#define XENOPROF_enable_virq 5 +#define XENOPROF_start 6 +#define XENOPROF_stop 7 +#define XENOPROF_disable_virq 8 +#define XENOPROF_release_counters 9 +#define XENOPROF_shutdown 10 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +typedef struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +} xenoprof_buf_t; +DEFINE_GUEST_HANDLE(xenoprof_buf_t); + +typedef struct xenoprof_init_result { + int32_t num_events; + int32_t is_primary; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_maddr; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +} xenoprof_init_result_t; +DEFINE_GUEST_HANDLE(xenoprof_init_result_t); + +typedef struct xenoprof_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +} xenoprof_counter_config_t; +DEFINE_GUEST_HANDLE(xenoprof_counter_config_t); + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 5719550652a1 -r 5cc367720223 xen/include/xen/xenoprof.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/xen/xenoprof.h Tue Apr 11 18:54:18 2006 -0600 @@ -0,0 +1,42 @@ +/****************************************************************************** + * xenoprof.h + * + * Xenoprof: Xenoprof enables performance profiling in Xen + * + * Copyright (C) 2005 Hewlett-Packard Co. + * written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_XENOPROF_H__ +#define __XEN_XENOPROF_H__ + +#include <public/xenoprof.h> + +#define XENOPROF_DOMAIN_IGNORED 0 +#define XENOPROF_DOMAIN_ACTIVE 1 + +#define XENOPROF_IDLE 0 +#define XENOPROF_COUNTERS_RESERVED 1 +#define XENOPROF_READY 2 +#define XENOPROF_PROFILING 3 + +struct xenoprof_vcpu { + int event_size; + struct xenoprof_buf *buffer; +}; + +struct xenoprof { + char* rawbuf; + int npages; + int nbuf; + int bufsize; + int domain_type; + int domain_ready; + int is_primary; + struct xenoprof_vcpu vcpu [MAX_VIRT_CPUS]; +}; + +struct domain; +void free_xenoprof_pages(struct domain *d); + +#endif /* __XEN__XENOPROF_H__ */ diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/include/linux/irq.h --- a/linux-2.6-xen-sparse/include/linux/irq.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -#ifndef __irq_h -#define __irq_h - -/* - * Please do not include this file in generic code. There is currently - * no requirement for any architecture to implement anything held - * within this file. - * - * Thanks. --rmk - */ - -#include <linux/config.h> -#include <linux/smp.h> - -#if !defined(CONFIG_S390) - -#include <linux/linkage.h> -#include <linux/cache.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> - -#include <asm/irq.h> -#include <asm/ptrace.h> - -/* - * IRQ line status. - */ -#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! 
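Illustrative sketch (not part of the patch): the xenoprof_buf_t added above is a per-VCPU sample ring shared between Xen and the domain. The header does not spell out the producer/consumer protocol, so this consumer-side sketch assumes Xen appends samples at event_head, the domain consumes at event_tail, and event_size is the number of event_log slots; handle_sample() is a hypothetical profiler callback, and real code would also need memory barriers around the index updates.

/* Hedged sketch of a domain-side drain loop for the shared sample ring. */
static void drain_xenoprof_buf(xenoprof_buf_t *buf,
                               void (*handle_sample)(struct event_log *))
{
    uint32_t tail = buf->event_tail;

    while (tail != buf->event_head) {
        handle_sample(&buf->event_log[tail]);
        tail = (tail + 1) % buf->event_size;   /* assumed ring size */
    }
    buf->event_tail = tail;
}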
*/ -#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */ -#define IRQ_PENDING 4 /* IRQ pending - replay on enable */ -#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ -#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */ -#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ -#define IRQ_LEVEL 64 /* IRQ level triggered */ -#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */ -#if defined(ARCH_HAS_IRQ_PER_CPU) -# define IRQ_PER_CPU 256 /* IRQ is per CPU */ -# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) -#else -# define CHECK_IRQ_PER_CPU(var) 0 -#endif - -/* - * Interrupt controller descriptor. This is all we need - * to describe about the low-level hardware. - */ -struct hw_interrupt_type { - const char * typename; - unsigned int (*startup)(unsigned int irq); - void (*shutdown)(unsigned int irq); - void (*enable)(unsigned int irq); - void (*disable)(unsigned int irq); - void (*ack)(unsigned int irq); - void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, cpumask_t dest); - /* Currently used only by UML, might disappear one day.*/ -#ifdef CONFIG_IRQ_RELEASE_METHOD - void (*release)(unsigned int irq, void *dev_id); -#endif -}; - -typedef struct hw_interrupt_type hw_irq_controller; - -/* - * This is the "IRQ descriptor", which contains various information - * about the irq, including what kind of hardware handling it has, - * whether it is disabled etc etc. - * - * Pad this out to 32 bytes for cache and indexing reasons. - */ -typedef struct irq_desc { - hw_irq_controller *handler; - void *handler_data; - struct irqaction *action; /* IRQ action list */ - unsigned int status; /* IRQ status */ - unsigned int depth; /* nested irq disables */ - unsigned int irq_count; /* For detecting broken interrupts */ - unsigned int irqs_unhandled; - spinlock_t lock; -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) - unsigned int move_irq; /* Flag need to re-target intr dest*/ -#endif -} ____cacheline_aligned irq_desc_t; - -extern irq_desc_t irq_desc [NR_IRQS]; - -/* Return a pointer to the irq descriptor for IRQ. 
*/ -static inline irq_desc_t * -irq_descp (int irq) -{ - return irq_desc + irq; -} - -#include <asm/hw_irq.h> /* the arch dependent stuff */ - -extern int setup_irq(unsigned int irq, struct irqaction * new); -#ifdef CONFIG_XEN -extern int teardown_irq(unsigned int irq, struct irqaction * old); -#endif - -#ifdef CONFIG_GENERIC_HARDIRQS -extern cpumask_t irq_affinity[NR_IRQS]; - -#ifdef CONFIG_SMP -static inline void set_native_irq_info(int irq, cpumask_t mask) -{ - irq_affinity[irq] = mask; -} -#else -static inline void set_native_irq_info(int irq, cpumask_t mask) -{ -} -#endif - -#ifdef CONFIG_SMP - -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) -extern cpumask_t pending_irq_cpumask[NR_IRQS]; - -static inline void set_pending_irq(unsigned int irq, cpumask_t mask) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - desc->move_irq = 1; - pending_irq_cpumask[irq] = mask; - spin_unlock_irqrestore(&desc->lock, flags); -} - -static inline void -move_native_irq(int irq) -{ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - - if (likely (!desc->move_irq)) - return; - - desc->move_irq = 0; - - if (likely(cpus_empty(pending_irq_cpumask[irq]))) - return; - - if (!desc->handler->set_affinity) - return; - - /* note - we hold the desc->lock */ - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - - /* - * If there was a valid mask to work with, please - * do the disable, re-program, enable sequence. - * This is *not* particularly important for level triggered - * but in a edge trigger case, we might be setting rte - * when an active trigger is comming in. This could - * cause some ioapics to mal-function. - * Being paranoid i guess! - */ - if (unlikely(!cpus_empty(tmp))) { - desc->handler->disable(irq); - desc->handler->set_affinity(irq,tmp); - desc->handler->enable(irq); - } - cpus_clear(pending_irq_cpumask[irq]); -} - -#ifdef CONFIG_PCI_MSI -/* - * Wonder why these are dummies? - * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() - * counter part after translating the vector to irq info. We need to perform - * this operation on the real irq, when we dont use vector, i.e when - * pci_use_vector() is false. 
- */ -static inline void move_irq(int irq) -{ -} - -static inline void set_irq_info(int irq, cpumask_t mask) -{ -} - -#else // CONFIG_PCI_MSI - -static inline void move_irq(int irq) -{ - move_native_irq(irq); -} - -static inline void set_irq_info(int irq, cpumask_t mask) -{ - set_native_irq_info(irq, mask); -} -#endif // CONFIG_PCI_MSI - -#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE - -#define move_irq(x) -#define move_native_irq(x) -#define set_pending_irq(x,y) -static inline void set_irq_info(int irq, cpumask_t mask) -{ - set_native_irq_info(irq, mask); -} - -#endif // CONFIG_GENERIC_PENDING_IRQ - -#else // CONFIG_SMP - -#define move_irq(x) -#define move_native_irq(x) - -#endif // CONFIG_SMP - -extern int no_irq_affinity; -extern int noirqdebug_setup(char *str); - -extern fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, - struct irqaction *action); -extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); -extern void note_interrupt(unsigned int irq, irq_desc_t *desc, - int action_ret, struct pt_regs *regs); -extern int can_request_irq(unsigned int irq, unsigned long irqflags); - -extern void init_irq_proc(void); - -#ifdef CONFIG_AUTO_IRQ_AFFINITY -extern int select_smp_affinity(unsigned int irq); -#else -static inline int -select_smp_affinity(unsigned int irq) -{ - return 1; -} -#endif - -#endif - -extern hw_irq_controller no_irq_type; /* needed in every arch ? */ - -#endif - -#endif /* __irq_h */ diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/kernel/irq/manage.c --- a/linux-2.6-xen-sparse/kernel/irq/manage.c Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,425 +0,0 @@ -/* - * linux/kernel/irq/manage.c - * - * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar - * - * This file contains driver APIs to the irq subsystem. - */ - -#include <linux/config.h> -#include <linux/irq.h> -#include <linux/module.h> -#include <linux/random.h> -#include <linux/interrupt.h> - -#include "internals.h" - -#ifdef CONFIG_SMP - -cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; - -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) -cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; -#endif - -/** - * synchronize_irq - wait for pending IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for - * - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void synchronize_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_desc + irq; - - if (irq >= NR_IRQS) - return; - - while (desc->status & IRQ_INPROGRESS) - cpu_relax(); -} - -EXPORT_SYMBOL(synchronize_irq); - -#endif - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. 
- */ -void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void disable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - - if (irq >= NR_IRQS) - return; - - disable_irq_nosync(irq); - if (desc->action) - synchronize_irq(irq); -} - -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 0: - WARN_ON(1); - break; - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -EXPORT_SYMBOL(enable_irq); - -/* - * Internal function that tells the architecture code whether a - * particular irq has been exclusively allocated or is available - * for driver use. - */ -int can_request_irq(unsigned int irq, unsigned long irqflags) -{ - struct irqaction *action; - - if (irq >= NR_IRQS) - return 0; - - action = irq_desc[irq].action; - if (action) - if (irqflags & action->flags & SA_SHIRQ) - action = NULL; - - return !action; -} - -/** - * setup_irq - register an irqaction structure - * @irq: Interrupt to register - * @irqaction: The irqaction structure to be registered - * - * Normally called by request_irq, this function can be used - * directly to allocate special interrupts that are part of the - * architecture. - */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - struct irq_desc *desc = irq_desc + irq; - struct irqaction *old, **p; - unsigned long flags; - int shared = 0; - - if (irq >= NR_IRQS) - return -EINVAL; - - if (desc->handler == &no_irq_type) - return -ENOSYS; - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. 
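Illustrative sketch (not part of the patch) of the nested disable/enable semantics documented above: each disable_irq() must be balanced by exactly one enable_irq() before the line is serviced again. The struct my_dev type and the reprogramming step are hypothetical.

/* Hedged example: quiesce a device's interrupt line around reprogramming. */
#include <linux/interrupt.h>

struct my_dev {
    unsigned int irq;
    /* ... */
};

static void my_dev_reprogram(struct my_dev *dev)
{
    disable_irq(dev->irq);   /* waits for a running handler to complete */
    /* ... touch the hardware with the handler guaranteed not to run ... */
    enable_irq(dev->irq);    /* depth back to zero, line re-enabled */
}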
- */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | - IRQ_WAITING | IRQ_INPROGRESS); - if (desc->handler->startup) - desc->handler->startup(irq); - else - desc->handler->enable(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - new->irq = irq; - register_irq_proc(irq); - new->dir = NULL; - register_handler_proc(irq, new); - - return 0; -} - -/* - * teardown_irq - unregister an irqaction - * @irq: Interrupt line being freed - * @old: Pointer to the irqaction that is to be unregistered - * - * This function is called by free_irq and does the actual - * business of unregistering the handler. It exists as a - * seperate function to enable handlers to be unregistered - * for irqactions that have been allocated statically at - * boot time. - * - * This function must not be called from interrupt context. - */ -#ifndef CONFIG_XEN -static -#endif -int teardown_irq(unsigned int irq, struct irqaction * old) -{ - struct irq_desc *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return -ENOENT; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - - if (action) { - struct irqaction **pp = p; - - p = &action->next; - if (action != old) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - - /* Currently used only by UML, might disappear one day.*/ -#ifdef CONFIG_IRQ_RELEASE_METHOD - if (desc->handler->release) - desc->handler->release(irq, dev_id); -#endif - - if (!desc->action) { - desc->status |= IRQ_DISABLED; - if (desc->handler->shutdown) - desc->handler->shutdown(irq); - else - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - unregister_handler_proc(irq, action); - - /* Make sure it's not being used on another CPU */ - synchronize_irq(irq); - return 0; - } - printk(KERN_ERR "Trying to teardown free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return -ENOENT; - } -} - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function must not be called from interrupt context. 
- */ -void free_irq(unsigned int irq, void *dev_id) -{ - struct irq_desc *desc; - struct irqaction *action; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - for (action = desc->action; action != NULL; action = action->next) { - if (action->dev_id != dev_id) - continue; - - spin_unlock_irqrestore(&desc->lock,flags); - - if (teardown_irq(irq, action) == 0) - kfree(action); - return; - } - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; -} - -EXPORT_SYMBOL(free_irq); - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * SA_INTERRUPT Disable local interrupts while processing - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char * devname, void *dev_id) -{ - struct irqaction * action; - int retval; - - /* - * Sanity-check: shared interrupts must pass in a real dev-ID, - * otherwise we'll have trouble later trying to figure out - * which interrupt is which (messes up the interrupt freeing - * logic etc). - */ - if ((irqflags & SA_SHIRQ) && !dev_id) - return -EINVAL; - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - cpus_clear(action->mask); - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - select_smp_affinity(irq); - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - - return retval; -} - -EXPORT_SYMBOL(request_irq); - diff -r 5719550652a1 -r 5cc367720223 linux-2.6-xen-sparse/lib/Kconfig.debug --- a/linux-2.6-xen-sparse/lib/Kconfig.debug Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,224 +0,0 @@ - -config PRINTK_TIME - bool "Show timing information on printks" - help - Selecting this option causes timing information to be - included in printk output. This allows you to measure - the interval between kernel operations, including bootup - operations. This is useful for identifying long delays - in kernel startup. 
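Illustrative sketch (not part of the patch) of the request_irq()/free_irq() interface documented in the removed manage.c above, using the 2.6-era handler signature from that file; MYDEV_IRQ, struct my_card and the handler body are hypothetical, and because the line is shared the device itself is passed as the mandatory dev_id cookie.

/* Hedged example: claim and release a (possibly shared) interrupt line. */
#include <linux/interrupt.h>

#define MYDEV_IRQ 10            /* hypothetical IRQ number */

struct my_card {
    int dummy;
};

static irqreturn_t my_card_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    struct my_card *card = dev_id;
    /* ... check and acknowledge the device; 'card' identifies it ... */
    (void)card;
    return IRQ_HANDLED;
}

static int my_card_attach(struct my_card *card)
{
    /* SA_SHIRQ => shared line, so a unique non-NULL dev_id is required */
    return request_irq(MYDEV_IRQ, my_card_interrupt, SA_SHIRQ, "my_card", card);
}

static void my_card_detach(struct my_card *card)
{
    free_irq(MYDEV_IRQ, card);  /* returns only after running handlers finish */
}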
- - -config MAGIC_SYSRQ - bool "Magic SysRq key" - depends on !UML - help - If you say Y here, you will have some control over the system even - if the system crashes for example during kernel debugging (e.g., you - will be able to flush the buffer cache to disk, reboot the system - immediately or dump some status information). This is accomplished - by pressing various keys while holding SysRq (Alt+PrintScreen). It - also works on a serial console (on PC hardware at least), if you - send a BREAK and then within 5 seconds a command keypress. The - keys are documented in <file:Documentation/sysrq.txt>. Don't say Y - unless you really know what this hack does. - -config DEBUG_KERNEL - bool "Kernel debugging" - help - Say Y here if you are developing drivers or trying to debug and - identify kernel problems. - -config LOG_BUF_SHIFT - int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL - range 12 21 - default 17 if S390 - default 16 if X86_NUMAQ || IA64 - default 15 if SMP - default 14 - help - Select kernel log buffer size as a power of 2. - Defaults and Examples: - 17 => 128 KB for S/390 - 16 => 64 KB for x86 NUMAQ or IA-64 - 15 => 32 KB for SMP - 14 => 16 KB for uniprocessor - 13 => 8 KB - 12 => 4 KB - -config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" - depends on DEBUG_KERNEL - default y - help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 10 seconds, without giving other tasks a - chance to run. - - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. - - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) - -config SCHEDSTATS - bool "Collect scheduler statistics" - depends on DEBUG_KERNEL && PROC_FS - help - If you say Y here, additional code will be inserted into the - scheduler and related routines to collect statistics about - scheduler behavior and provide them in /proc/schedstat. These - stats may be useful for both tuning and debugging the scheduler - If you aren't debugging the scheduler or trying to tune a specific - application, you can say N to avoid the very slight overhead - this adds. - -config DEBUG_SLAB - bool "Debug memory allocations" - depends on DEBUG_KERNEL && SLAB - help - Say Y here to have the kernel do limited verification on memory - allocation as well as poisoning memory on free to catch use of freed - memory. This can make kmalloc/kfree-intensive workloads much slower. - -config DEBUG_PREEMPT - bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT - default y - help - If you say Y here then the kernel will use a debug variant of the - commonly used smp_processor_id() function and will print warnings - if kernel code uses it in a preemption-unsafe way. Also, the kernel - will detect preemption count underflows. - -config DEBUG_MUTEXES - bool "Mutex debugging, deadlock detection" - default y - depends on DEBUG_KERNEL - help - This allows mutex semantics violations and mutex related deadlocks - (lockups) to be detected and reported automatically. - -config DEBUG_SPINLOCK - bool "Spinlock debugging" - depends on DEBUG_KERNEL - help - Say Y here and build SMP to catch missing spinlock initialization - and certain other kinds of spinlock errors commonly made. 
This is - best used in conjunction with the NMI watchdog so that spinlock - deadlocks are also debuggable. - -config DEBUG_SPINLOCK_SLEEP - bool "Sleep-inside-spinlock checking" - depends on DEBUG_KERNEL - help - If you say Y here, various routines which may sleep will become very - noisy if they are called with a spinlock held. - -config DEBUG_KOBJECT - bool "kobject debugging" - depends on DEBUG_KERNEL - help - If you say Y here, some extra kobject debugging messages will be sent - to the syslog. - -config DEBUG_HIGHMEM - bool "Highmem debugging" - depends on DEBUG_KERNEL && HIGHMEM - help - This options enables addition error checking for high memory systems. - Disable for production systems. - -config DEBUG_BUGVERBOSE - bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED - depends on BUG - depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV - default !EMBEDDED - help - Say Y here to make BUG() panics output the file name and line number - of the BUG call as well as the EIP and oops trace. This aids - debugging but costs about 70-100K of memory. - -config DEBUG_INFO - bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL && !X86_64_XEN - help - If you say Y here the resulting kernel image will include - debugging info resulting in a larger kernel image. - Say Y here only if you plan to debug the kernel. - - If unsure, say N. - -config DEBUG_IOREMAP - bool "Enable ioremap() debugging" - depends on DEBUG_KERNEL && PARISC - help - Enabling this option will cause the kernel to distinguish between - ioremapped and physical addresses. It will print a backtrace (at - most one every 10 seconds), hopefully allowing you to see which - drivers need work. Fixing all these problems is a prerequisite - for turning on USE_HPPA_IOREMAP. The warnings are harmless; - the kernel has enough information to fix the broken drivers - automatically, but we'd like to make it more efficient by not - having to do that. - -config DEBUG_FS - bool "Debug Filesystem" - depends on DEBUG_KERNEL && SYSFS - help - debugfs is a virtual file system that kernel developers use to put - debugging files into. Enable this option to be able to read and - write to these files. - - If unsure, say N. - -config DEBUG_VM - bool "Debug VM" - depends on DEBUG_KERNEL - help - Enable this to turn on extended checks in the virtual-memory system - that may impact performance. - - If unsure, say N. - -config FRAME_POINTER - bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML) - default y if DEBUG_INFO && UML - help - If you say Y here the resulting kernel image will be slightly larger - and slower, but it might give very useful debugging information on - some architectures or if you use external debuggers. - If you don't debug the kernel, you can say N. - -config FORCED_INLINING - bool "Force gcc to inline functions marked 'inline'" - depends on DEBUG_KERNEL - default y - help - This option determines if the kernel forces gcc to inline the functions - developers have marked 'inline'. Doing so takes away freedom from gcc to - do what it thinks is best, which is desirable for the gcc 3.x series of - compilers. The gcc 4.x series have a rewritten inlining algorithm and - disabling this option will generate a smaller kernel there. Hopefully - this algorithm is so good that allowing gcc4 to make the decision can - become the default in the future, until then this option is there to - test gcc for this. 
- -config RCU_TORTURE_TEST - tristate "torture tests for RCU" - depends on DEBUG_KERNEL - default n - help - This option provides a kernel module that runs torture tests - on the RCU infrastructure. The kernel module may be built - after the fact on the running kernel to be tested, if desired. - - Say Y here if you want RCU torture tests to start automatically - at boot time (you probably don't). - Say M if you want the RCU torture tests to build as a module. - Say N if you are unsure. diff -r 5719550652a1 -r 5cc367720223 xen/Post.mk --- a/xen/Post.mk Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -# Ensure each subdirectory has exactly one trailing slash. -subdir-n := $(patsubst %,%/,$(patsubst %/,%,$(subdir-n))) -subdir-y := $(patsubst %,%/,$(patsubst %/,%,$(subdir-y))) - -# Add explicitly declared subdirectories to the object list. -obj-y += $(patsubst %,%/built_in.o,$(subdir-y)) - -# Add implicitly declared subdirectories (in the object list) to the -# subdirectory list, and rewrite the object-list entry. -subdir-y += $(filter %/,$(obj-y)) -obj-y := $(patsubst %/,%/built-in.o,$(obj-y)) - -subdir-all := $(subdir-y) $(subdir-n) - -built_in.o: $(obj-y) - $(LD) $(LDFLAGS) -r -o $@ $^ - -.PHONY: FORCE -FORCE: - -%/built_in.o: FORCE - $(MAKE) -C $* - -clean:: $(addprefix _clean_, $(subdir-all)) FORCE - rm -f *.o *~ core -_clean_%/: FORCE - $(MAKE) -C $* clean diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-bigsmp/mach_apic.h --- a/xen/include/asm-x86/mach-bigsmp/mach_apic.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,138 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - - -extern u8 bios_cpu_apicid[]; - -#define xapic_phys_to_log_apicid(cpu) (bios_cpu_apicid[cpu]) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -/* Round robin the irqs amoung the online cpus */ -static inline cpumask_t target_cpus(void) -{ - static unsigned long cpu = NR_CPUS; - do { - if (cpu >= NR_CPUS) - cpu = first_cpu(cpu_online_map); - else - cpu = next_cpu(cpu, cpu_online_map); - } while (cpu >= NR_CPUS); - return cpumask_of_cpu(cpu); -} - -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 -#define TARGET_CPUS (target_cpus()) -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target proc */ -#define NO_BALANCE_IRQ (0) -#define WAKE_SECONDARY_VIA_INIT - - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return (0); -} - -static inline unsigned long check_apicid_present(int bit) -{ - return (1); -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); - val |= SET_APIC_LOGICAL_ID(id); - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write_around(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write_around(APIC_LDR, val); -} - -static inline void clustered_apic_check(void) -{ - printk("Enabling APIC mode: %s. 
Using %d I/O APICs\n", - "Physflat", nr_ioapics); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return (0); -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int) bios_cpu_apicid[mps_cpu]; - - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return cpu_physical_id(cpu); -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); -} - -static inline void enable_apic_mode(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return (1); -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int cpu; - int apicid; - - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-bigsmp/mach_apicdef.h --- a/xen/include/asm-x86/mach-bigsmp/mach_apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-bigsmp/mach_ipi.h --- a/xen/include/asm-x86/mach-bigsmp/mach_ipi.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-default/mach_apic.h --- a/xen/include/asm-x86/mach-default/mach_apic.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -#include <mach_apicdef.h> -#include <asm/smp.h> - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline cpumask_t target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_map; -#else - return cpumask_of_cpu(0); -#endif -} -#define TARGET_CPUS (target_cpus()) - -#define NO_BALANCE_IRQ (0) -#define esr_disable (0) - -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -/* - * Set up the logical destination ID. 
- * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - - apic_write_around(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write_around(APIC_LDR, val); -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - -static inline void clustered_apic_check(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Flat", nr_ioapics); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return 0; -} - -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < get_physical_broadcast()) - return mps_cpu; - else - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - -static inline int apic_id_registered(void) -{ - return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return cpus_addr(cpumask)[0]; -} - -static inline void enable_apic_mode(void) -{ -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-default/mach_apicdef.h --- a/xen/include/asm-x86/mach-default/mach_apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-default/mach_ipi.h --- a/xen/include/asm-x86/mach-default/mach_ipi.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void send_IPI_mask_bitmask(cpumask_t mask, int vector); -void __send_IPI_shortcut(unsigned int shortcut, int vector); - -extern int no_broadcast; - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_bitmask(mask, vector); -} - -static inline void __local_send_IPI_allbutself(int vector) -{ - if (no_broadcast) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - send_IPI_mask(mask, vector); - } else - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -static inline void __local_send_IPI_all(int vector) -{ - if (no_broadcast) - send_IPI_mask(cpu_online_map, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. 
- */ - if (!(num_online_cpus() > 1)) - return; - - __local_send_IPI_allbutself(vector); - return; -} - -static inline void send_IPI_all(int vector) -{ - __local_send_IPI_all(vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-es7000/mach_apic.h --- a/xen/include/asm-x86/mach-es7000/mach_apic.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,185 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -extern u8 bios_cpu_apicid[]; - -#define xapic_phys_to_log_apicid(cpu) (bios_cpu_apicid[cpu]) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -static inline cpumask_t target_cpus(void) -{ -#if defined CONFIG_ES7000_CLUSTERED_APIC - return CPU_MASK_ALL; -#else - return cpumask_of_cpu(smp_processor_id()); -#endif -} -#define TARGET_CPUS (target_cpus()) - -#if defined CONFIG_ES7000_CLUSTERED_APIC -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) -#undef WAKE_SECONDARY_VIA_INIT -#define WAKE_SECONDARY_VIA_MIP -#else -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target procs */ -#define NO_BALANCE_IRQ (0) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0x0 -#define WAKE_SECONDARY_VIA_INIT -#endif - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long id; - id = xapic_phys_to_log_apicid(cpu); - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write_around(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write_around(APIC_LDR, val); -} - -extern void es7000_sw_apic(void); -static inline void enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -extern int apic_version [MAX_APICS]; -static inline void clustered_apic_check(void) -{ - int apic = bios_cpu_apicid[smp_processor_id()]; - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? 
- "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) - return (int) bios_cpu_apicid[mps_cpu]; - else - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; - return mask; -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - -extern unsigned int boot_cpu_physical_apicid; -static inline int check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - return (1); -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n",__FUNCTION__); -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-es7000/mach_apicdef.h --- a/xen/include/asm-x86/mach-es7000/mach_apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-es7000/mach_ipi.h --- a/xen/include/asm-x86/mach-es7000/mach_ipi.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-es7000/mach_wakecpu.h --- 
a/xen/include/asm-x86/mach-es7000/mach_wakecpu.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H - -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#ifdef CONFIG_ES7000_CLUSTERED_APIC -#define WAKE_SECONDARY_VIA_MIP -#else -#define WAKE_SECONDARY_VIA_INIT -#endif - -#ifdef WAKE_SECONDARY_VIA_MIP -extern int es7000_start_cpu(int cpu, unsigned long eip); -static inline int -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - int boot_error = 0; - boot_error = es7000_start_cpu(phys_apicid, start_eip); - return boot_error; -} -#endif - -#define TRAMPOLINE_LOW maddr_to_virt(0x467) -#define TRAMPOLINE_HIGH maddr_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid - -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -#ifdef WAKE_SECONDARY_VIA_INIT - while (!atomic_read(deassert)); -#endif - return; -} - -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif - -#endif /* __ASM_MACH_WAKECPU_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-generic/mach_apicdef.h --- a/xen/include/asm-x86/mach-generic/mach_apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#ifndef _GENAPIC_MACH_APICDEF_H -#define _GENAPIC_MACH_APICDEF_H 1 - -#ifndef APIC_DEFINITION -#include <asm/genapic.h> - -#define GET_APIC_ID (genapic->get_apic_id) -#endif - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-generic/mach_ipi.h --- a/xen/include/asm-x86/mach-generic/mach_ipi.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#ifndef _MACH_IPI_H -#define _MACH_IPI_H 1 - -#include <asm/genapic.h> - -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-generic/mach_mpspec.h --- a/xen/include/asm-x86/mach-generic/mach_mpspec.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ -/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#define MAX_MP_BUSSES 260 - -#endif /* __ASM_MACH_MPSPEC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-summit/mach_apic.h --- a/xen/include/asm-x86/mach-summit/mach_apic.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,167 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -#include <xen/config.h> -#include <asm/smp.h> - -#define esr_disable (1) -#define NO_BALANCE_IRQ (0) - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. 
*/ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline cpumask_t target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return cpumask_of_cpu(0); -} -#define TARGET_CPUS (target_cpus()) - -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) -{ - return 1; -} - -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - -extern u8 bios_cpu_apicid[]; -extern u8 cpu_2_logical_apicid[]; - -static inline void init_apic_ldr(void) -{ - unsigned long val, id; - int i, count; - u8 lid; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = NR_CPUS; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) - ++count; - } - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write_around(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); -} - -static inline int apic_id_registered(void) -{ - return 1; -} - -static inline void clustered_apic_check(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 5; /* 2 clusterids per CEC */ -} - -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int)bios_cpu_apicid[mps_cpu]; - else - return BAD_APICID; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); -} - -static inline physid_mask_t apicid_to_cpu_present(int apicid) -{ - return physid_mask_of_physid(0); -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -static inline void enable_apic_mode(void) -{ -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - return (int) 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. 
- */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n",__FUNCTION__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -/* cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-summit/mach_apicdef.h --- a/xen/include/asm-x86/mach-summit/mach_apicdef.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff -r 5719550652a1 -r 5cc367720223 xen/include/asm-x86/mach-summit/mach_ipi.h --- a/xen/include/asm-x86/mach-summit/mach_ipi.h Tue Apr 11 13:55:47 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_MACH_IPI_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog