[Xen-changelog] Merge
# HG changeset patch # User djm@xxxxxxxxxxxxxxx # Node ID 4e0c94871be28ce4936169205430d1cb1b0b451b # Parent 0ba10f7fef519e0a9065c570653f955b66d930ea # Parent 2144de6eabcc7fc6272a8ca088008ef92c05aa6b Merge diff -r 0ba10f7fef51 -r 4e0c94871be2 .hgignore --- a/.hgignore Sat Oct 8 17:37:45 2005 +++ b/.hgignore Sat Oct 8 20:28:24 2005 @@ -155,7 +155,7 @@ ^tools/xenstore/xenstore-read$ ^tools/xenstore/xenstore-rm$ ^tools/xenstore/xenstore-write$ -^tools/xenstore/xs_dom0_test$ +^tools/xenstore/xs_crashme$ ^tools/xenstore/xs_random$ ^tools/xenstore/xs_stress$ ^tools/xenstore/xs_tdb_dump$ diff -r 0ba10f7fef51 -r 4e0c94871be2 Config.mk --- a/Config.mk Sat Oct 8 17:37:45 2005 +++ b/Config.mk Sat Oct 8 20:28:24 2005 @@ -19,6 +19,13 @@ STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump + +DISTDIR ?= $(XEN_ROOT)/dist + +INSTALL = install +INSTALL_DIR = $(INSTALL) -d -m0755 +INSTALL_DATA = $(INSTALL) -m0644 +INSTALL_PROG = $(INSTALL) -m0755 ifeq ($(XEN_TARGET_ARCH),x86_64) LIBDIR = lib64 @@ -49,3 +56,5 @@ XENSTAT_XENTOP ?= y VTPM_TOOLS ?= n + +-include $(XEN_ROOT)/.config diff -r 0ba10f7fef51 -r 4e0c94871be2 Makefile --- a/Makefile Sat Oct 8 17:37:45 2005 +++ b/Makefile Sat Oct 8 20:28:24 2005 @@ -2,22 +2,10 @@ # Grand Unified Makefile for Xen. # -# Default is to install to local 'dist' directory. -DISTDIR ?= $(CURDIR)/dist -DESTDIR ?= $(DISTDIR)/install - -INSTALL := install -INSTALL_DIR := $(INSTALL) -d -m0755 -INSTALL_DATA := $(INSTALL) -m0644 -INSTALL_PROG := $(INSTALL) -m0755 - KERNELS ?= linux-2.6-xen0 linux-2.6-xenU -# linux-2.4-xen0 linux-2.4-xenU netbsd-2.0-xenU -# You may use wildcards in the above e.g. KERNELS=*2.4* +# You may use wildcards in the above e.g. KERNELS=*2.6* XKERNELS := $(foreach kernel, $(KERNELS), $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.$(kernel))) ) - -export DESTDIR # Export target architecture overrides to Xen and Linux sub-trees. ifneq ($(XEN_TARGET_ARCH),) @@ -28,6 +16,7 @@ # Default target must appear before any include lines all: dist +XEN_ROOT=$(CURDIR) include Config.mk include buildconfigs/Rules.mk @@ -46,24 +35,40 @@ $(MAKE) -C tools build $(MAKE) -C docs build +# The test target is for unit tests that can run without an installation. Of +# course, many tests require a machine running Xen itself, and these are +# handled elsewhere. 
+test: + $(MAKE) -C tools/python test + # build and install everything into local dist directory -dist: xen kernels tools docs +dist: DESTDIR=$(DISTDIR)/install +dist: dist-xen dist-kernels dist-tools dist-docs $(INSTALL_DIR) $(DISTDIR)/check $(INSTALL_DATA) ./COPYING $(DISTDIR) $(INSTALL_DATA) ./README $(DISTDIR) $(INSTALL_PROG) ./install.sh $(DISTDIR) $(INSTALL_PROG) tools/check/chk tools/check/check_* $(DISTDIR)/check +dist-%: DESTDIR=$(DISTDIR)/install +dist-%: install-% + @: # do nothing -xen: +# Legacy dist targets +xen: dist-xen +tools: dist-tools +kernels: dist-kernels +docs: dist-docs + +install-xen: $(MAKE) -C xen install -tools: +install-tools: $(MAKE) -C tools install -kernels: +install-kernels: for i in $(XKERNELS) ; do $(MAKE) $$i-build || exit 1; done -docs: +install-docs: sh ./docs/check_pkgs && $(MAKE) -C docs install || true dev-docs: @@ -119,10 +124,6 @@ tar -jxf iptables-1.2.11.tar.bz2 $(MAKE) -C iptables-1.2.11 PREFIX= KERNEL_DIR=../linux-$(LINUX_VER)-xen0 install -install-%: DESTDIR= -install-%: % - @: # do nothing - help: @echo 'Installation targets:' @echo ' install - build and install everything' @@ -161,7 +162,6 @@ @echo ' with extreme care!)' # Use this target with extreme care! -uninstall: DESTDIR= uninstall: D=$(DESTDIR) uninstall: [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s` || true @@ -187,12 +187,5 @@ rm -rf $(D)/usr/share/man/man8/xen* # Legacy targets for compatibility -linux24: - $(MAKE) 'KERNELS=linux-2.4*' kernels - linux26: $(MAKE) 'KERNELS=linux-2.6*' kernels - -netbsd20: - $(MAKE) netbsd-2.0-xenU-build - diff -r 0ba10f7fef51 -r 4e0c94871be2 README --- a/README Sat Oct 8 17:37:45 2005 +++ b/README Sat Oct 8 20:28:24 2005 @@ -22,7 +22,7 @@ GNU GPL. The 2.0 release offers excellent performance, hardware support and -enterprise-grade features such as live migration. Linux 2.6, 2.4 and +enterprise-grade features such as live migration. Linux 2.6 and NetBSD 2.0 are already available for Xen, with more operating system ports on the way. @@ -97,8 +97,8 @@ version for unstable. 2. cd to xen-unstable (or whatever you sensibly rename it to). - The Linux (2.4 and 2.6), netbsd and freebsd kernel source - trees are in the $os-$version-xen-sparse directories. + The Linux, netbsd and freebsd kernel source trees are in + the $os-$version-xen-sparse directories. 
On Linux: diff -r 0ba10f7fef51 -r 4e0c94871be2 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Sat Oct 8 17:37:45 2005 +++ b/buildconfigs/Rules.mk Sat Oct 8 20:28:24 2005 @@ -1,10 +1,7 @@ include Config.mk -# We expect these two to already be set if people -# are using the top-level Makefile -DISTDIR ?= $(CURDIR)/dist -DESTDIR ?= $(DISTDIR)/install +export DESTDIR ALLKERNELS = $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.*)) ALLSPARSETREES = $(patsubst %-xen-sparse,%,$(wildcard *-xen-sparse)) diff -r 0ba10f7fef51 -r 4e0c94871be2 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Sat Oct 8 17:37:45 2005 +++ b/buildconfigs/mk.linux-2.6-xen Sat Oct 8 20:28:24 2005 @@ -4,7 +4,7 @@ LINUX_SERIES = 2.6 LINUX_VER = 2.6.12 -EXTRAVERSION = xen +EXTRAVERSION ?= xen LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) diff -r 0ba10f7fef51 -r 4e0c94871be2 buildconfigs/mk.linux-2.6-xen0 --- a/buildconfigs/mk.linux-2.6-xen0 Sat Oct 8 17:37:45 2005 +++ b/buildconfigs/mk.linux-2.6-xen0 Sat Oct 8 20:28:24 2005 @@ -1,51 +1,2 @@ - -OS = linux - -LINUX_SERIES = 2.6 -LINUX_VER = 2.6.12 - EXTRAVERSION = xen0 - -LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) - -include buildconfigs/Rules.mk - -.PHONY: build clean delete - -# The real action starts here! -build: $(LINUX_DIR)/include/linux/autoconf.h - if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) modules ; \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_MOD_PATH=$(DESTDIR) modules_install ; \ - fi - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) install - -$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) - # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) - # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ - [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ - cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ - || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \ - $(LINUX_DIR)/.config - # See if we need to munge config to enable PAE - $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig - -config: CONFIGMODE = menuconfig -config: $(LINUX_DIR)/include/linux/autoconf.h - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) $(CONFIGMODE) - -clean:: - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean - -delete: - rm -rf tmp-$(OS)-$(LINUX_VER) $(LINUX_DIR) +include buildconfigs/mk.linux-2.6-xen diff -r 0ba10f7fef51 -r 4e0c94871be2 buildconfigs/mk.linux-2.6-xenU --- a/buildconfigs/mk.linux-2.6-xenU Sat Oct 8 17:37:45 2005 +++ b/buildconfigs/mk.linux-2.6-xenU Sat Oct 8 20:28:24 2005 @@ -1,51 +1,2 @@ - -OS = linux - -LINUX_SERIES = 2.6 -LINUX_VER = 2.6.12 - EXTRAVERSION = xenU - -LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) - -include buildconfigs/Rules.mk - -.PHONY: build clean delete - -# The real action starts here! 
-build: $(LINUX_DIR)/include/linux/autoconf.h - if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) modules ; \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_MOD_PATH=$(DESTDIR) modules_install ; \ - fi - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) install - -$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) - # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) - # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ - [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ - cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ - || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \ - $(LINUX_DIR)/.config - # See if we need to munge config to enable PAE - $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig - -config: CONFIGMODE = menuconfig -config: $(LINUX_DIR)/include/linux/autoconf.h - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) $(CONFIGMODE) - -clean:: - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean - -delete: - rm -rf tmp-$(OS)-$(LINUX_VER) $(LINUX_DIR) +include buildconfigs/mk.linux-2.6-xen diff -r 0ba10f7fef51 -r 4e0c94871be2 docs/misc/xen_config.html --- a/docs/misc/xen_config.html Sat Oct 8 17:37:45 2005 +++ b/docs/misc/xen_config.html Sat Oct 8 20:28:24 2005 @@ -154,7 +154,7 @@ (memory 64) (image (linux - (kernel /boot/vmlinuz-2.4.26-xen) + (kernel /boot/vmlinuz-2.6.12-xen) (ip ::::xendom1:eth0:dhcp) (root /dev/xda1) (args 'rw fastboot 4') diff -r 0ba10f7fef51 -r 4e0c94871be2 docs/src/user/installation.tex --- a/docs/src/user/installation.tex Sat Oct 8 17:37:45 2005 +++ b/docs/src/user/installation.tex Sat Oct 8 20:28:24 2005 @@ -1,7 +1,7 @@ \chapter{Installation} The Xen distribution includes three main components: Xen itself, ports -of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the userspace +of Linux and NetBSD to run on Xen, and the userspace tools required to manage a Xen-based system. This chapter describes how to install the Xen~2.0 distribution from source. Alternatively, there may be pre-built packages available as part of your operating @@ -169,7 +169,7 @@ You can edit this line to include any set of operating system kernels which have configurations in the top-level \path{buildconfigs/} -directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4 +directory, for example \path{mk.linux-2.6-xenU} to build a Linux 2.6 kernel containing only virtual device drivers. %% Inspect the Makefile if you want to see what goes on during a @@ -190,10 +190,6 @@ %% mkbuildtree} script to add the Xen patches to the kernel. -%% The procedure is similar to build the Linux 2.4 port: \\ -%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24! 
- - %% \framebox{\parbox{5in}{ %% {\bf Distro specific:} \\ %% {\it Gentoo} --- if not using udev (most installations, diff -r 0ba10f7fef51 -r 4e0c94871be2 docs/src/user/introduction.tex --- a/docs/src/user/introduction.tex Sat Oct 8 17:37:45 2005 +++ b/docs/src/user/introduction.tex Sat Oct 8 20:28:24 2005 @@ -28,7 +28,7 @@ space applications and libraries \emph{do not} require modification. Xen support is available for increasingly many operating systems: -right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0. +right now, Linux and NetBSD are available for Xen 2.0. A FreeBSD port is undergoing testing and will be incorporated into the release soon. Other OS ports, including Plan 9, are in progress. We hope that that arch-xen patches will be incorporated into the diff -r 0ba10f7fef51 -r 4e0c94871be2 install.sh --- a/install.sh Sat Oct 8 17:37:45 2005 +++ b/install.sh Sat Oct 8 20:28:24 2005 @@ -23,9 +23,15 @@ fi echo "Installing Xen from '$src' to '$dst'..." -(cd $src; tar -cf - --exclude etc/init.d * ) | tar -C $dst -xf - +(cd $src; tar -cf - --exclude etc/init.d --exclude etc/hotplug --exclude etc/udev * ) | tar -C $dst -xf - cp -fdRL $src/etc/init.d/* $dst/etc/init.d/ echo "All done." + +if [ -x /sbin/udev ] && [ ! -z `udev -V` ] && [ `/sbin/udev -V` -ge 059 ]; then + cp -f $src/etc/udev/rules.d/*.rules $dst/etc/udev/rules.d/ +else + cp -f $src/etc/hotplug/*.agent $dst/etc/hotplug/ +fi echo "Checking to see whether prerequisite tools are installed..." cd $src/../check diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/ia64/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/Makefile Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/ia64/Makefile Sat Oct 8 20:28:24 2005 @@ -88,7 +88,6 @@ CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader #CLEAN_FILES += include/asm-xen/xen-public include/asm-ia64/xen/asm-xsi-offsets.h -#CLEAN_FILES += include/asm-xen/linux-public/xenstored.h #CLEAN_FILES += include/asm-xen/linux-public include/asm-xen/asm-ia64/hypervisor.h MRPROPER_FILES += include/asm-ia64/offsets.h @@ -119,9 +118,6 @@ # [ -e include/asm-xen/linux-public ] \ # || ln -s $(XEN_PATH)/linux-2.6-xen-sparse/include/asm-xen/linux-public \ include/asm-xen/linux-public - [ -e include/asm-xen/linux-public/xenstored.h ] \ - || ln -s $(XEN_PATH)/tools/xenstore/xenstored.h \ - include/asm-xen/linux-public/xenstored.h [ -e include/asm-xen/asm-ia64/hypervisor.h ] \ || ln -s $(XEN_PATH)/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h \ include/asm-xen/asm-ia64/hypervisor.h diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/Makefile --- a/linux-2.6-xen-sparse/arch/xen/Makefile Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Makefile Sat Oct 8 20:28:24 2005 @@ -63,6 +63,9 @@ vmlinuz: vmlinux $(Q)$(MAKE) $(build)=arch/xen/boot vmlinuz +bzImage: vmlinuz + $(Q)$(MAKE) $(build)=arch/xen/boot bzImage + XINSTALL_NAME ?= $(KERNELRELEASE) install: vmlinuz install kernel_install: diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/boot/Makefile --- a/linux-2.6-xen-sparse/arch/xen/boot/Makefile Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/boot/Makefile Sat Oct 8 20:28:24 2005 @@ -6,3 +6,6 @@ vmlinux-stripped: vmlinux FORCE $(call if_changed,objcopy) + +bzImage: vmlinuz + $(Q)$(LN) -sf ../../../vmlinuz $(srctree)/arch/xen/boot/bzImage diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Sat Oct 8 17:37:45 2005 
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Sat Oct 8 20:28:24 2005 @@ -86,7 +86,7 @@ $(call if_changed,syscall) c-link := -s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o vsyscall-note.o +s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)): @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Sat Oct 8 20:28:24 2005 @@ -49,6 +49,7 @@ #include <asm/irq.h> #include <asm/desc.h> #include <asm-xen/xen-public/physdev.h> +#include <asm-xen/xen-public/vcpu.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif @@ -105,7 +106,8 @@ local_irq_enable(); } else { stop_hz_timer(); - HYPERVISOR_block(); /* implicit local_irq_enable() */ + /* Blocking includes an implicit local_irq_enable(). */ + HYPERVISOR_sched_op(SCHEDOP_block, 0); start_hz_timer(); } } @@ -121,7 +123,7 @@ { /* Death loop */ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) - HYPERVISOR_yield(); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); __flush_tlb_all(); /* @@ -140,6 +142,13 @@ BUG(); } #endif /* CONFIG_HOTPLUG_CPU */ + +void cpu_restore(void) +{ + play_dead(); + local_irq_enable(); + cpu_idle(); +} /* * The idle thread. There's no useful work to be @@ -171,7 +180,7 @@ don't printk. */ __get_cpu_var(cpu_state) = CPU_DEAD; /* Tell hypervisor to take vcpu down. */ - HYPERVISOR_vcpu_down(cpu); + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); #endif play_dead(); local_irq_enable(); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Sat Oct 8 20:28:24 2005 @@ -365,6 +365,7 @@ /* Raw start-of-day parameters from the hypervisor. */ start_info_t *xen_start_info; +EXPORT_SYMBOL(xen_start_info); static void __init limit_regions(unsigned long long size) { @@ -1783,7 +1784,7 @@ static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - HYPERVISOR_crash(); + HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); /* we're never actually going to get here... */ return NOTIFY_DONE; } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Sat Oct 8 20:28:24 2005 @@ -63,6 +63,7 @@ #include <smpboot_hooks.h> #include <asm-xen/evtchn.h> +#include <asm-xen/xen-public/vcpu.h> /* Set if we find a B stepping CPU */ static int __initdata smp_b_stepping; @@ -802,7 +803,6 @@ extern void hypervisor_callback(void); extern void failsafe_callback(void); extern void smp_trap_init(trap_info_t *); - int i; cpu = ++cpucount; /* @@ -853,12 +853,6 @@ /* FPU is set up to default initial state. */ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - /* Virtual IDT is empty at start-of-day. */ - for ( i = 0; i < 256; i++ ) - { - ctxt.trap_ctxt[i].vector = i; - ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; - } smp_trap_init(ctxt.trap_ctxt); /* No LDT. 
*/ @@ -889,11 +883,13 @@ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; - boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); + boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt); if (boot_error) printk("boot error: %ld\n", boot_error); if (!boot_error) { + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + /* * allow APs to start initializing. */ @@ -1506,7 +1502,7 @@ #ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_XEN /* Tell hypervisor to bring vcpu up. */ - HYPERVISOR_vcpu_up(cpu); + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); #endif /* Already up, and in cpu_quiescent now? */ if (cpu_isset(cpu, smp_commenced_mask)) { @@ -1585,61 +1581,49 @@ local_setup_timer_irq(); } -static atomic_t vcpus_rebooting; - -static void restore_vcpu_ready(void) -{ - - atomic_dec(&vcpus_rebooting); -} - -void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages; - r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); - if (r != 0) - panic("pickling vcpu %d -> %d!\n", vcpu, r); - - /* Translate from machine to physical addresses where necessary, - so that they can be translated to our new machine address space - after resume. libxc is responsible for doing this to vcpu0, - but we do it to the others. */ - gdt_pages = (ctxt->gdt_ents + 511) / 512; - ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); -} - -int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages = (ctxt->gdt_ents + 511) / 512; - - /* This is kind of a hack, and implicitly relies on the fact that - the vcpu stops in a place where all of the call clobbered - registers are already dead. */ - ctxt->user_regs.esp -= 4; - ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; - ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready; - - /* De-canonicalise. libxc handles this for vcpu 0, but we need - to do it for the other vcpus. */ - ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); - - atomic_set(&vcpus_rebooting, 1); - r = HYPERVISOR_boot_vcpu(vcpu, ctxt); - if (r != 0) { - printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); - return -1; - } - - /* Make sure we wait for the new vcpu to come up before trying to do - anything with it or starting the next one. 
*/ - while (atomic_read(&vcpus_rebooting)) - barrier(); - - return 0; -} +void vcpu_prepare(int vcpu) +{ + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + extern void smp_trap_init(trap_info_t *); + extern void cpu_restore(void); + vcpu_guest_context_t ctxt; + struct task_struct *idle = idle_task(vcpu); + + if (vcpu == 0) + return; + + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.user_regs.ds = __USER_DS; + ctxt.user_regs.es = __USER_DS; + ctxt.user_regs.fs = 0; + ctxt.user_regs.gs = 0; + ctxt.user_regs.ss = __KERNEL_DS; + ctxt.user_regs.cs = __KERNEL_CS; + ctxt.user_regs.eip = (unsigned long)cpu_restore; + ctxt.user_regs.esp = idle->thread.esp; + ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1; + + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + smp_trap_init(ctxt.trap_ctxt); + + ctxt.ldt_ents = 0; + + ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address); + ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8; + + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_sp = idle->thread.esp0; + + ctxt.event_callback_cs = __KERNEL_CS; + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_cs = __KERNEL_CS; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + + ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; + + (void)HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt); + (void)HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL); +} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Sat Oct 8 20:28:24 2005 @@ -1012,6 +1012,12 @@ void smp_trap_init(trap_info_t *trap_ctxt) { trap_info_t *t = trap_table; + int i; + + for (i = 0; i < 256; i++) { + trap_ctxt[i].vector = i; + trap_ctxt[i].cs = FLAT_KERNEL_CS; + } for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Sat Oct 8 20:28:24 2005 @@ -52,24 +52,30 @@ pgprot_t prot, domid_t domid) { - int i; + int i, rc; unsigned long start_address; -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u, *w = u; + mmu_update_t *u, *v, *w; + + u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (u == NULL) + return -ENOMEM; start_address = address; flush_cache_all(); for (i = 0; i < size; i += PAGE_SIZE) { - if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { + if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) { /* Fill in the PTE pointers. 
*/ - generic_page_range(mm, start_address, - address - start_address, - direct_remap_area_pte_fn, &w); + rc = generic_page_range(mm, start_address, + address - start_address, + direct_remap_area_pte_fn, &w); + if (rc) + goto out; w = u; + rc = -EFAULT; if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) - return -EFAULT; + goto out; v = u; start_address = address; } @@ -89,13 +95,19 @@ /* get the ptep's filled in */ generic_page_range(mm, start_address, address - start_address, direct_remap_area_pte_fn, &w); + rc = -EFAULT; if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) - return -EFAULT; - } - + goto out; + } + + rc = 0; + + out: flush_tlb_all(); - return 0; + free_page((unsigned long)u); + + return rc; } int direct_remap_pfn_range(struct vm_area_struct *vma, diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Sat Oct 8 20:28:24 2005 @@ -180,15 +180,15 @@ int bind_virq_to_irq(int virq) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_virq }; int evtchn, irq; int cpu = smp_processor_id(); spin_lock(&irq_mapping_update_lock); if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { - op.cmd = EVTCHNOP_bind_virq; op.u.bind_virq.virq = virq; + op.u.bind_virq.vcpu = cpu; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_virq.port; @@ -211,7 +211,7 @@ void unbind_virq_from_irq(int virq) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_close }; int cpu = smp_processor_id(); int irq = per_cpu(virq_to_irq, cpu)[virq]; int evtchn = irq_to_evtchn[irq]; @@ -219,8 +219,6 @@ spin_lock(&irq_mapping_update_lock); if (--irq_bindcount[irq] == 0) { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); @@ -244,14 +242,14 @@ int bind_ipi_to_irq(int ipi) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_ipi }; int evtchn, irq; int cpu = smp_processor_id(); spin_lock(&irq_mapping_update_lock); if ((evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi]) == -1) { - op.cmd = EVTCHNOP_bind_ipi; + op.u.bind_ipi.vcpu = cpu; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_ipi.port; @@ -276,7 +274,7 @@ void unbind_ipi_from_irq(int ipi) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_close }; int cpu = smp_processor_id(); int evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi]; int irq = evtchn_to_irq[evtchn]; @@ -284,8 +282,6 @@ spin_lock(&irq_mapping_update_lock); if (--irq_bindcount[irq] == 0) { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); @@ -300,7 +296,7 @@ } EXPORT_SYMBOL(unbind_ipi_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -318,18 +314,15 @@ return irq; } -EXPORT_SYMBOL(bind_evtchn_to_irq); - -void unbind_evtchn_from_irq(unsigned int irq) -{ - evtchn_op_t op; + +static void unbind_evtchn_from_irq(unsigned int irq) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_close }; int evtchn = irq_to_evtchn[irq]; spin_lock(&irq_mapping_update_lock); if ((--irq_bindcount[irq] == 0) && (evtchn != -1)) { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); @@ -339,7 +332,6 @@ spin_unlock(&irq_mapping_update_lock); } -EXPORT_SYMBOL(unbind_evtchn_from_irq); int bind_evtchn_to_irqhandler( unsigned 
int evtchn, @@ -353,8 +345,10 @@ irq = bind_evtchn_to_irq(evtchn); retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) + if (retval != 0) { unbind_evtchn_from_irq(irq); + return retval; + } return irq; } @@ -376,7 +370,7 @@ /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu }; int evtchn; spin_lock(&irq_mapping_update_lock); @@ -387,7 +381,6 @@ } /* Send future instances of this interrupt to other vcpu. */ - op.cmd = EVTCHNOP_bind_vcpu; op.u.bind_vcpu.port = evtchn; op.u.bind_vcpu.vcpu = tcpu; @@ -516,10 +509,12 @@ static unsigned int startup_pirq(unsigned int irq) { - evtchn_op_t op; - int evtchn; - - op.cmd = EVTCHNOP_bind_pirq; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_pirq }; + int evtchn = irq_to_evtchn[irq]; + + if (VALID_EVTCHN(evtchn)) + goto out; + op.u.bind_pirq.pirq = irq; /* NB. We are happy to share unless we are probing. */ op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; @@ -537,6 +532,7 @@ evtchn_to_irq[evtchn] = irq; irq_to_evtchn[irq] = evtchn; + out: unmask_evtchn(evtchn); pirq_unmask_notify(irq_to_pirq(irq)); @@ -545,7 +541,7 @@ static void shutdown_pirq(unsigned int irq) { - evtchn_op_t op; + evtchn_op_t op = { .cmd = EVTCHNOP_close }; int evtchn = irq_to_evtchn[irq]; if (!VALID_EVTCHN(evtchn)) @@ -553,8 +549,6 @@ mask_evtchn(evtchn); - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); @@ -629,6 +623,7 @@ if (VALID_EVTCHN(evtchn)) notify_remote_via_evtchn(evtchn); } +EXPORT_SYMBOL(notify_remote_via_irq); void irq_resume(void) { @@ -663,8 +658,10 @@ continue; /* Get a new binding from Xen. */ + memset(&op, 0, sizeof(op)); op.cmd = EVTCHNOP_bind_virq; op.u.bind_virq.virq = virq; + op.u.bind_virq.vcpu = 0; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_virq.port; @@ -685,7 +682,9 @@ evtchn_to_irq[evtchn] = -1; /* Get a new binding from Xen. */ + memset(&op, 0, sizeof(op)); op.cmd = EVTCHNOP_bind_ipi; + op.u.bind_ipi.vcpu = 0; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_ipi.port; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Sat Oct 8 20:28:24 2005 @@ -20,13 +20,19 @@ #define SHUTDOWN_POWEROFF 0 #define SHUTDOWN_REBOOT 1 #define SHUTDOWN_SUSPEND 2 +// Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only +// report a crash, not be instructed to crash! +// HALT is the same as POWEROFF, as far as we're concerned. The tools use +// the distinction when we return the reason code to them. +#define SHUTDOWN_HALT 4 + void machine_restart(char * __unused) { /* We really want to get pending console data out before we die. */ extern void xencons_force_flush(void); xencons_force_flush(); - HYPERVISOR_reboot(); + HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_reboot); } void machine_halt(void) @@ -39,7 +45,7 @@ /* We really want to get pending console data out before we die. 
*/ extern void xencons_force_flush(void); xencons_force_flush(); - HYPERVISOR_shutdown(); + HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_poweroff); } int reboot_thru_bios = 0; /* for dmi_scan.c */ @@ -74,11 +80,8 @@ extern unsigned long *pfn_to_mfn_frame_list[]; #ifdef CONFIG_SMP - static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; - cpumask_t prev_online_cpus, prev_present_cpus; - - void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); - int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); + cpumask_t prev_online_cpus; + int vcpu_prepare(int vcpu); #endif extern void xencons_resume(void); @@ -98,25 +101,20 @@ xenbus_suspend(); - preempt_disable(); + lock_cpu_hotplug(); #ifdef CONFIG_SMP - /* Take all of the other cpus offline. We need to be careful not - to get preempted between the final test for num_online_cpus() - == 1 and disabling interrupts, since otherwise userspace could - bring another cpu online, and then we'd be stuffed. At the - same time, cpu_down can reschedule, so we need to enable - preemption while doing that. This kind of sucks, but should be - correct. */ - /* (We don't need to worry about other cpus bringing stuff up, - since by the time num_online_cpus() == 1, there aren't any - other cpus) */ + /* + * Take all other CPUs offline. We hold the hotplug semaphore to + * avoid other processes bringing up CPUs under our feet. + */ cpus_clear(prev_online_cpus); while (num_online_cpus() > 1) { - preempt_enable(); for_each_online_cpu(i) { if (i == 0) continue; + unlock_cpu_hotplug(); err = cpu_down(i); + lock_cpu_hotplug(); if (err != 0) { printk(KERN_CRIT "Failed to take all CPUs " "down: %d.\n", err); @@ -124,39 +122,32 @@ } cpu_set(i, prev_online_cpus); } - preempt_disable(); - } -#endif - - __cli(); - - preempt_enable(); - -#ifdef CONFIG_SMP - cpus_clear(prev_present_cpus); - for_each_present_cpu(i) { - if (i == 0) - continue; - save_vcpu_context(i, &suspended_cpu_records[i]); - cpu_set(i, prev_present_cpus); - } -#endif - - gnttab_suspend(); + } +#endif + + preempt_disable(); #ifdef __i386__ mm_pin_all(); kmem_cache_shrink(pgd_cache); #endif + __cli(); + preempt_enable(); + unlock_cpu_hotplug(); + + gnttab_suspend(); + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; clear_fixmap(FIX_SHARED_INFO); xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn); - /* We'll stop somewhere inside this hypercall. When it returns, - we'll start resuming after the restore. */ + /* + * We'll stop somewhere inside this hypercall. When it returns, + * we'll start resuming after the restore. 
+ */ HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); shutting_down = SHUTDOWN_INVALID; @@ -189,18 +180,16 @@ time_resume(); + __sti(); + + xencons_resume(); + + xenbus_resume(); + #ifdef CONFIG_SMP - for_each_cpu_mask(i, prev_present_cpus) - restore_vcpu_context(i, &suspended_cpu_records[i]); -#endif - - __sti(); - - xencons_resume(); - - xenbus_resume(); - -#ifdef CONFIG_SMP + for_each_present_cpu(i) + vcpu_prepare(i); + out_reenable_cpus: for_each_cpu_mask(i, prev_online_cpus) { j = cpu_up(i); @@ -230,6 +219,7 @@ switch (shutting_down) { case SHUTDOWN_POWEROFF: + case SHUTDOWN_HALT: if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) { sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, @@ -311,6 +301,8 @@ shutting_down = SHUTDOWN_REBOOT; else if (strcmp(str, "suspend") == 0) shutting_down = SHUTDOWN_SUSPEND; + else if (strcmp(str, "halt") == 0) + shutting_down = SHUTDOWN_HALT; else { printk("Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Sat Oct 8 20:28:24 2005 @@ -51,7 +51,7 @@ cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o topology-y += ../../../i386/mach-default/topology.o #swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o -microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o +microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Sat Oct 8 20:28:24 2005 @@ -96,7 +96,8 @@ local_irq_enable(); } else { stop_hz_timer(); - HYPERVISOR_block(); /* implicit local_irq_enable() */ + /* Blocking includes an implicit local_irq_enable(). */ + HYPERVISOR_sched_op(SCHEDOP_block, 0); start_hz_timer(); } } @@ -114,7 +115,7 @@ * it "work" for testing purposes. */ /* Death loop */ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) - HYPERVISOR_yield(); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); local_irq_disable(); __flush_tlb_all(); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Sat Oct 8 20:28:24 2005 @@ -86,6 +86,7 @@ /* Raw start-of-day parameters from the hypervisor. */ start_info_t *xen_start_info; +EXPORT_SYMBOL(xen_start_info); #endif /* diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Sat Oct 8 20:28:24 2005 @@ -62,8 +62,8 @@ #include <asm/nmi.h> #ifdef CONFIG_XEN #include <asm/arch_hooks.h> - #include <asm-xen/evtchn.h> +#include <asm-xen/xen-public/vcpu.h> #endif /* Change for real CPU hotplug. Note other files need to be fixed @@ -742,12 +742,6 @@ /* FPU is set up to default initial state. */ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - /* Virtual IDT is empty at start-of-day. 
*/ - for ( i = 0; i < 256; i++ ) - { - ctxt.trap_ctxt[i].vector = i; - ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; - } smp_trap_init(ctxt.trap_ctxt); /* No LDT. */ @@ -777,11 +771,13 @@ ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT; - boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); + boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt); if (boot_error) printk("boot error: %ld\n", boot_error); if (!boot_error) { + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + /* * allow APs to start initializing. */ @@ -1267,13 +1263,8 @@ local_setup_timer_irq(); } -void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ -} - -int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - return 0; -} - -#endif +void vcpu_prepare(int vcpu) +{ +} + +#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Sat Oct 8 20:28:24 2005 @@ -956,6 +956,12 @@ void smp_trap_init(trap_info_t *trap_ctxt) { trap_info_t *t = trap_table; + int i; + + for (i = 0; i < 256; i++) { + trap_ctxt[i].vector = i; + trap_ctxt[i].cs = FLAT_KERNEL_CS; + } for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/Makefile --- a/linux-2.6-xen-sparse/drivers/char/tpm/Makefile Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/Makefile Sat Oct 8 20:28:24 2005 @@ -5,8 +5,9 @@ obj-$(CONFIG_TCG_TPM) += tpm.o obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o -obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o +obj-$(CONFIG_TCG_TIS) += tpm_tis.o +obj-$(CONFIG_TCG_XEN) += tpm_xen.o else -obj-$(CONFIG_TCG_TPM) += tpm_nopci.o +obj-$(CONFIG_TCG_TPM) += tpm.o obj-$(CONFIG_TCG_XEN) += tpm_xen.o endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sat Oct 8 20:28:24 2005 @@ -25,7 +25,7 @@ #include <linux/tpmfe.h> #include <linux/device.h> #include <linux/interrupt.h> -#include "tpm_nopci.h" +#include "tpm.h" /* read status bits */ enum { @@ -434,6 +434,21 @@ .release = tpm_release, }; +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel); + +static struct attribute* xen_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + 0, +}; + +static struct attribute_group xen_attr_grp = { .attrs = xen_attrs }; + static struct tpm_vendor_specific tpm_xen = { .recv = tpm_xen_recv, .send = tpm_xen_send, @@ -443,8 +458,9 @@ .req_complete_val = STATUS_DATA_AVAIL, .req_canceled = STATUS_READY, .base = 0, - .attr = TPM_DEVICE_ATTRS, + .attr_group = &xen_attr_grp, .miscdev.fops = &tpm_xen_ops, + .buffersize = 64 * 1024, }; static struct device tpm_device = { @@ -477,7 +493,9 @@ return rc; } - if ((rc = tpm_register_hardware_nopci(&tpm_device, &tpm_xen)) < 0) { + tpm_xen.buffersize = tpmfe.max_tx_size; + + if ((rc = tpm_register_hardware(&tpm_device, &tpm_xen)) < 0) { device_unregister(&tpm_device); tpm_fe_unregister_receiver(); return rc; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- 
a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sat Oct 8 20:28:24 2005 @@ -351,7 +351,8 @@ }; /* React to a change in the target key */ -static void watch_target(struct xenbus_watch *watch, const char *node) +static void watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) { unsigned long long new_target; int err; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sat Oct 8 20:28:24 2005 @@ -68,8 +68,15 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) { blkif_sring_t *sring; - evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_bind_interdomain, + .u.bind_interdomain.remote_dom = blkif->domid, + .u.bind_interdomain.remote_port = evtchn }; + + /* Already connected through? */ + if (blkif->irq) + return 0; if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) return -ENOMEM; @@ -80,10 +87,6 @@ return err; } - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = blkif->domid; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { unmap_frontend_page(blkif); @@ -91,7 +94,7 @@ return err; } - blkif->evtchn = op.u.bind_interdomain.port1; + blkif->evtchn = op.u.bind_interdomain.local_port; sring = (blkif_sring_t *)blkif->blk_ring_area->addr; SHARED_RING_INIT(sring); @@ -108,8 +111,12 @@ { blkif_t *blkif = (blkif_t *)arg; - if (blkif->irq) - unbind_evtchn_from_irqhandler(blkif->irq, blkif); + /* Already disconnected? */ + if (!blkif->irq) + return; + + unbind_evtchn_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; vbd_free(&blkif->vbd); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sat Oct 8 20:28:24 2005 @@ -55,7 +55,8 @@ } /* Front end tells us frame. */ -static void frontend_changed(struct xenbus_watch *watch, const char *node) +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { unsigned long ring_ref; unsigned int evtchn; @@ -64,7 +65,7 @@ = container_of(watch, struct backend_info, watch); /* If other end is gone, delete ourself. */ - if (node && !xenbus_exists(be->frontpath, "")) { + if (vec && !xenbus_exists(be->frontpath, "")) { device_unregister(&be->dev->dev); return; } @@ -143,7 +144,8 @@ We provide event channel and device details to front end. Frontend supplies shared frame and event channel. */ -static void backend_changed(struct xenbus_watch *watch, const char *node) +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { int err; char *p; @@ -195,7 +197,7 @@ } /* Pass in NULL node to skip exist test. 
*/ - frontend_changed(&be->watch, NULL); + frontend_changed(&be->watch, NULL, 0); } } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sat Oct 8 20:28:24 2005 @@ -53,8 +53,6 @@ #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 -static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; - #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANT_INVALID_REF 0 @@ -444,12 +442,16 @@ { "" } }; -static void watch_for_status(struct xenbus_watch *watch, const char *node) +static void watch_for_status(struct xenbus_watch *watch, + const char **vec, unsigned int len) { struct blkfront_info *info; unsigned int binfo; unsigned long sectors, sector_size; int err; + const char *node; + + node = vec[XS_WATCH_PATH]; info = container_of(watch, struct blkfront_info, watch); node += strlen(watch->node); @@ -472,8 +474,6 @@ info->connected = BLKIF_STATE_CONNECTED; xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); - blkif_state = BLKIF_STATE_CONNECTED; - xenbus_dev_ok(info->xbdev); /* Kick pending requests. */ @@ -485,8 +485,11 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { blkif_sring_t *sring; - evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_alloc_unbound, + .u.alloc_unbound.dom = DOMID_SELF, + .u.alloc_unbound.remote_dom = info->backend_id }; info->ring_ref = GRANT_INVALID_REF; @@ -508,7 +511,6 @@ } info->ring_ref = err; - op.u.alloc_unbound.dom = info->backend_id; err = HYPERVISOR_event_channel_op(&op); if (err) { gnttab_end_foreign_access(info->ring_ref, 0); @@ -518,7 +520,9 @@ xenbus_dev_error(dev, err, "allocating event channel"); return err; } + blkif_connect(info, op.u.alloc_unbound.port); + return 0; } @@ -652,8 +656,17 @@ return err; } - /* Call once in case entries already there. */ - watch_for_status(&info->watch, info->watch.node); + { + unsigned int len = max(XS_WATCH_PATH, XS_WATCH_TOKEN) + 1; + const char *vec[len]; + + vec[XS_WATCH_PATH] = info->watch.node; + vec[XS_WATCH_TOKEN] = NULL; + + /* Call once in case entries already there. */ + watch_for_status(&info->watch, vec, len); + } + return 0; } @@ -712,29 +725,7 @@ static void __init init_blk_xenbus(void) { - xenbus_register_device(&blkfront); -} - -static int wait_for_blkif(void) -{ - int err = 0; - int i; - - /* - * We should figure out how many and which devices we need to - * proceed and only wait for those. For now, continue once the - * first device is around. 
- */ - for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if (blkif_state != BLKIF_STATE_CONNECTED) { - WPRINTK("Timeout connecting to device!\n"); - err = -ENOSYS; - } - return err; + xenbus_register_driver(&blkfront); } static int __init xlblk_init(void) @@ -746,8 +737,6 @@ IPRINTK("Initialising virtual block device driver\n"); init_blk_xenbus(); - - wait_for_blkif(); return 0; } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Sat Oct 8 20:28:24 2005 @@ -238,6 +238,7 @@ gd->first_minor = minor; gd->fops = &xlvbd_block_fops; gd->private_data = info; + gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); if (xlvbd_init_blk_queue(gd, sector_size)) { diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sat Oct 8 20:28:24 2005 @@ -68,8 +68,11 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) { blkif_sring_t *sring; - evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_bind_interdomain, + .u.bind_interdomain.remote_dom = blkif->domid, + .u.bind_interdomain.remote_port = evtchn }; if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL) return -ENOMEM; @@ -80,10 +83,6 @@ return err; } - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = blkif->domid; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { unmap_frontend_page(blkif); @@ -91,7 +90,7 @@ return err; } - blkif->evtchn = op.u.bind_interdomain.port1; + blkif->evtchn = op.u.bind_interdomain.local_port; sring = (blkif_sring_t *)blkif->blk_ring_area->addr; SHARED_RING_INIT(sring); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Sat Oct 8 20:28:24 2005 @@ -59,7 +59,8 @@ } /* Front end tells us frame. */ -static void frontend_changed(struct xenbus_watch *watch, const char *node) +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { unsigned long ring_ref; unsigned int evtchn; @@ -68,7 +69,7 @@ = container_of(watch, struct backend_info, watch); /* If other end is gone, delete ourself. */ - if (node && !xenbus_exists(be->frontpath, "")) { + if (vec && !xenbus_exists(be->frontpath, "")) { xenbus_rm(be->dev->nodename, ""); device_unregister(&be->dev->dev); return; @@ -106,7 +107,8 @@ We provide event channel and device details to front end. Frontend supplies shared frame and event channel. */ -static void backend_changed(struct xenbus_watch *watch, const char *node) +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { int err; char *p; @@ -129,7 +131,7 @@ } /* Pass in NULL node to skip exist test. 
*/ - frontend_changed(&be->watch, NULL); + frontend_changed(&be->watch, NULL, 0); } } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Sat Oct 8 20:28:24 2005 @@ -65,7 +65,8 @@ int xencons_ring_send(const char *data, unsigned len) { int sent = __xencons_ring_send(outring(), data, len); - notify_remote_via_irq(xencons_irq); + /* Use evtchn: this is called early, before irq is set up. */ + notify_remote_via_evtchn(xen_start_info->console_evtchn); return sent; } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Sat Oct 8 20:28:24 2005 @@ -44,9 +44,9 @@ #include <linux/poll.h> #include <linux/irq.h> #include <linux/init.h> -#define XEN_EVTCHN_MASK_OPS +#include <linux/gfp.h> #include <asm-xen/evtchn.h> -#include <linux/gfp.h> +#include <asm-xen/linux-public/evtchn.h> struct per_user_data { /* Notification ring, accessed via /dev/xen/evtchn. */ @@ -78,7 +78,8 @@ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port; if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); - kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN); + kill_fasync(&u->evtchn_async_queue, + SIGIO, POLL_IN); } } else { u->ring_overflow = 1; @@ -205,48 +206,143 @@ return rc; } +static void evtchn_bind_to_user(struct per_user_data *u, int port) +{ + spin_lock_irq(&port_user_lock); + BUG_ON(port_user[port] != NULL); + port_user[port] = u; + unmask_evtchn(port); + spin_unlock_irq(&port_user_lock); +} + static int evtchn_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - int rc = 0; + int rc; struct per_user_data *u = file->private_data; - - spin_lock_irq(&port_user_lock); + evtchn_op_t op = { 0 }; + + switch (cmd) { + case IOCTL_EVTCHN_BIND_VIRQ: { + struct ioctl_evtchn_bind_virq bind; + + rc = -EFAULT; + if (copy_from_user(&bind, (void *)arg, sizeof(bind))) + break; + + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = bind.virq; + op.u.bind_virq.vcpu = 0; + rc = HYPERVISOR_event_channel_op(&op); + if (rc != 0) + break; + + rc = op.u.bind_virq.port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { + struct ioctl_evtchn_bind_interdomain bind; + + rc = -EFAULT; + if (copy_from_user(&bind, (void *)arg, sizeof(bind))) + break; + + op.cmd = EVTCHNOP_bind_interdomain; + op.u.bind_interdomain.remote_dom = bind.remote_domain; + op.u.bind_interdomain.remote_port = bind.remote_port; + rc = HYPERVISOR_event_channel_op(&op); + if (rc != 0) + break; + + rc = op.u.bind_interdomain.local_port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { + struct ioctl_evtchn_bind_unbound_port bind; + + rc = -EFAULT; + if (copy_from_user(&bind, (void *)arg, sizeof(bind))) + break; + + op.cmd = EVTCHNOP_alloc_unbound; + op.u.alloc_unbound.dom = DOMID_SELF; + op.u.alloc_unbound.remote_dom = bind.remote_domain; + rc = HYPERVISOR_event_channel_op(&op); + if (rc != 0) + break; + + rc = op.u.alloc_unbound.port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_UNBIND: { + struct ioctl_evtchn_unbind unbind; + + rc = -EFAULT; + if (copy_from_user(&unbind, (void *)arg, sizeof(unbind))) + break; + + rc = -EINVAL; + if (unbind.port >= NR_EVENT_CHANNELS) + break; + 
+ spin_lock_irq(&port_user_lock); - switch (cmd) { - case EVTCHN_RESET: - /* Initialise the ring to empty. Clear errors. */ - u->ring_cons = u->ring_prod = u->ring_overflow = 0; - break; - - case EVTCHN_BIND: - if (arg >= NR_EVENT_CHANNELS) { + rc = -ENOTCONN; + if (port_user[unbind.port] != u) { + spin_unlock_irq(&port_user_lock); + break; + } + + port_user[unbind.port] = NULL; + mask_evtchn(unbind.port); + + spin_unlock_irq(&port_user_lock); + + op.cmd = EVTCHNOP_close; + op.u.close.port = unbind.port; + BUG_ON(HYPERVISOR_event_channel_op(&op)); + + rc = 0; + break; + } + + case IOCTL_EVTCHN_NOTIFY: { + struct ioctl_evtchn_notify notify; + + rc = -EFAULT; + if (copy_from_user(¬ify, (void *)arg, sizeof(notify))) + break; + + if (notify.port >= NR_EVENT_CHANNELS) { rc = -EINVAL; - } else if (port_user[arg] != NULL) { - rc = -EISCONN; - } else { - port_user[arg] = u; - unmask_evtchn(arg); - } - break; - - case EVTCHN_UNBIND: - if (arg >= NR_EVENT_CHANNELS) { - rc = -EINVAL; - } else if (port_user[arg] != u) { + } else if (port_user[notify.port] != u) { rc = -ENOTCONN; } else { - port_user[arg] = NULL; - mask_evtchn(arg); - } - break; + notify_remote_via_evtchn(notify.port); + rc = 0; + } + break; + } + + case IOCTL_EVTCHN_RESET: { + /* Initialise the ring to empty. Clear errors. */ + spin_lock_irq(&port_user_lock); + u->ring_cons = u->ring_prod = u->ring_overflow = 0; + spin_unlock_irq(&port_user_lock); + rc = 0; + break; + } default: rc = -ENOSYS; break; } - - spin_unlock_irq(&port_user_lock); return rc; } @@ -295,6 +391,7 @@ { int i; struct per_user_data *u = filp->private_data; + evtchn_op_t op = { 0 }; spin_lock_irq(&port_user_lock); @@ -302,11 +399,15 @@ for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (port_user[i] == u) - { - port_user[i] = NULL; - mask_evtchn(i); - } + if (port_user[i] != u) + continue; + + port_user[i] = NULL; + mask_evtchn(i); + + op.cmd = EVTCHNOP_close; + op.u.close.port = i; + BUG_ON(HYPERVISOR_event_channel_op(&op)); } spin_unlock_irq(&port_user_lock); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sat Oct 8 20:28:24 2005 @@ -177,8 +177,15 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref, unsigned long rx_ring_ref, unsigned int evtchn) { - evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_bind_interdomain, + .u.bind_interdomain.remote_dom = netif->domid, + .u.bind_interdomain.remote_port = evtchn }; + + /* Already connected through? */ + if (netif->irq) + return 0; netif->comms_area = alloc_vm_area(2*PAGE_SIZE); if (netif->comms_area == NULL) @@ -190,10 +197,6 @@ return err; } - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = netif->domid; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { unmap_frontend_pages(netif); @@ -201,7 +204,7 @@ return err; } - netif->evtchn = op.u.bind_interdomain.port1; + netif->evtchn = op.u.bind_interdomain.local_port; netif->irq = bind_evtchn_to_irqhandler( netif->evtchn, netif_be_int, 0, netif->dev->name, netif); @@ -228,13 +231,12 @@ { netif_t *netif = (netif_t *)arg; - /* - * This can't be done in netif_disconnect() because at that point - * there may be outstanding requests in the network stack whose - * asynchronous responses must still be notified to the remote driver. 
- */ - if (netif->irq) - unbind_evtchn_from_irqhandler(netif->irq, netif); + /* Already disconnected? */ + if (!netif->irq) + return; + + unbind_evtchn_from_irqhandler(netif->irq, netif); + netif->irq = 0; unregister_netdev(netif->dev); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sat Oct 8 20:28:24 2005 @@ -553,9 +553,9 @@ /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { - DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n", - txreq.addr, txreq.size, - (txreq.addr &~PAGE_MASK) + txreq.size); + DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", + txreq.offset, txreq.size, + (txreq.offset &~PAGE_MASK) + txreq.size); make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); netif_put(netif); continue; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sat Oct 8 20:28:24 2005 @@ -57,7 +57,8 @@ } /* Front end tells us frame. */ -static void frontend_changed(struct xenbus_watch *watch, const char *node) +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { unsigned long tx_ring_ref, rx_ring_ref; unsigned int evtchn; @@ -68,7 +69,7 @@ int i; /* If other end is gone, delete ourself. */ - if (node && !xenbus_exists(be->frontpath, "")) { + if (vec && !xenbus_exists(be->frontpath, "")) { xenbus_rm(be->dev->nodename, ""); device_unregister(&be->dev->dev); return; @@ -126,7 +127,8 @@ We provide event channel and device details to front end. Frontend supplies shared frame and event channel. */ -static void backend_changed(struct xenbus_watch *watch, const char *node) +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { int err; long int handle; @@ -163,7 +165,7 @@ kobject_hotplug(&dev->dev.kobj, KOBJ_ONLINE); /* Pass in NULL node to skip exist test. */ - frontend_changed(&be->watch, NULL); + frontend_changed(&be->watch, NULL, 0); } } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sat Oct 8 20:28:24 2005 @@ -87,11 +87,6 @@ #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? 
*/ #endif - -#define NETIF_STATE_DISCONNECTED 0 -#define NETIF_STATE_CONNECTED 1 - -static unsigned int netif_state = NETIF_STATE_DISCONNECTED; static void network_tx_buf_gc(struct net_device *dev); static void network_alloc_rx_buffers(struct net_device *dev); @@ -858,7 +853,7 @@ np->user_state = UST_CLOSED; np->handle = handle; np->xbdev = dev; - + spin_lock_init(&np->tx_lock); spin_lock_init(&np->rx_lock); @@ -902,7 +897,9 @@ netdev->features = NETIF_F_IP_CSUM; SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); - + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &dev->dev); + if ((err = register_netdev(netdev)) != 0) { printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err); @@ -966,14 +963,18 @@ { "" } }; -static void watch_for_status(struct xenbus_watch *watch, const char *node) +static void watch_for_status(struct xenbus_watch *watch, + const char **vec, unsigned int len) { } static int setup_device(struct xenbus_device *dev, struct netfront_info *info) { - evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_alloc_unbound, + .u.alloc_unbound.dom = DOMID_SELF, + .u.alloc_unbound.remote_dom = info->backend_id }; info->tx_ring_ref = GRANT_INVALID_REF; info->rx_ring_ref = GRANT_INVALID_REF; @@ -1010,13 +1011,14 @@ } info->rx_ring_ref = err; - op.u.alloc_unbound.dom = info->backend_id; err = HYPERVISOR_event_channel_op(&op); if (err) { xenbus_dev_error(dev, err, "allocating event channel"); goto out; } + connect_device(info, op.u.alloc_unbound.port); + return 0; out: @@ -1172,8 +1174,6 @@ info->backend = backend; - netif_state = NETIF_STATE_CONNECTED; - return 0; abort_transaction: @@ -1272,30 +1272,7 @@ static void __init init_net_xenbus(void) { - xenbus_register_device(&netfront); -} - -static int wait_for_netif(void) -{ - int err = 0; - int i; - - /* - * We should figure out how many and which devices we need to - * proceed and only wait for those. For now, continue once the - * first device is around. 
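[setup_device() above shows the reworked EVTCHNOP_alloc_unbound layout: the allocating domain names itself in .dom, names the peer in .remote_dom, and receives the new port back in op.u.alloc_unbound.port. Distilled into a stand-alone helper with the same fields; the helper name is hypothetical.]

    static int alloc_unbound_port(domid_t remote_dom)
    {
        int err;
        evtchn_op_t op = {
            .cmd = EVTCHNOP_alloc_unbound,
            .u.alloc_unbound.dom        = DOMID_SELF,
            .u.alloc_unbound.remote_dom = remote_dom };

        err = HYPERVISOR_event_channel_op(&op);
        return err ? err : op.u.alloc_unbound.port;
    }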
- */ - for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if (netif_state != NETIF_STATE_CONNECTED) { - WPRINTK("Timeout connecting to device!\n"); - err = -ENOSYS; - } - return err; + xenbus_register_driver(&netfront); } static int __init netif_init(void) @@ -1313,8 +1290,6 @@ (void)register_inetaddr_notifier(¬ifier_inetdev); init_net_xenbus(); - - wait_for_netif(); return err; } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sat Oct 8 20:28:24 2005 @@ -67,7 +67,7 @@ tpmif_get(tpmif); return tpmif; } else { - return NULL; + return ERR_PTR(-EEXIST); } } } @@ -117,8 +117,11 @@ int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn) { - evtchn_op_t op = {.cmd = EVTCHNOP_bind_interdomain }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_bind_interdomain, + .u.bind_interdomain.remote_dom = tpmif->domid, + .u.bind_interdomain.remote_port = evtchn }; if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL) return -ENOMEM; @@ -129,10 +132,6 @@ return err; } - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = tpmif->domid; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { unmap_frontend_page(tpmif); @@ -140,7 +139,7 @@ return err; } - tpmif->evtchn = op.u.bind_interdomain.port1; + tpmif->evtchn = op.u.bind_interdomain.local_port; tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr; diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Sat Oct 8 20:28:24 2005 @@ -22,6 +22,7 @@ #include <asm-xen/xen-public/grant_table.h> +/* local data structures */ struct data_exchange { struct list_head pending_pak; struct list_head current_pak; @@ -45,7 +46,7 @@ enum { PACKET_FLAG_DISCARD_RESPONSE = 1, - PACKET_FLAG_SEND_CONTROLMESSAGE = 2, + PACKET_FLAG_CHECK_RESPONSESTATUS = 2, }; static struct data_exchange dataex; @@ -66,9 +67,26 @@ #define MAX_PENDING_REQS TPMIF_TX_RING_SIZE -static multicall_entry_t tx_mcl[MAX_PENDING_REQS]; - #define MIN(x,y) (x) < (y) ? (x) : (y) + + +/*************************************************************** + Buffer copying +***************************************************************/ +static inline int +copy_from_buffer(void *to, + const void *from, + unsigned long size, + int userbuffer) +{ + if (userbuffer) { + if (copy_from_user(to, from, size)) + return -EFAULT; + } else { + memcpy(to, from, size); + } + return 0; +} /*************************************************************** Packet-related functions @@ -188,15 +206,25 @@ DPRINTK("Supposed to send %d bytes to front-end!\n", size); - if (0 != (pak->flags & PACKET_FLAG_SEND_CONTROLMESSAGE)) { + if (0 != (pak->flags & PACKET_FLAG_CHECK_RESPONSESTATUS)) { #ifdef CONFIG_XEN_TPMDEV_CLOSE_IF_VTPM_FAILS u32 res; - memcpy(&res, &data[2+4], sizeof(res)); + if (copy_from_buffer(&res, + &data[2+4], + sizeof(res), + userbuffer)) { + return -EFAULT; + } + if (res != 0) { /* - * Will close down this device and have the + * Close down this device. Should have the * FE notified about closure. 
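[tpmif_find() now reports an existing connection as ERR_PTR(-EEXIST) rather than returning NULL, so callers must test the result with IS_ERR() instead of a NULL check, as backend_changed() does further down. The calling pattern in isolation; claim_tpmif is an illustrative wrapper.]

    static int claim_tpmif(domid_t domid, u32 instance)
    {
        tpmif_t *tpmif = tpmif_find(domid, instance);

        if (IS_ERR(tpmif))
            return PTR_ERR(tpmif);  /* e.g. -EEXIST: already bound */

        /* ... map the ring and connect ... */
        return 0;
    }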
*/ + if (!pak->tpmif) { + return -EFAULT; + } + pak->tpmif->status = DISCONNECTING; } #endif } @@ -226,16 +254,15 @@ int rc = 0; unsigned int i = 0; unsigned int offset = 0; - multicall_entry_t *mcl; - - if (tpmif == NULL) + + if (tpmif == NULL) { return -EFAULT; - - if (tpmif->status != CONNECTED) { + } + + if (tpmif->status == DISCONNECTED) { return size; } - mcl = tx_mcl; while (offset < size && i < TPMIF_TX_RING_SIZE) { unsigned int tocopy; struct gnttab_map_grant_ref map_op; @@ -272,22 +299,15 @@ PAGE_SHIFT] = FOREIGN_FRAME(map_op.dev_bus_addr >> PAGE_SHIFT); - tocopy = size - offset; - if (tocopy > PAGE_SIZE) { - tocopy = PAGE_SIZE; - } - if (userbuffer) { - if (copy_from_user((void *)(MMAP_VADDR(tpmif,i) | - (tx->addr & ~PAGE_MASK)), - (void __user *)&data[offset], - tocopy)) { - tpmif_put(tpmif); - return -EFAULT; - } - } else { - memcpy((void *)(MMAP_VADDR(tpmif,i) | - (tx->addr & ~PAGE_MASK)), - &data[offset], tocopy); + tocopy = MIN(size - offset, PAGE_SIZE); + + if (copy_from_buffer((void *)(MMAP_VADDR(tpmif,i)| + (tx->addr & ~PAGE_MASK)), + &data[offset], + tocopy, + userbuffer)) { + tpmif_put(tpmif); + return -EFAULT; } tx->size = tocopy; @@ -306,8 +326,8 @@ } rc = offset; - DPRINTK("Notifying frontend via event channel %d\n", - tpmif->evtchn); + DPRINTK("Notifying frontend via irq %d\n", + tpmif->irq); notify_remote_via_irq(tpmif->irq); return rc; @@ -705,9 +725,13 @@ int tpmif_vtpm_open(tpmif_t *tpmif, domid_t domid, u32 instance) { int rc = 0; - struct packet *pak = packet_alloc(tpmif, sizeof(create_cmd), create_cmd[0], - PACKET_FLAG_DISCARD_RESPONSE| - PACKET_FLAG_SEND_CONTROLMESSAGE); + struct packet *pak; + + pak = packet_alloc(tpmif, + sizeof(create_cmd), + create_cmd[0], + PACKET_FLAG_DISCARD_RESPONSE| + PACKET_FLAG_CHECK_RESPONSESTATUS); if (pak) { u8 buf[sizeof(create_cmd)]; u32 domid_no = htonl((u32)domid); @@ -742,8 +766,7 @@ pak = packet_alloc(NULL, sizeof(create_cmd), create_cmd[0], - PACKET_FLAG_DISCARD_RESPONSE| - PACKET_FLAG_SEND_CONTROLMESSAGE); + PACKET_FLAG_DISCARD_RESPONSE); if (pak) { u8 buf[sizeof(destroy_cmd)]; u32 instid_no = htonl(instid); @@ -896,7 +919,8 @@ */ if (size < 10 || be32_to_cpu(*native_size) != size || - 0 == dataex.has_opener) { + 0 == dataex.has_opener || + tpmif->status != CONNECTED) { rc = -EINVAL; goto failexit; } else { diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sat Oct 8 20:28:24 2005 @@ -59,7 +59,8 @@ } -static void frontend_changed(struct xenbus_watch *watch, const char *node) +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { unsigned long ringref; unsigned int evtchn; @@ -69,7 +70,7 @@ = container_of(watch, struct backend_info, watch); /* If other end is gone, delete ourself. 
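[The new copy_from_buffer() helper gives the transmit and response paths a single entry point for both user-space and kernel buffers, and together with the MIN() clamp it produces the page-at-a-time loop in tpm_send() above. The pattern on its own; copy_all and the flat destination buffer are illustrative simplifications of the grant-mapped pages.]

    static int copy_all(u8 *dst, const u8 *data, unsigned long size,
                        int userbuffer)
    {
        unsigned long offset = 0;

        while (offset < size) {
            unsigned long tocopy = MIN(size - offset, PAGE_SIZE);

            if (copy_from_buffer(dst + offset, &data[offset],
                                 tocopy, userbuffer))
                return -EFAULT;
            offset += tocopy;
        }
        return 0;
    }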
*/ - if (node && !xenbus_exists(be->frontpath, "")) { + if (vec && !xenbus_exists(be->frontpath, "")) { xenbus_rm(be->dev->nodename, ""); device_unregister(&be->dev->dev); return; @@ -142,7 +143,8 @@ } -static void backend_changed(struct xenbus_watch *watch, const char *node) +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) { int err; long int instance; @@ -166,6 +168,9 @@ be->instance = instance; if (be->tpmif == NULL) { + unsigned int len = max(XS_WATCH_PATH, XS_WATCH_TOKEN) + 1; + const char *vec[len]; + be->tpmif = tpmif_find(be->frontend_id, instance); if (IS_ERR(be->tpmif)) { @@ -175,8 +180,11 @@ return; } + vec[XS_WATCH_PATH] = be->frontpath; + vec[XS_WATCH_TOKEN] = NULL; + /* Pass in NULL node to skip exist test. */ - frontend_changed(&be->watch, be->frontpath); + frontend_changed(&be->watch, vec, len); } } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Sat Oct 8 20:28:24 2005 @@ -211,12 +211,14 @@ XENBUS support code **************************************************************/ -static void watch_for_status(struct xenbus_watch *watch, const char *node) +static void watch_for_status(struct xenbus_watch *watch, + const char **vec, unsigned int len) { struct tpmfront_info *info; int err; unsigned long ready; struct tpm_private *tp = &my_private; + const char *node = vec[XS_WATCH_PATH]; info = container_of(watch, struct tpmfront_info, watch); node += strlen(watch->node); @@ -244,9 +246,11 @@ { tpmif_tx_interface_t *sring; struct tpm_private *tp = &my_private; - - evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; int err; + evtchn_op_t op = { + .cmd = EVTCHNOP_alloc_unbound, + .u.alloc_unbound.dom = DOMID_SELF, + .u.alloc_unbound.remote_dom = backend_id } ; sring = (void *)__get_free_page(GFP_KERNEL); if (!sring) { @@ -269,7 +273,6 @@ } info->ring_ref = err; - op.u.alloc_unbound.dom = backend_id; err = HYPERVISOR_event_channel_op(&op); if (err) { gnttab_end_foreign_access(info->ring_ref, 0); @@ -278,7 +281,9 @@ xenbus_dev_error(dev, err, "allocating event channel"); return err; } + tpmif_connect(op.u.alloc_unbound.port, backend_id); + return 0; } @@ -293,9 +298,9 @@ tp->tx = NULL; } - if (tpm->irq) + if (tp->irq) unbind_evtchn_from_irqhandler(tp->irq, NULL); - tp->evtchn = tpm->irq = 0; + tp->evtchn = tp->irq = 0; } @@ -439,26 +444,32 @@ return 0; } -static int tpmfront_suspend(struct xenbus_device *dev) +static int +tpmfront_suspend(struct xenbus_device *dev) { struct tpmfront_info *info = dev->data; struct tpm_private *tp = &my_private; - - /* lock so no app can send */ + u32 ctr = 0; + + /* lock, so no app can send */ down(&suspend_lock); - while (atomic_read(&tp->tx_busy)) { - printk("---- TPMIF: Outstanding request.\n"); -#if 0 + while (atomic_read(&tp->tx_busy) && ctr <= 25) { + if ((ctr % 10) == 0) + printk("INFO: Waiting for outstanding request.\n"); /* - * Would like to wait until the outstanding request - * has come back, but this does not work properly, yet. + * Wait for a request to be responded to. */ - interruptible_sleep_on_timeout(&tp->wait_q, - 100); -#else - break; -#endif + interruptible_sleep_on_timeout(&tp->wait_q, 100); + ctr++; + } + + if (atomic_read(&tp->tx_busy)) { + /* + * A temporary work-around. 
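[backend_changed() above also drives the other side of the new watch interface: to invoke frontend_changed() directly it manufactures a watch vector by hand, filling only the path slot. Generalised below; fire_watch is an illustrative name and the sizing mirrors the patch.]

    static void fire_watch(struct xenbus_watch *w, const char *path)
    {
        unsigned int len = max(XS_WATCH_PATH, XS_WATCH_TOKEN) + 1;
        const char *vec[len];

        vec[XS_WATCH_PATH]  = path;
        vec[XS_WATCH_TOKEN] = NULL;
        w->callback(w, vec, len);
    }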
+ */ + printk("WARNING: Resetting busy flag."); + atomic_set(&tp->tx_busy, 0); } unregister_xenbus_watch(&info->watch); @@ -466,44 +477,34 @@ kfree(info->backend); info->backend = NULL; - destroy_tpmring(info, tp); - return 0; } -static int tpmif_recover(void) -{ - return 0; -} - -static int tpmfront_resume(struct xenbus_device *dev) +static int +tpmfront_resume(struct xenbus_device *dev) { struct tpmfront_info *info = dev->data; - int err; - - err = talk_to_backend(dev, info); - if (!err) { - tpmif_recover(); - } - - /* unlock so apps can resume */ + int err = talk_to_backend(dev, info); + + /* unlock, so apps can resume sending */ up(&suspend_lock); return err; } -static void tpmif_connect(u16 evtchn, domid_t domid) +static void +tpmif_connect(u16 evtchn, domid_t domid) { int err = 0; struct tpm_private *tp = &my_private; tp->evtchn = evtchn; - tp->backend_id = domid; - - err = bind_evtchn_to_irqhandler( - tp->evtchn, - tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); - if ( err <= 0 ) { + tp->backend_id = domid; + + err = bind_evtchn_to_irqhandler(tp->evtchn, + tpmif_int, SA_SAMPLE_RANDOM, "tpmif", + tp); + if (err <= 0) { WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); return; } @@ -528,7 +529,7 @@ static void __init init_tpm_xenbus(void) { - xenbus_register_device(&tpmfront); + xenbus_register_driver(&tpmfront); } @@ -638,7 +639,7 @@ if (NULL == txb) { DPRINTK("txb (i=%d) is NULL. buffers initilized?\n", i); - DPRINTK("Not transmittin anything!\n"); + DPRINTK("Not transmitting anything!\n"); spin_unlock_irq(&tp->tx_lock); return -EFAULT; } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Sat Oct 8 20:28:24 2005 @@ -147,7 +147,7 @@ data += avail; len -= avail; update_output_chunk(out, avail); - notify_remote_via_irq(xenbus_irq); + notify_remote_via_evtchn(xen_start_info->store_evtchn); } while (len != 0); return 0; @@ -192,7 +192,7 @@ pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); /* If it was full, tell them we've taken some. */ if (was_full) - notify_remote_via_irq(xenbus_irq); + notify_remote_via_evtchn(xen_start_info->store_evtchn); } /* If we left something, wake watch thread to deal with it. */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Sat Oct 8 20:28:24 2005 @@ -44,7 +44,6 @@ #include <asm-xen/xenbus.h> #include <asm-xen/xen_proc.h> #include <asm/hypervisor.h> -#include <asm-xen/linux-public/xenstored.h> struct xenbus_dev_data { /* Are there bytes left to be read in this message? 
*/ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sat Oct 8 20:28:24 2005 @@ -184,7 +184,7 @@ return 0; } -static int xenbus_probe_backend(const char *type, const char *uuid); +static int xenbus_probe_backend(const char *type, const char *domid); static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type/<frontend>/<id> */ @@ -226,8 +226,8 @@ return drv->remove(dev); } -static int xenbus_register_driver(struct xenbus_driver *drv, - struct xen_bus_type *bus) +static int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus) { int err; @@ -243,15 +243,15 @@ return err; } -int xenbus_register_device(struct xenbus_driver *drv) -{ - return xenbus_register_driver(drv, &xenbus_frontend); -} -EXPORT_SYMBOL(xenbus_register_device); +int xenbus_register_driver(struct xenbus_driver *drv) +{ + return xenbus_register_driver_common(drv, &xenbus_frontend); +} +EXPORT_SYMBOL(xenbus_register_driver); int xenbus_register_backend(struct xenbus_driver *drv) { - return xenbus_register_driver(drv, &xenbus_backend); + return xenbus_register_driver_common(drv, &xenbus_backend); } void xenbus_unregister_driver(struct xenbus_driver *drv) @@ -260,6 +260,7 @@ driver_unregister(&drv->driver); up(&xenbus_lock); } +EXPORT_SYMBOL(xenbus_unregister_driver); struct xb_find_info { @@ -347,6 +348,18 @@ return p; } +static ssize_t xendev_show_nodename(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); + +static ssize_t xendev_show_devtype(struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); + static int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename) @@ -383,6 +396,9 @@ printk("XENBUS: Registering %s device %s: error %i\n", bus->bus.name, xendev->dev.bus_id, err); kfree(xendev); + } else { + device_create_file(&xendev->dev, &dev_attr_nodename); + device_create_file(&xendev->dev, &dev_attr_devtype); } return err; } @@ -419,15 +435,15 @@ return err; } -/* backend/<typename>/<frontend-uuid> */ -static int xenbus_probe_backend(const char *type, const char *uuid) +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(const char *type, const char *domid) { char *nodename; int err = 0; char **dir; unsigned int i, dir_n = 0; - nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, uuid); + nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid); if (!nodename) return -ENOMEM; @@ -546,14 +562,16 @@ kfree(root); } -static void frontend_changed(struct xenbus_watch *watch, const char *node) -{ - dev_changed(node, &xenbus_frontend); -} - -static void backend_changed(struct xenbus_watch *watch, const char *node) -{ - dev_changed(node, &xenbus_backend); +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); } /* We watch for devices appearing and vanishing. 
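[With xenbus_register_device() renamed to xenbus_register_driver(), and both front- and backend registration now sharing xenbus_register_driver_common(), a frontend registers as sketched below; note also that every probed device additionally gains read-only 'nodename' and 'devtype' attributes in sysfs. The field set shown is an assumption carried over from the existing API, not part of this hunk.]

    static struct xenbus_driver mydrv = {
        .name = "mydev",
        /* .ids, .probe, .remove ... unchanged by this patch */
    };

    static int __init mydrv_init(void)
    {
        return xenbus_register_driver(&mydrv);  /* was ..._device() */
    }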
*/ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sat Oct 8 20:28:24 2005 @@ -38,7 +38,6 @@ #include <linux/fcntl.h> #include <linux/kthread.h> #include <asm-xen/xenbus.h> -#include <asm-xen/linux-public/xenstored.h> #include "xenbus_comms.h" #define streq(a, b) (strcmp((a), (b)) == 0) @@ -200,14 +199,9 @@ return buffer; } -char **xenbus_directory(const char *dir, const char *node, unsigned int *num) -{ - char *strings, *p, **ret; - unsigned int len; - - strings = xs_single(XS_DIRECTORY, join(dir, node), &len); - if (IS_ERR(strings)) - return (char **)strings; +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; /* Count the strings. */ *num = count_strings(strings, len); @@ -224,7 +218,20 @@ strings = (char *)&ret[*num]; for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) ret[(*num)++] = p; + return ret; +} + +char **xenbus_directory(const char *dir, const char *node, unsigned int *num) +{ + char *strings; + unsigned int len; + + strings = xs_single(XS_DIRECTORY, join(dir, node), &len); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); } EXPORT_SYMBOL(xenbus_directory); @@ -425,18 +432,19 @@ return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); } -static char *xs_read_watch(char **token) +static char **xs_read_watch(unsigned int *num) { enum xsd_sockmsg_type type; - char *ret; - - ret = read_reply(&type, NULL); - if (IS_ERR(ret)) - return ret; + char *strings; + unsigned int len; + + strings = read_reply(&type, &len); + if (IS_ERR(strings)) + return (char **)strings; BUG_ON(type != XS_WATCH_EVENT); - *token = ret + strlen(ret) + 1; - return ret; + + return split(strings, len, num); } static int xs_acknowledge_watch(const char *token) @@ -519,8 +527,8 @@ static int watch_thread(void *unused) { for (;;) { - char *token; - char *node = NULL; + char **vec = NULL; + unsigned int num; wait_event(xb_waitq, xs_input_avail()); @@ -530,23 +538,23 @@ */ down(&xenbus_lock); if (xs_input_avail()) - node = xs_read_watch(&token); - - if (node && !IS_ERR(node)) { + vec = xs_read_watch(&num); + + if (vec && !IS_ERR(vec)) { struct xenbus_watch *w; int err; - err = xs_acknowledge_watch(token); + err = xs_acknowledge_watch(vec[XS_WATCH_TOKEN]); if (err) printk(KERN_WARNING "XENBUS ack %s fail %i\n", - node, err); - w = find_watch(token); + vec[XS_WATCH_TOKEN], err); + w = find_watch(vec[XS_WATCH_TOKEN]); BUG_ON(!w); - w->callback(w, node); - kfree(node); - } else if (node) + w->callback(w, (const char **)vec, num); + kfree(vec); + } else if (vec) printk(KERN_WARNING "XENBUS xs_read_watch: %li\n", - PTR_ERR(node)); + PTR_ERR(vec)); up(&xenbus_lock); } } diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Sat Oct 8 20:28:24 2005 @@ -31,6 +31,7 @@ #define __HYPERCALL_H__ #include <asm-xen/xen-public/xen.h> +#include <asm-xen/xen-public/sched.h> #define _hypercall0(type, name) \ ({ \ @@ -160,41 +161,10 @@ } static inline int -HYPERVISOR_yield( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_yield, 0); -} - -static inline int -HYPERVISOR_block( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_block, 0); -} - -static inline int 
-HYPERVISOR_shutdown( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0); -} - -static inline int -HYPERVISOR_reboot( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0); -} - -static inline int -HYPERVISOR_crash( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_crash << SCHEDOP_reasonshift), 0); +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); } static inline long @@ -316,63 +286,18 @@ } static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - return _hypercall2(int, boot_vcpu, vcpu, ctxt); -} - -static inline int -HYPERVISOR_vcpu_up( - int vcpu) -{ - return _hypercall2(int, sched_op, SCHEDOP_vcpu_up | - (vcpu << SCHEDOP_vcpushift), 0); -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle | - (vcpu << SCHEDOP_vcpushift), ctxt); +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int HYPERVISOR_suspend( unsigned long srec) { - int ret; - unsigned long ign1, ign2; - - /* On suspend, control software expects a suspend record in %esi. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_suspend << - SCHEDOP_reasonshift)), - "2" (srec) : "memory", "ecx"); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_down( - int vcpu) -{ - int ret; - unsigned long ign1; - /* Yes, I really do want to clobber edx here: when we resume a - vcpu after unpickling a multi-processor domain, it returns - here, but clobbers all of the call clobbered registers. 
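[The per-operation wrappers (yield, block, shutdown, reboot, crash) collapse into the generic HYPERVISOR_sched_op(cmd, arg); judging from the new HYPERVISOR_suspend(), the shutdown reason now travels as the hypercall argument rather than being shifted into the command word. The old entry points re-expressed against the new interface; this is a sketch, and the two-argument SHUTDOWN_* encoding is inferred from this changeset only.]

    static inline int yield(void)
    {
        return HYPERVISOR_sched_op(SCHEDOP_yield, 0);
    }

    static inline int poweroff(void)
    {
        return HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_poweroff);
    }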
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx", "edx" ); - return ret; + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); } #endif /* __HYPERCALL_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Sat Oct 8 20:28:24 2005 @@ -412,17 +412,6 @@ ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \ } while (0) -#define __HAVE_ARCH_PTEP_ESTABLISH_NEW -#define ptep_establish_new(__vma, __address, __ptep, __entry) \ -do { \ - if (likely((__vma)->vm_mm == current->mm)) { \ - BUG_ON(HYPERVISOR_update_va_mapping((__address), \ - __entry, 0)); \ - } else { \ - xen_l1_entry_update((__ptep), (__entry)); \ - } \ -} while (0) - #ifndef CONFIG_XEN_SHADOW_MODE void make_lowmem_page_readonly(void *va); void make_lowmem_page_writable(void *va); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Sat Oct 8 20:28:24 2005 @@ -29,7 +29,9 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ + #include <asm-xen/xen-public/xen.h> +#include <asm-xen/xen-public/sched.h> /* FIXME: temp place to hold these page related macros */ #include <asm/page.h> @@ -184,77 +186,9 @@ } static inline int -HYPERVISOR_yield( - void) -{ -#if 0 - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) - : "memory" ); - - return ret; -#endif - return 1; -} - -static inline int -HYPERVISOR_block( - void) -{ -#if 0 - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) - : "memory" ); - - return ret; -#endif - return 1; -} - -static inline int -HYPERVISOR_shutdown( - void) -{ -#if 0 - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -#endif - return 1; -} - -static inline int -HYPERVISOR_reboot( - void) -{ -#if 0 - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -#endif +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ return 1; } @@ -262,39 +196,6 @@ HYPERVISOR_suspend( unsigned long srec) { -#if 0 - int ret; - unsigned long ign1, ign2; - - /* NB. On suspend, control software expects a suspend record in %esi. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory"); - - return ret; -#endif - return 1; -} - -static inline int -HYPERVISOR_crash( - void) -{ -#if 0 - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -#endif return 1; } @@ -601,24 +502,6 @@ return 1; } -static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ -#if 0 - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) - : "memory"); - - return ret; -#endif - return 1; -} #endif #endif /* __HYPERCALL_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Sat Oct 8 20:28:24 2005 @@ -35,6 +35,7 @@ #define __HYPERCALL_H__ #include <asm-xen/xen-public/xen.h> +#include <asm-xen/xen-public/sched.h> #define __syscall_clobber "r11","rcx","memory" @@ -165,33 +166,10 @@ } static inline int -HYPERVISOR_yield( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_yield, 0); -} - -static inline int -HYPERVISOR_block( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_block, 0); -} - -static inline int -HYPERVISOR_shutdown( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0); -} - -static inline int -HYPERVISOR_reboot( - void) -{ - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0); +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); } static inline long @@ -302,26 +280,10 @@ } static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - return _hypercall2(int, boot_vcpu, vcpu, ctxt); -} - -static inline int -HYPERVISOR_vcpu_up( - int vcpu) -{ - return _hypercall2(int, sched_op, SCHEDOP_vcpu_up | - (vcpu << SCHEDOP_vcpushift), 0); -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle | - (vcpu << SCHEDOP_vcpushift), ctxt); +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int @@ -341,8 +303,8 @@ HYPERVISOR_suspend( unsigned long srec) { - return _hypercall2(int, sched_op, SCHEDOP_shutdown | - (SHUTDOWN_suspend << SCHEDOP_reasonshift), srec); + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); } #endif /* __HYPERCALL_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/evtchn.h --- a/linux-2.6-xen-sparse/include/asm-xen/evtchn.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/evtchn.h Sat Oct 8 20:28:24 2005 @@ -50,14 +50,6 @@ /* Dynamically bind an IPI source to Linux IRQ space. */ extern int bind_ipi_to_irq(int ipi); extern void unbind_ipi_from_irq(int ipi); - -/* - * Dynamically bind an event-channel port to Linux IRQ space. - * BIND: Returns IRQ or error. 
- * UNBIND: Takes IRQ to unbind from; automatically closes the event channel. - */ -extern int bind_evtchn_to_irq(unsigned int evtchn); -extern void unbind_evtchn_from_irq(unsigned int irq); /* * Dynamically bind an event-channel port to an IRQ-like callback handler. @@ -124,25 +116,10 @@ static inline void notify_remote_via_evtchn(int port) { evtchn_op_t op; - op.cmd = EVTCHNOP_send; - op.u.send.local_port = port; + op.cmd = EVTCHNOP_send, + op.u.send.port = port; (void)HYPERVISOR_event_channel_op(&op); } - -/* - * CHARACTER-DEVICE DEFINITIONS - */ - -/* /dev/xen/evtchn resides at device number major=10, minor=201 */ -#define EVTCHN_MINOR 201 - -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to teh specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) #endif /* __ASM_EVTCHN_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h --- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Sat Oct 8 20:28:24 2005 @@ -3,7 +3,7 @@ * * Interface to /proc/xen/privcmd. * - * Copyright (c) 2003-2004, K A Fraser + * Copyright (c) 2003-2005, K A Fraser * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -27,39 +27,39 @@ * IN THE SOFTWARE. */ -#ifndef __PRIVCMD_H__ -#define __PRIVCMD_H__ +#ifndef __LINUX_PUBLIC_PRIVCMD_H__ +#define __LINUX_PUBLIC_PRIVCMD_H__ typedef struct privcmd_hypercall { - unsigned long op; - unsigned long arg[5]; + unsigned long op; + unsigned long arg[5]; } privcmd_hypercall_t; typedef struct privcmd_mmap_entry { - unsigned long va; - unsigned long mfn; - unsigned long npages; + unsigned long va; + unsigned long mfn; + unsigned long npages; } privcmd_mmap_entry_t; typedef struct privcmd_mmap { - int num; - domid_t dom; /* target domain */ - privcmd_mmap_entry_t *entry; + int num; + domid_t dom; /* target domain */ + privcmd_mmap_entry_t *entry; } privcmd_mmap_t; typedef struct privcmd_mmapbatch { - int num; /* number of pages to populate */ - domid_t dom; /* target domain */ - unsigned long addr; /* virtual address */ - unsigned long *arr; /* array of mfns - top nibble set on err */ + int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + unsigned long addr; /* virtual address */ + unsigned long *arr; /* array of mfns - top nibble set on err */ } privcmd_mmapbatch_t; typedef struct privcmd_blkmsg { - unsigned long op; - void *buf; - int buf_size; + unsigned long op; + void *buf; + int buf_size; } privcmd_blkmsg_t; /* @@ -67,16 +67,26 @@ * @arg: &privcmd_hypercall_t * Return: Value returned from execution of the specified hypercall. 
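[In notify_remote_via_evtchn() above, the send operation's field is renamed from u.send.local_port to plain u.send.port; note that the hunk separates the two assignments with a comma rather than a semicolon, which compiles (comma operator) but is presumably unintended. Written out conventionally; evtchn_send is an illustrative standalone name.]

    static inline void evtchn_send(int port)
    {
        evtchn_op_t op;

        op.cmd = EVTCHNOP_send;
        op.u.send.port = port;
        (void)HYPERVISOR_event_channel_op(&op);
    }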
*/ -#define IOCTL_PRIVCMD_HYPERCALL \ - _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) +#define IOCTL_PRIVCMD_HYPERCALL \ + _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) -#define IOCTL_PRIVCMD_MMAP \ - _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) -#define IOCTL_PRIVCMD_MMAPBATCH \ - _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t)) -#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN \ - _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long)) -#define IOCTL_PRIVCMD_INITDOMAIN_STORE \ - _IOC(_IOC_READ, 'P', 5, 0) +#define IOCTL_PRIVCMD_MMAP \ + _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) +#define IOCTL_PRIVCMD_MMAPBATCH \ + _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t)) +#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN \ + _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long)) +#define IOCTL_PRIVCMD_INITDOMAIN_STORE \ + _IOC(_IOC_READ, 'P', 5, 0) -#endif /* __PRIVCMD_H__ */ +#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/xenbus.h --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Sat Oct 8 20:28:24 2005 @@ -33,6 +33,7 @@ #include <linux/device.h> #include <linux/notifier.h> #include <asm/semaphore.h> +#include <asm-xen/xen-public/io/xs_wire.h> /* A xenbus device. */ struct xenbus_device { @@ -73,7 +74,7 @@ return container_of(drv, struct xenbus_driver, driver); } -int xenbus_register_device(struct xenbus_driver *drv); +int xenbus_register_driver(struct xenbus_driver *drv); int xenbus_register_backend(struct xenbus_driver *drv); void xenbus_unregister_driver(struct xenbus_driver *drv); @@ -113,7 +114,8 @@ { struct list_head list; char *node; - void (*callback)(struct xenbus_watch *, const char *node); + void (*callback)(struct xenbus_watch *, + const char **vec, unsigned int len); }; /* notifer routines for when the xenstore comes up */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/mkbuildtree --- a/linux-2.6-xen-sparse/mkbuildtree Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/mkbuildtree Sat Oct 8 20:28:24 2005 @@ -113,9 +113,6 @@ cd ${AD}/include/asm-xen/xen-public relative_lndir ../../../${RS}/../xen/include/public -cd ${AD}/include/asm-xen/linux-public -ln -sf ../../../${RS}/../tools/xenstore/xenstored.h - # Arch-specific post-processing cd ${AD} if [ -x arch/${LINUX_ARCH}/xen-mkbuildtree-post ]; then diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/Makefile --- a/tools/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/Makefile Sat Oct 8 20:28:24 2005 @@ -22,7 +22,7 @@ # These don't cross-compile ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) SUBDIRS += python -#SUBDIRS += pygrub +SUBDIRS += pygrub endif .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/blktap/xenbus.c --- a/tools/blktap/xenbus.c Sat Oct 8 17:37:45 2005 +++ b/tools/blktap/xenbus.c Sat Oct 8 20:28:24 2005 @@ -116,25 +116,25 @@ /* This assumes that the domain name we are looking for is unique! 
*/ -char *get_dom_uuid(struct xs_handle *h, const char *name) -{ - char **e, *val, *uuid = NULL; +char *get_dom_domid(struct xs_handle *h, const char *name) +{ + char **e, *val, *domid = NULL; int num, i, len; char *path; - e = xs_directory(h, "/domain", &num); + e = xs_directory(h, "/local/domain", &num); i=0; while (i < num) { - asprintf(&path, "/domain/%s/name", e[i]); + asprintf(&path, "/local/domain/%s/name", e[i]); val = xs_read(h, path, &len); free(path); if (val == NULL) continue; if (strcmp(val, name) == 0) { /* match! */ - asprintf(&path, "/domain/%s/uuid", e[i]); - uuid = xs_read(h, path, &len); + asprintf(&path, "/local/domain/%s/domid", e[i]); + domid = xs_read(h, path, &len); free(val); free(path); break; @@ -144,7 +144,7 @@ } free(e); - return uuid; + return domid; } static int strsep_len(const char *str, char c, unsigned int len) @@ -251,13 +251,14 @@ char *node = NULL; struct xenbus_watch *w; int er; - - res = xs_read_watch(h); + unsigned int num; + + res = xs_read_watch(h, &num); if (res == NULL) return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */ - node = res[0]; - token = res[1]; + node = res[XS_WATCH_PATH]; + token = res[XS_WATCH_TOKEN]; er = xs_acknowledge_watch(h, token); if (er == 0) @@ -553,15 +554,15 @@ int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname) { - char *uuid, *path; + char *domid, *path; struct xenbus_watch *vbd_watch; int er; - uuid = get_dom_uuid(h, domname); - - DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]"); - - asprintf(&path, "/domain/%s/backend/vbd", uuid); + domid = get_dom_domid(h, domname); + + DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]"); + + asprintf(&path, "/local/domain/%s/backend/vbd", domid); if (path == NULL) return -ENOMEM; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/check/check_hotplug --- a/tools/check/check_hotplug Sat Oct 8 17:37:45 2005 +++ b/tools/check/check_hotplug Sat Oct 8 20:28:24 2005 @@ -7,4 +7,8 @@ exit 1 } +if [ -x /sbin/udev ] && [ ! 
-z `udev -V` ] && [ `udev -V` -ge 059 ]; then + exit 0 +fi + which hotplug 1>/dev/null 2>&1 || error diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Sat Oct 8 17:37:45 2005 +++ b/tools/console/daemon/io.c Sat Oct 8 20:28:24 2005 @@ -1,4 +1,4 @@ -/*\ +/* * Copyright (C) International Business Machines Corp., 2005 * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx> * @@ -16,14 +16,15 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -\*/ + */ #define _GNU_SOURCE #include "utils.h" #include "io.h" -#include "xenctrl.h" -#include "xs.h" +#include <xenctrl.h> +#include <xs.h> +#include <xen/linux/evtchn.h> #include <malloc.h> #include <stdlib.h> @@ -80,14 +81,24 @@ #define XENCONS_FULL(ring) (((ring)->prod - (ring)->cons) == XENCONS_RING_SIZE) #define XENCONS_SPACE(ring) (XENCONS_RING_SIZE - ((ring)->prod - (ring)->cons)) +static void evtchn_notify(struct domain *dom) +{ + struct ioctl_evtchn_notify notify; + notify.port = dom->local_port; + (void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); +} + static void buffer_append(struct domain *dom) { struct buffer *buffer = &dom->buffer; struct ring_head *ring = (struct ring_head *)dom->page; size_t size; u32 oldcons; + int notify = 0; while ((size = ring->prod - ring->cons) != 0) { + notify = 1; + if ((buffer->capacity - buffer->size) < size) { buffer->capacity += (size + 1024); buffer->data = realloc(buffer->data, buffer->capacity); @@ -115,6 +126,9 @@ buffer->capacity = buffer->max_capacity; } } + + if (notify) + evtchn_notify(dom); } static bool buffer_empty(struct buffer *buffer) @@ -219,16 +233,14 @@ return ret; } -#define EVENTCHN_BIND _IO('E', 2) -#define EVENTCHN_UNBIND _IO('E', 3) - static int domain_create_ring(struct domain *dom) { - int err, local_port, ring_ref; + int err, remote_port, ring_ref, rc; + struct ioctl_evtchn_bind_interdomain bind; err = xs_gather(xs, dom->conspath, "ring-ref", "%u", &ring_ref, - "port", "%i", &local_port, + "port", "%i", &remote_port, NULL); if (err) goto out; @@ -246,26 +258,28 @@ dom->ring_ref = ring_ref; } - if (local_port != dom->local_port) { - dom->local_port = -1; - if (dom->evtchn_fd != -1) - close(dom->evtchn_fd); - /* Opening evtchn independently for each console is a bit - * wastefule, but that's how the code is structured... */ - dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR); - if (dom->evtchn_fd == -1) { - err = errno; - goto out; - } + dom->local_port = -1; + if (dom->evtchn_fd != -1) + close(dom->evtchn_fd); + + /* Opening evtchn independently for each console is a bit + * wasteful, but that's how the code is structured... 
*/ + dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR); + if (dom->evtchn_fd == -1) { + err = errno; + goto out; + } - if (ioctl(dom->evtchn_fd, EVENTCHN_BIND, local_port) == -1) { - err = errno; - close(dom->evtchn_fd); - dom->evtchn_fd = -1; - goto out; - } - dom->local_port = local_port; - } + bind.remote_domain = dom->domid; + bind.remote_port = remote_port; + rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); + if (rc == -1) { + err = errno; + close(dom->evtchn_fd); + dom->evtchn_fd = -1; + goto out; + } + dom->local_port = rc; out: return err; @@ -433,7 +447,7 @@ inring->buf[XENCONS_IDX(inring->prod)] = msg[i]; inring->prod++; } - xc_evtchn_send(xc, dom->local_port); + evtchn_notify(dom); } else { close(dom->tty_fd); dom->tty_fd = -1; @@ -477,14 +491,15 @@ char **vec; int domid; struct domain *dom; - - vec = xs_read_watch(xs); + unsigned int num; + + vec = xs_read_watch(xs, &num); if (!vec) return; - if (!strcmp(vec[1], "domlist")) + if (!strcmp(vec[XS_WATCH_TOKEN], "domlist")) enum_domains(); - else if (sscanf(vec[1], "dom%u", &domid) == 1) { + else if (sscanf(vec[XS_WATCH_TOKEN], "dom%u", &domid) == 1) { dom = lookup_domain(domid); if (dom->is_dead == false) domain_create_ring(dom); diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/debugger/pdb/pdb_xen.c --- a/tools/debugger/pdb/pdb_xen.c Sat Oct 8 17:37:45 2005 +++ b/tools/debugger/pdb/pdb_xen.c Sat Oct 8 20:28:24 2005 @@ -43,11 +43,7 @@ #include <sys/ioctl.h> - -/* /dev/xen/evtchn ioctls */ -#define EVTCHN_RESET _IO('E', 1) /* clear & reinit buffer */ -#define EVTCHN_BIND _IO('E', 2) /* bind to event channel */ -#define EVTCHN_UNBIND _IO('E', 3) /* unbind from event channel */ +#include <xen/linux/evtchn.h> int xen_evtchn_bind (int evtchn_fd, int idx) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/Makefile --- a/tools/examples/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/examples/Makefile Sat Oct 8 20:28:24 2005 @@ -21,17 +21,30 @@ XEN_SCRIPT_DIR = /etc/xen/scripts XEN_SCRIPTS = network-bridge vif-bridge XEN_SCRIPTS += network-route vif-route -XEN_SCRIPTS += block-phy -XEN_SCRIPTS += block-file +XEN_SCRIPTS += network-nat vif-nat +XEN_SCRIPTS += block XEN_SCRIPTS += block-enbd XEN_HOTPLUG_DIR = /etc/hotplug XEN_HOTPLUG_SCRIPTS = xen-backend.agent +UDEV_RULES_DIR = /etc/udev/rules.d +UDEV_RULES = xen-backend.rules + +ifeq ($(findstring $(DISTDIR),$(DESTDIR)),$(DISTDIR)) +HOTPLUGS=install-hotplug install-udev +else +ifeq ($(shell [ -x /sbin/udev ] && [ ! 
-z `udev -V` ] && [ `/sbin/udev -V` -ge 059 ] && echo 1),1) +HOTPLUGS=install-udev +else +HOTPLUGS=install-hotplug +endif +endif + all: build: -install: all install-initd install-configs install-scripts install-hotplug +install: all install-initd install-configs install-scripts $(HOTPLUGS) install-initd: [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d @@ -44,7 +57,7 @@ [ -d $(DESTDIR)$(XEN_CONFIG_DIR)/auto ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_CONFIG_DIR)/auto for i in $(XEN_CONFIGS); \ - do [ -a $(DESTDIR)$(XEN_CONFIG_DIR)/$$i ] || \ + do [ -e $(DESTDIR)$(XEN_CONFIG_DIR)/$$i ] || \ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \ done @@ -52,7 +65,7 @@ [ -d $(DESTDIR)$(XEN_SCRIPT_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR) for i in $(XEN_SCRIPTS); \ - do [ -a $(DESTDIR)$(XEN_SCRIPT_DIR)/$$i ] || \ + do \ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \ done @@ -64,4 +77,12 @@ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \ done +install-udev: + [ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \ + $(INSTALL_DIR) $(DESTDIR)$(UDEV_RULES_DIR) + for i in $(UDEV_RULES); \ + do \ + $(INSTALL_PROG) $$i $(DESTDIR)$(UDEV_RULES_DIR); \ + done + clean: diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/network-bridge --- a/tools/examples/network-bridge Sat Oct 8 17:37:45 2005 +++ b/tools/examples/network-bridge Sat Oct 8 20:28:24 2005 @@ -38,6 +38,17 @@ # Print routes. # #============================================================================ + +# Gentoo doesn't have ifup/ifdown: define appropriate alternatives +which ifup >& /dev/null +if [ "$?" != 0 -a -e /etc/conf.d/net ]; then + ifup() { + /etc/init.d/net.$1 start + } + ifdown() { + /etc/init.d/net.$1 stop + } +fi # Exit if anything goes wrong. set -e diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/vif-bridge --- a/tools/examples/vif-bridge Sat Oct 8 17:37:45 2005 +++ b/tools/examples/vif-bridge Sat Oct 8 20:28:24 2005 @@ -33,6 +33,7 @@ # Exit if anything goes wrong set -e +export PATH=/sbin:/bin:/usr/bin:/usr/sbin:$PATH echo "*vif-bridge $*" >&2 diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/vif-nat --- a/tools/examples/vif-nat Sat Oct 8 17:37:45 2005 +++ b/tools/examples/vif-nat Sat Oct 8 20:28:24 2005 @@ -22,7 +22,7 @@ # Exit if anything goes wrong set -e - +export PATH=/sbin:/bin:/usr/bin:/usr/sbin:$PATH echo "*vif-nat $*" >&2 # Operation name. diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/vif-route --- a/tools/examples/vif-route Sat Oct 8 17:37:45 2005 +++ b/tools/examples/vif-route Sat Oct 8 20:28:24 2005 @@ -23,7 +23,7 @@ # Exit if anything goes wrong set -e - +export PATH=/sbin:/bin:/usr/bin:/usr/sbin:$PATH echo "*vif-route $*" >&2 # Operation name. 
diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xen-backend.agent --- a/tools/examples/xen-backend.agent Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xen-backend.agent Sat Oct 8 20:28:24 2005 @@ -11,20 +11,14 @@ add) case "$XENBUS_TYPE" in vbd) - t=$(xenstore-read "$XENBUS_PATH"/type) - params=$(xenstore-read "$XENBUS_PATH"/params) - [ -x /etc/xen/scripts/block-"$t" ] && \ - /etc/xen/scripts/block-"$t" bind $params + /etc/xen/scripts/block bind ;; esac ;; remove) case "$XENBUS_TYPE" in vbd) - t=$(xenstore-read "$XENBUS_PATH"/type) - node=$(xenstore-read "$XENBUS_PATH"/node) - [ -x /etc/xen/scripts/block-"$t" ] && \ - /etc/xen/scripts/block-"$t" unbind $node + /etc/xen/scripts/block unbind ;; esac # remove device backend store entries diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xmexample.vmx --- a/tools/examples/xmexample.vmx Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xmexample.vmx Sat Oct 8 20:28:24 2005 @@ -48,12 +48,41 @@ disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ] #---------------------------------------------------------------------------- -# Set according to whether you want the domain restarted when it exits. -# The default is 'onreboot', which restarts the domain when it shuts down -# with exit code reboot. -# Other values are 'always', and 'never'. +# Configure the behaviour when a domain exits. There are three 'reasons' +# for a domain to stop: poweroff, reboot, and crash. For each of these you +# may specify: +# +# "destroy", meaning that the domain is cleaned up as normal; +# "restart", meaning that a new domain is started in place of the old +# one; +# "preserve", meaning that no clean-up is done until the domain is +# manually destroyed (using xm destroy, for example); or +# "rename-restart", meaning that the old domain is not cleaned up, but is +# renamed and a new domain started in its place. +# +# The default is +# +# on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# For backwards compatibility we also support the deprecated option restart +# +# restart = 'onreboot' means on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'destroy' +# +# restart = 'always' means on_poweroff = 'restart' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# restart = 'never' means on_poweroff = 'destroy' +# on_reboot = 'destroy' +# on_crash = 'destroy' -#restart = 'onreboot' +#on_poweroff = 'destroy' +#on_reboot = 'restart' +#on_crash = 'restart' #============================================================================ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xmexample1 --- a/tools/examples/xmexample1 Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xmexample1 Sat Oct 8 20:28:24 2005 @@ -91,11 +91,40 @@ extra = "4" #---------------------------------------------------------------------------- -# Set according to whether you want the domain restarted when it exits. -# The default is 'onreboot', which restarts the domain when it shuts down -# with exit code reboot. -# Other values are 'always', and 'never'. +# Configure the behaviour when a domain exits. There are three 'reasons' +# for a domain to stop: poweroff, reboot, and crash. 
For each of these you +# may specify: +# +# "destroy", meaning that the domain is cleaned up as normal; +# "restart", meaning that a new domain is started in place of the old +# one; +# "preserve", meaning that no clean-up is done until the domain is +# manually destroyed (using xm destroy, for example); or +# "rename-restart", meaning that the old domain is not cleaned up, but is +# renamed and a new domain started in its place. +# +# The default is +# +# on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# For backwards compatibility we also support the deprecated option restart +# +# restart = 'onreboot' means on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'destroy' +# +# restart = 'always' means on_poweroff = 'restart' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# restart = 'never' means on_poweroff = 'destroy' +# on_reboot = 'destroy' +# on_crash = 'destroy' -#restart = 'onreboot' +#on_poweroff = 'destroy' +#on_reboot = 'restart' +#on_crash = 'restart' #============================================================================ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xmexample2 --- a/tools/examples/xmexample2 Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xmexample2 Sat Oct 8 20:28:24 2005 @@ -127,11 +127,40 @@ extra = "4 VMID=%d usr=/dev/sda6" % vmid #---------------------------------------------------------------------------- -# Set according to whether you want the domain restarted when it exits. -# The default is 'onreboot', which restarts the domain when it shuts down -# with exit code reboot. -# Other values are 'always', and 'never'. +# Configure the behaviour when a domain exits. There are three 'reasons' +# for a domain to stop: poweroff, reboot, and crash. For each of these you +# may specify: +# +# "destroy", meaning that the domain is cleaned up as normal; +# "restart", meaning that a new domain is started in place of the old +# one; +# "preserve", meaning that no clean-up is done until the domain is +# manually destroyed (using xm destroy, for example); or +# "rename-restart", meaning that the old domain is not cleaned up, but is +# renamed and a new domain started in its place. +# +# The default is +# +# on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# For backwards compatibility we also support the deprecated option restart +# +# restart = 'onreboot' means on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'destroy' +# +# restart = 'always' means on_poweroff = 'restart' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# restart = 'never' means on_poweroff = 'destroy' +# on_reboot = 'destroy' +# on_crash = 'destroy' -#restart = 'onreboot' +#on_poweroff = 'destroy' +#on_reboot = 'restart' +#on_crash = 'restart' #============================================================================ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xmexample3 --- a/tools/examples/xmexample3 Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xmexample3 Sat Oct 8 20:28:24 2005 @@ -124,11 +124,40 @@ extra = "4 VMID=%d" % vmid #---------------------------------------------------------------------------- -# Set according to whether you want the domain restarted when it exits. -# The default is 'onreboot', which restarts the domain when it shuts down -# with exit code reboot. -# Other values are 'always', and 'never'. +# Configure the behaviour when a domain exits. There are three 'reasons' +# for a domain to stop: poweroff, reboot, and crash. 
For each of these you +# may specify: +# +# "destroy", meaning that the domain is cleaned up as normal; +# "restart", meaning that a new domain is started in place of the old +# one; +# "preserve", meaning that no clean-up is done until the domain is +# manually destroyed (using xm destroy, for example); or +# "rename-restart", meaning that the old domain is not cleaned up, but is +# renamed and a new domain started in its place. +# +# The default is +# +# on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# For backwards compatibility we also support the deprecated option restart +# +# restart = 'onreboot' means on_poweroff = 'destroy' +# on_reboot = 'restart' +# on_crash = 'destroy' +# +# restart = 'always' means on_poweroff = 'restart' +# on_reboot = 'restart' +# on_crash = 'restart' +# +# restart = 'never' means on_poweroff = 'destroy' +# on_reboot = 'destroy' +# on_crash = 'destroy' -#restart = 'onreboot' +#on_poweroff = 'destroy' +#on_reboot = 'restart' +#on_crash = 'restart' #============================================================================ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/firmware/vmxassist/Makefile Sat Oct 8 20:28:24 2005 @@ -25,9 +25,7 @@ TEXTADDR=0x000D0000 DEFINES=-DDEBUG -DTEXTADDR=${TEXTADDR} -XENINC=-I$(XEN_ROOT)/xen/include -I$(XEN_ROOT)/tools/libxc -#DEFINES=-DDEBUG -DTEST -DTEXTADDR=${TEXTADDR} -#XENINC=-I/home/leendert/xen/xeno-unstable.bk/xen/include +XENINC=-I$(XEN_ROOT)/tools/libxc LD = ld CC = gcc diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/firmware/vmxassist/vm86.h --- a/tools/firmware/vmxassist/vm86.h Sat Oct 8 17:37:45 2005 +++ b/tools/firmware/vmxassist/vm86.h Sat Oct 8 20:28:24 2005 @@ -33,7 +33,7 @@ typedef int64_t s64; #endif -#include <public/vmx_assist.h> +#include <xen/vmx_assist.h> #define NR_EXCEPTION_HANDLER 32 #define NR_INTERRUPT_HANDLERS 16 diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/hw/i8254.c --- a/tools/ioemu/hw/i8254.c Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/hw/i8254.c Sat Oct 8 20:28:24 2005 @@ -22,8 +22,8 @@ * THE SOFTWARE. */ #include "vl.h" -#include "xenctrl.h" -#include <io/ioreq.h> +#include <xenctrl.h> +#include <xen/io/ioreq.h> //#define DEBUG_PIT diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/hw/i8259.c --- a/tools/ioemu/hw/i8259.c Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/hw/i8259.c Sat Oct 8 20:28:24 2005 @@ -22,8 +22,8 @@ * THE SOFTWARE. */ #include "vl.h" -#include "xenctrl.h" -#include <io/ioreq.h> +#include <xenctrl.h> +#include <xen/io/ioreq.h> /* debug PIC */ //#define DEBUG_PIC diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/hw/ioapic.h --- a/tools/ioemu/hw/ioapic.h Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/hw/ioapic.h Sat Oct 8 20:28:24 2005 @@ -26,9 +26,9 @@ #ifndef __IOAPIC_H #define __IOAPIC_H -#include "xenctrl.h" -#include <io/ioreq.h> -#include <io/vmx_vlapic.h> +#include <xenctrl.h> +#include <xen/io/ioreq.h> +#include <xen/io/vmx_vlapic.h> #define IOAPIC_NUM_PINS 24 #define IOAPIC_VERSION_ID 0x11 diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/target-i386-dm/Makefile Sat Oct 8 20:28:24 2005 @@ -6,7 +6,7 @@ INSTALL_DIR := $(DESTDIR)/usr/$(LIBDIR)/xen/bin TARGET_PATH=$(SRC_PATH)/target-$(TARGET_ARCH) VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio -DEFINES=-I. -I$(TARGET_PATH) -I$(SRC_PATH) -I$(XEN_ROOT)/xen/include/public +DEFINES=-I. 
-I$(TARGET_PATH) -I$(SRC_PATH) DEFINES+= -I$(XEN_ROOT)/tools/libxc ifdef CONFIG_USER_ONLY VPATH+=:$(SRC_PATH)/linux-user diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/target-i386-dm/helper2.c Sat Oct 8 20:28:24 2005 @@ -47,12 +47,15 @@ #include <fcntl.h> #include <sys/ioctl.h> -#include "xenctrl.h" -#include <io/ioreq.h> +#include <xenctrl.h> +#include <xen/io/ioreq.h> +#include <xen/linux/evtchn.h> #include "cpu.h" #include "exec-all.h" #include "vl.h" + +extern int domid; void *shared_vram; @@ -119,7 +122,7 @@ //the evtchn fd for polling int evtchn_fd = -1; //the evtchn port for polling the notification, should be inputed as bochs's parameter -u16 ioreq_port = 0; +u16 ioreq_remote_port, ioreq_local_port; //some functions to handle the io req packet void @@ -156,9 +159,9 @@ int rc; u16 buf[2]; rc = read(evtchn_fd, buf, 2); - if (rc == 2 && buf[0] == ioreq_port){//got only one matched 16bit port index + if (rc == 2 && buf[0] == ioreq_local_port){//got only one matched 16bit port index // unmask the wanted port again - write(evtchn_fd, &ioreq_port, 2); + write(evtchn_fd, &ioreq_local_port, 2); //get the io packet from shared memory return __cpu_get_ioreq(); @@ -417,7 +420,6 @@ void destroy_vmx_domain(void) { - extern int domid; extern FILE* logfile; char destroy_cmd[20]; sprintf(destroy_cmd, "xm destroy %d", domid); @@ -484,11 +486,9 @@ do_ioapic(); #endif if (env->send_event) { - int ret; - ret = xc_evtchn_send(xc_handle, ioreq_port); - if (ret == -1) { - fprintf(logfile, "evtchn_send failed on port: %d\n", ioreq_port); - } + struct ioctl_evtchn_notify notify; + notify.port = ioreq_local_port; + (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); } } destroy_vmx_domain(); @@ -499,7 +499,6 @@ qemu_vmx_reset(void *unused) { char cmd[255]; - extern int domid; /* pause domain first, to avoid repeated reboot request*/ xc_domain_pause (xc_handle, domid); @@ -512,6 +511,8 @@ cpu_init() { CPUX86State *env; + struct ioctl_evtchn_bind_interdomain bind; + int rc; cpu_exec_init(); qemu_register_reset(qemu_vmx_reset, NULL); @@ -532,12 +533,14 @@ return NULL; } - fprintf(logfile, "listening to port: %d\n", ioreq_port); - /*unmask the wanted port -- bind*/ - if (ioctl(evtchn_fd, ('E'<<8)|2, ioreq_port) == -1) { + bind.remote_domain = domid; + bind.remote_port = ioreq_remote_port; + rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); + if (rc == -1) { perror("ioctl"); return NULL; } + ioreq_local_port = rc; return env; } diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Sat Oct 8 17:37:45 2005 +++ b/tools/ioemu/vl.c Sat Oct 8 20:28:24 2005 @@ -2806,9 +2806,9 @@ case QEMU_OPTION_p: { - extern short ioreq_port; - ioreq_port = atoi(optarg); - printf("port: %d\n", ioreq_port); + extern u16 ioreq_remote_port; + ioreq_remote_port = atoi(optarg); + printf("port: %d\n", ioreq_remote_port); } break; case QEMU_OPTION_l: diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/libxc/xc_evtchn.c --- a/tools/libxc/xc_evtchn.c Sat Oct 8 17:37:45 2005 +++ b/tools/libxc/xc_evtchn.c Sat Oct 8 20:28:24 2005 @@ -34,92 +34,18 @@ int xc_evtchn_alloc_unbound(int xc_handle, u32 dom, - int *port) + u32 remote_dom) { - evtchn_op_t op; int rc; - - op.cmd = EVTCHNOP_alloc_unbound; - op.u.alloc_unbound.dom = (domid_t)dom; - op.u.alloc_unbound.port = (port != NULL) ? 
*port : 0; + evtchn_op_t op = { + .cmd = EVTCHNOP_alloc_unbound, + .u.alloc_unbound.dom = (domid_t)dom, + .u.alloc_unbound.remote_dom = (domid_t)remote_dom }; if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) - { - if ( port != NULL ) - *port = op.u.alloc_unbound.port; - } + rc = op.u.alloc_unbound.port; return rc; -} - - -int xc_evtchn_bind_interdomain(int xc_handle, - u32 dom1, - u32 dom2, - int *port1, - int *port2) -{ - evtchn_op_t op; - int rc; - - op.cmd = EVTCHNOP_bind_interdomain; - op.u.bind_interdomain.dom1 = (domid_t)dom1; - op.u.bind_interdomain.dom2 = (domid_t)dom2; - op.u.bind_interdomain.port1 = (port1 != NULL) ? *port1 : 0; - op.u.bind_interdomain.port2 = (port2 != NULL) ? *port2 : 0; - - - if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) - { - if ( port1 != NULL ) - *port1 = op.u.bind_interdomain.port1; - if ( port2 != NULL ) - *port2 = op.u.bind_interdomain.port2; - } - - return rc; -} - - -int xc_evtchn_bind_virq(int xc_handle, - int virq, - int *port) -{ - evtchn_op_t op; - int rc; - - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = (u32)virq; - - if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) - { - if ( port != NULL ) - *port = op.u.bind_virq.port; - } - - return rc; -} - - -int xc_evtchn_close(int xc_handle, - u32 dom, - int port) -{ - evtchn_op_t op; - op.cmd = EVTCHNOP_close; - op.u.close.dom = (domid_t)dom; - op.u.close.port = port; - return do_evtchn_op(xc_handle, &op); -} - - -int xc_evtchn_send(int xc_handle, - int local_port) -{ - evtchn_op_t op; - op.cmd = EVTCHNOP_send; - op.u.send.local_port = local_port; - return do_evtchn_op(xc_handle, &op); } @@ -128,13 +54,12 @@ int port, xc_evtchn_status_t *status) { - evtchn_op_t op; int rc; + evtchn_op_t op = { + .cmd = EVTCHNOP_status, + .u.status.dom = (domid_t)dom, + .u.status.port = port }; - op.cmd = EVTCHNOP_status; - op.u.status.dom = (domid_t)dom; - op.u.status.port = port; - if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 ) memcpy(status, &op.u.status, sizeof(*status)); diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Sat Oct 8 17:37:45 2005 +++ b/tools/libxc/xc_linux_restore.c Sat Oct 8 20:28:24 2005 @@ -500,13 +500,13 @@ } /* Uncanonicalise the suspend-record frame number and poke resume rec. */ - pfn = ctxt.user_regs.esi; + pfn = ctxt.user_regs.edx; if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) ) { ERR("Suspend record frame number is bad"); goto out; } - ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn]; + ctxt.user_regs.edx = mfn = pfn_to_mfn_table[pfn]; start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); start_info->nr_pages = nr_pfns; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Sat Oct 8 17:37:45 2005 +++ b/tools/libxc/xc_linux_save.c Sat Oct 8 20:28:24 2005 @@ -978,9 +978,9 @@ goto out; } - DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n", + DPRINTF("SUSPEND shinfo %08lx eip %08u edx %08u\n", info.shared_info_frame, - ctxt.user_regs.eip, ctxt.user_regs.esi); + ctxt.user_regs.eip, ctxt.user_regs.edx); } if ( xc_shadow_control( xc_handle, dom, @@ -1048,7 +1048,7 @@ } /* Canonicalise the suspend-record frame number. 
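A note on the xc_evtchn_alloc_unbound() rewrite above: the port is no longer an in/out parameter; the allocated port is simply the non-negative return value, with -1 indicating failure. Through the updated Python binding (changed later in this patch) the call becomes, roughly (domain ids illustrative):

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.new()
    # Allocate a port in domain 1's port space which domain 0 may later
    # bind to.  The binding raises on failure rather than returning -1.
    port = xc.evtchn_alloc_unbound(dom = 1, remote_dom = 0)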
*/ - if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ) + if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) { ERR("Suspend record is not in range of pseudophys map"); goto out; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sat Oct 8 17:37:45 2005 +++ b/tools/libxc/xenctrl.h Sat Oct 8 20:28:24 2005 @@ -25,6 +25,7 @@ #include <xen/dom0_ops.h> #include <xen/version.h> #include <xen/event_channel.h> +#include <xen/sched.h> #include <xen/sched_ctl.h> #include <xen/acm.h> @@ -305,66 +306,14 @@ * well-known port within a domain to receive events on. * * @parm xc_handle a handle to an open hypervisor interface - * @parm dom the ID of the domain. This maybe DOMID_SELF - * @parm port a pointer to a port. This is an in/out parameter. If *port is - * 0, then a new port will be assigned, if port is > 0 then that - * port is allocated if the port is unallocated. - * @return 0 on success, -1 on failure + * @parm dom the ID of the local domain (the 'allocatee') + * @parm remote_dom the ID of the domain who will later bind + * @return allocated port (in @dom) on success, -1 on failure */ int xc_evtchn_alloc_unbound(int xc_handle, u32 dom, - int *port); - -/** - * This function creates a pair of ports between two domains. A port can only - * be bound once within a domain. - * - * @parm xc_handle a handle to an open hypervisor interface - * @parm dom1 one of the two domains to connect. Can be DOMID_SELF. - * @parm dom2 the other domain to connect. Can be DOMID_SELF. - * @parm port1 an in/out parameter. If > 0, then try to connect *port. If - * 0, then allocate a new port and store the port in *port. - * @parm port2 the port connected on port2. This parameter behaves the same - * way as port1. - * @return 0 on success, -1 on error. - */ -int xc_evtchn_bind_interdomain(int xc_handle, - u32 dom1, - u32 dom2, - int *port1, - int *port2); -int xc_evtchn_bind_virq(int xc_handle, - int virq, - int *port); - -/** - * This function will close a single port on an event channel. - * - * @parm xc_handle a handle to an open hypervisor interface - * @parm dom the domain that the port exists on. May be DOMID_SELF. - * @parm port the port to close - * @return 0 on success, -1 on error - */ -int xc_evtchn_close(int xc_handle, - u32 dom, /* may be DOMID_SELF */ - int port); - -/** - * This function generates a notify event on a bound port. - * - * Notifies can be read within Linux by opening /dev/xen/evtchn and reading - * a 16 bit value. The result will be the port the event occurred on. When - * events occur, the port is masked until the 16 bit port value is written back - * to the file. When /dev/xen/evtchn is opened, it has to be bound via an - * ioctl to each port to listen on. The ioctl for binding is _IO('E', 2). The - * parameter is the port to listen on. 
- * - * @parm xc_handle a handle to an open hypervisor interface - * @parm local_port the port to generate the notify on - * @return 0 on success, -1 on error - */ -int xc_evtchn_send(int xc_handle, - int local_port); + u32 remote_dom); + int xc_evtchn_status(int xc_handle, u32 dom, /* may be DOMID_SELF */ int port, diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/pygrub/Makefile --- a/tools/pygrub/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/pygrub/Makefile Sat Oct 8 20:28:24 2005 @@ -15,4 +15,4 @@ endif clean: - rm -rf build *.pyc *.pyo *.o *.a *~ + rm -rf build tmp *.pyc *.pyo *.o *.a *~ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/pygrub/setup.py --- a/tools/pygrub/setup.py Sat Oct 8 17:37:45 2005 +++ b/tools/pygrub/setup.py Sat Oct 8 20:28:24 2005 @@ -1,5 +1,7 @@ from distutils.core import setup, Extension +from distutils.ccompiler import new_compiler import os +import sys extra_compile_args = [ "-fno-strict-aliasing", "-Wall", "-Werror" ] @@ -7,9 +9,19 @@ fsys_pkgs = [] if os.path.exists("/usr/include/ext2fs/ext2_fs.h"): + ext2defines = [] + cc = new_compiler() + cc.add_library("ext2fs") + if cc.has_function("ext2fs_open2"): + ext2defines.append( ("HAVE_EXT2FS_OPEN2", None) ) + else: + sys.stderr.write("WARNING: older version of e2fsprogs installed, not building full\n") + sys.stderr.write(" disk support for ext2.\n") + ext2 = Extension("grub.fsys.ext2._pyext2", extra_compile_args = extra_compile_args, libraries = ["ext2fs"], + define_macros = ext2defines, sources = ["src/fsys/ext2/ext2module.c"]) fsys_mods.append(ext2) fsys_pkgs.append("grub.fsys.ext2") diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/pygrub/src/fsys/ext2/ext2module.c --- a/tools/pygrub/src/fsys/ext2/ext2module.c Sat Oct 8 17:37:45 2005 +++ b/tools/pygrub/src/fsys/ext2/ext2module.c Sat Oct 8 20:28:24 2005 @@ -229,8 +229,13 @@ snprintf(offsetopt, 29, "offset=%d", offset); } +#ifdef HAVE_EXT2FS_OPEN2 err = ext2fs_open2(name, offsetopt, flags, superblock, block_size, unix_io_manager, &efs); +#else + err = ext2fs_open(name, flags, superblock, block_size, + unix_io_manager, &efs); +#endif if (err) { PyErr_SetString(PyExc_ValueError, "unable to open file"); return NULL; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/Makefile --- a/tools/python/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/python/Makefile Sat Oct 8 20:28:24 2005 @@ -15,5 +15,8 @@ CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" endif +test: + export LD_LIBRARY_PATH=$$(readlink -f ../libxc):$$(readlink -f ../xenstore); python test.py -b -u + clean: rm -rf build *.pyc *.pyo *.o *.a *~ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/setup.py --- a/tools/python/setup.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/setup.py Sat Oct 8 20:28:24 2005 @@ -42,7 +42,9 @@ 'xen.xend.xenstore', 'xen.xm', 'xen.web', - 'xen.sv' + 'xen.sv', + + 'xen.xend.tests' ], ext_package = "xen.lowlevel", ext_modules = [ xc, xs ] diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Sat Oct 8 20:28:24 2005 @@ -432,104 +432,19 @@ { XcObject *xc = (XcObject *)self; - u32 dom; - int port = 0; - - static char *kwd_list[] = { "dom", "port", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, - &dom, &port) ) - return NULL; - - if ( xc_evtchn_alloc_unbound(xc->xc_handle, dom, &port) != 0 ) + u32 dom, remote_dom; + int port; + + static char *kwd_list[] = { "dom", "remote_dom", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", 
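The comment block being deleted above documented the userspace side of event delivery: open /dev/xen/evtchn, read a 16-bit port number per notification, and write the same value back to unmask the port. That read-then-write-back convention is still what the device-model code earlier in this patch relies on; only the bind step moved to a proper ioctl. A rough sketch of the loop (the byte-order choice and the handler are assumptions here):

    import os, struct

    fd = os.open("/dev/xen/evtchn", os.O_RDWR)
    while True:
        # Each notification arrives as a 16-bit port number; the port
        # stays masked until the same value is written back.
        port = struct.unpack("=H", os.read(fd, 2))[0]
        handle_event(port)                     # hypothetical handler
        os.write(fd, struct.pack("=H", port))  # unmask the port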
kwd_list, + &dom, &remote_dom) ) + return NULL; + + if ( (port = xc_evtchn_alloc_unbound(xc->xc_handle, dom, remote_dom)) < 0 ) return PyErr_SetFromErrno(xc_error); return PyInt_FromLong(port); -} - -static PyObject *pyxc_evtchn_bind_interdomain(PyObject *self, - PyObject *args, - PyObject *kwds) -{ - XcObject *xc = (XcObject *)self; - - u32 dom1 = DOMID_SELF, dom2 = DOMID_SELF; - int port1 = 0, port2 = 0; - - static char *kwd_list[] = { "dom1", "dom2", "port1", "port2", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiii", kwd_list, - &dom1, &dom2, &port1, &port2) ) - return NULL; - - if ( xc_evtchn_bind_interdomain(xc->xc_handle, dom1, - dom2, &port1, &port2) != 0 ) - return PyErr_SetFromErrno(xc_error); - - return Py_BuildValue("{s:i,s:i}", - "port1", port1, - "port2", port2); -} - -static PyObject *pyxc_evtchn_bind_virq(PyObject *self, - PyObject *args, - PyObject *kwds) -{ - XcObject *xc = (XcObject *)self; - - int virq, port; - - static char *kwd_list[] = { "virq", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &virq) ) - return NULL; - - if ( xc_evtchn_bind_virq(xc->xc_handle, virq, &port) != 0 ) - return PyErr_SetFromErrno(xc_error); - - return PyInt_FromLong(port); -} - -static PyObject *pyxc_evtchn_close(PyObject *self, - PyObject *args, - PyObject *kwds) -{ - XcObject *xc = (XcObject *)self; - - u32 dom = DOMID_SELF; - int port; - - static char *kwd_list[] = { "port", "dom", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, - &port, &dom) ) - return NULL; - - if ( xc_evtchn_close(xc->xc_handle, dom, port) != 0 ) - return PyErr_SetFromErrno(xc_error); - - Py_INCREF(zero); - return zero; -} - -static PyObject *pyxc_evtchn_send(PyObject *self, - PyObject *args, - PyObject *kwds) -{ - XcObject *xc = (XcObject *)self; - - int port; - - static char *kwd_list[] = { "port", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &port) ) - return NULL; - - if ( xc_evtchn_send(xc->xc_handle, port) != 0 ) - return PyErr_SetFromErrno(xc_error); - - Py_INCREF(zero); - return zero; } static PyObject *pyxc_evtchn_status(PyObject *self, @@ -1028,41 +943,10 @@ { "evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, METH_VARARGS | METH_KEYWORDS, "\n" - "Allocate an unbound local port that will await a remote connection.\n" - " dom [int]: Remote domain to accept connections from.\n\n" + "Allocate an unbound port that will await a remote connection.\n" + " dom [int]: Domain whose port space to allocate from.\n" + " remote_dom [int]: Remote domain to accept connections from.\n\n" "Returns: [int] Unbound event-channel port.\n" }, - - { "evtchn_bind_interdomain", - (PyCFunction)pyxc_evtchn_bind_interdomain, - METH_VARARGS | METH_KEYWORDS, "\n" - "Open an event channel between two domains.\n" - " dom1 [int, SELF]: First domain to be connected.\n" - " dom2 [int, SELF]: Second domain to be connected.\n\n" - "Returns: [dict] dictionary is empty on failure.\n" - " port1 [int]: Port-id for endpoint at dom1.\n" - " port2 [int]: Port-id for endpoint at dom2.\n" }, - - { "evtchn_bind_virq", - (PyCFunction)pyxc_evtchn_bind_virq, - METH_VARARGS | METH_KEYWORDS, "\n" - "Bind an event channel to the specified VIRQ.\n" - " virq [int]: VIRQ to bind.\n\n" - "Returns: [int] Bound event-channel port.\n" }, - - { "evtchn_close", - (PyCFunction)pyxc_evtchn_close, - METH_VARARGS | METH_KEYWORDS, "\n" - "Close an event channel. 
If interdomain, sets remote end to 'unbound'.\n" - " dom [int, SELF]: Dom-id of one endpoint of the channel.\n" - " port [int]: Port-id of one endpoint of the channel.\n\n" - "Returns: [int] 0 on success; -1 on error.\n" }, - - { "evtchn_send", - (PyCFunction)pyxc_evtchn_send, - METH_VARARGS | METH_KEYWORDS, "\n" - "Send an event along a locally-connected event channel.\n" - " port [int]: Port-id of a local channel endpoint.\n\n" - "Returns: [int] 0 on success; -1 on error.\n" }, { "evtchn_status", (PyCFunction)pyxc_evtchn_status, diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/lowlevel/xs/xs.c Sat Oct 8 20:28:24 2005 @@ -462,19 +462,20 @@ char **xsval = NULL; PyObject *token; int i; + unsigned int num; if (!xh) goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) goto exit; Py_BEGIN_ALLOW_THREADS - xsval = xs_read_watch(xh); - Py_END_ALLOW_THREADS - if (!xsval) { - PyErr_SetFromErrno(PyExc_RuntimeError); - goto exit; - } - if (sscanf(xsval[1], "%li", (unsigned long *)&token) != 1) { + xsval = xs_read_watch(xh, &num); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + if (sscanf(xsval[XS_WATCH_TOKEN], "%li", (unsigned long *)&token) != 1) { PyErr_SetString(PyExc_RuntimeError, "invalid token"); goto exit; } @@ -487,7 +488,7 @@ goto exit; } /* Create tuple (path, token). */ - val = Py_BuildValue("(sO)", xsval[0], token); + val = Py_BuildValue("(sO)", xsval[XS_WATCH_PATH], token); exit: if (xsval) free(xsval); @@ -774,39 +775,6 @@ return val; } -#define xspy_shutdown_doc "\n" \ - "Shutdown the xenstore daemon.\n" \ - "\n" \ - "Returns None on success.\n" \ - "Raises RuntimeError on error.\n" \ - "\n" - -static PyObject *xspy_shutdown(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwd_spec[] = { NULL }; - static char *arg_spec = ""; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - int xsval = 0; - - if (!xh) - goto exit; - if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) - goto exit; - Py_BEGIN_ALLOW_THREADS - xsval = xs_shutdown(xh); - Py_END_ALLOW_THREADS - if (!xsval) { - PyErr_SetFromErrno(PyExc_RuntimeError); - goto exit; - } - Py_INCREF(Py_None); - val = Py_None; - exit: - return val; -} - #define xspy_get_domain_path_doc "\n" \ "Return store path of domain.\n" \ " domid [int]: domain id\n" \ @@ -845,28 +813,6 @@ } val = PyString_FromString(xsval); free(xsval); - exit: - return val; -} - -#define xspy_fileno_doc "\n" \ - "Get the file descriptor of the xenstore socket.\n" \ - "Allows an xs object to be passed to select().\n" \ - "\n" \ - "Returns: [int] file descriptor.\n" \ - "\n" - -static PyObject *xspy_fileno(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwd_spec[] = { NULL }; - static char *arg_spec = ""; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) - goto exit; - val = PyInt_FromLong((xh ? xs_fileno(xh) : -1)); exit: return val; } @@ -894,9 +840,7 @@ XSPY_METH(introduce_domain), XSPY_METH(release_domain), XSPY_METH(close), - XSPY_METH(shutdown), XSPY_METH(get_domain_path), - XSPY_METH(fileno), { /* Terminator. 
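After the xs_read_watch() change above, the Python binding still returns a (path, token) pair, now picked out with the XS_WATCH_PATH and XS_WATCH_TOKEN indices instead of hard-coded 0 and 1. Client code keeps the same shape, roughly (the handle constructor is elided and a watch is assumed to be registered already):

    # xsh: an open handle from xen.lowlevel.xs, with a watch in place.
    path, token = xsh.read_watch()
    print "watch fired on %s (token %s)" % (path, token)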
*/ }, }; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/sv/DomInfo.py --- a/tools/python/xen/sv/DomInfo.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/sv/DomInfo.py Sat Oct 8 20:28:24 2005 @@ -139,7 +139,7 @@ if not dom is None and dom != '0': if DEBUG: print ">DomShutDown %s" % dom try: - server.xend_domain_shutdown( int( dom ), "halt" ) + server.xend_domain_shutdown( int( dom ), "poweroff" ) except: pass @@ -175,7 +175,7 @@ if not dom is None and dom != '0': if DEBUG: print ">DomDestroy %s" % dom try: - server.xend_domain_destroy( int( dom ), "halt" ) + server.xend_domain_destroy(int( dom )) except: pass diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/web/http.py --- a/tools/python/xen/web/http.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/web/http.py Sat Oct 8 20:28:24 2005 @@ -437,6 +437,9 @@ send_body = self.hasBody() if not self.close_connection: self.setResponseHeader("Connection", "keep-alive") + self.setResponseHeader("Pragma", "no-cache") + self.setResponseHeader("Cache-Control", "no-cache") + self.setResponseHeader("Expires", "-1") if send_body: self.output.seek(0, 0) body = self.output.getvalue() diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendCheckpoint.py Sat Oct 8 20:28:24 2005 @@ -1,4 +1,5 @@ # Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> +# Copyright (C) 2005 XenSource Ltd # This file is subject to the terms and conditions of the GNU General # Public License. See the file "COPYING" in the main directory of @@ -7,15 +8,14 @@ import os import re import select +import string import sxp -from string import join from struct import pack, unpack, calcsize from xen.util.xpopen import xPopen3 import xen.lowlevel.xc -import XendDomainInfo from xen.xend.xenstore.xsutil import IntroduceDomain from XendError import XendError @@ -42,57 +42,55 @@ raise XendError(errmsg) return buf -def save(xd, fd, dominfo, live): +def save(fd, dominfo, live): write_exact(fd, SIGNATURE, "could not write guest state file: signature") config = sxp.to_string(dominfo.sxpr()) - write_exact(fd, pack("!i", len(config)), - "could not write guest state file: config len") - write_exact(fd, config, "could not write guest state file: config") - - # xc_save takes three customization parameters: maxit, max_f, and flags - # the last controls whether or not save is 'live', while the first two - # further customize behaviour when 'live' save is enabled. Passing "0" - # simply uses the defaults compiled into libxenguest; see the comments - # and/or code in xc_linux_save() for more information. 
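With the DomInfo.py changes above (and the matching XendClient.py changes further down), the client-side calls now look like this, assuming the usual 'server' proxy import and an illustrative domain id:

    from xen.xend.XendClient import server

    server.xend_domain_shutdown(3, 'poweroff')  # 'halt' is no longer a
                                                # shutdown reason here
    server.xend_domain_destroy(3)               # destroy takes no reason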
- cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd), - str(dominfo.domid), "0", "0", str(int(live)) ] - log.info("[xc_save] " + join(cmd)) - child = xPopen3(cmd, True, -1, [fd, xc.handle()]) - - lasterr = "" - p = select.poll() - p.register(child.fromchild.fileno()) - p.register(child.childerr.fileno()) - while True: - r = p.poll() - for (fd, event) in r: - if not event & select.POLLIN: - continue - if fd == child.childerr.fileno(): - l = child.childerr.readline() - log.error(l.rstrip()) - lasterr = l.rstrip() - if fd == child.fromchild.fileno(): - l = child.fromchild.readline() - if l.rstrip() == "suspend": - log.info("suspending %d" % dominfo.domid) - xd.domain_shutdown(dominfo.domid, reason='suspend') - dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED) - log.info("suspend %d done" % dominfo.domid) - child.tochild.write("done\n") - child.tochild.flush() - if filter(lambda (fd, event): event & select.POLLHUP, r): - break - - if child.wait() >> 8 == 127: - lasterr = "popen %s failed" % PATH_XC_SAVE - if child.wait() != 0: - raise XendError("xc_save failed: %s" % lasterr) - - dominfo.closeStoreChannel() - xd.domain_destroy(dominfo.domid) - return None + + domain_name = dominfo.getName() + # Rename the domain temporarily, so that we don't get a name clash if this + # domain is migrating (live or non-live) to the local host. Doing such a + # thing is useful for debugging. + dominfo.setName('migrating-' + domain_name) + + try: + write_exact(fd, pack("!i", len(config)), + "could not write guest state file: config len") + write_exact(fd, config, "could not write guest state file: config") + + # xc_save takes three customization parameters: maxit, max_f, and + # flags the last controls whether or not save is 'live', while the + # first two further customize behaviour when 'live' save is + # enabled. Passing "0" simply uses the defaults compiled into + # libxenguest; see the comments and/or code in xc_linux_save() for + # more information. 
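Since save() above no longer takes the XendDomain instance, its two callers later in this patch reduce to the following call shapes:

    # Migration path (domain_migrate):
    XendCheckpoint.save(sock.fileno(), dominfo, live)

    # Save-to-file path (domain_save; 'live checkpoint' still unsupported):
    XendCheckpoint.save(fd, dominfo, False)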
+ cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd), + str(dominfo.getDomid()), "0", "0", str(int(live)) ] + log.debug("[xc_save]: %s", string.join(cmd)) + + def saveInputHandler(line, tochild): + log.debug("In saveInputHandler %s", line) + if line == "suspend": + log.debug("Suspending %d ...", dominfo.getDomid()) + dominfo.shutdown('suspend') + dominfo.waitForShutdown() + log.info("Domain %d suspended.", dominfo.getDomid()) + tochild.write("done\n") + tochild.flush() + + forkHelper(cmd, fd, saveInputHandler, False) + + dominfo.destroyDomain() + + except Exception, exn: + log.exception("Save failed on domain %s (%d).", domain_name, + dominfo.getDomid()) + try: + dominfo.setName(domain_name) + except: + log.exception("Failed to reset the migrating domain's name") + raise Exception, exn + def restore(xd, fd): signature = read_exact(fd, len(SIGNATURE), @@ -113,71 +111,98 @@ raise XendError("not a valid guest state file: config parse") vmconfig = p.get_val() - dominfo = xd.domain_configure(vmconfig) - - l = read_exact(fd, sizeof_unsigned_long, - "not a valid guest state file: pfn count read") - nr_pfns = unpack("=L", l)[0] # XXX endianess - if nr_pfns > 1024*1024: # XXX - raise XendError( - "not a valid guest state file: pfn count out of range") - - if dominfo.store_channel: - store_evtchn = dominfo.store_channel.port2 - else: - store_evtchn = 0 - - if dominfo.console_channel: - console_evtchn = dominfo.console_channel.port2 - else: - console_evtchn = 0 - - cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd), - str(dominfo.domid), str(nr_pfns), - str(store_evtchn), str(console_evtchn)] - log.info("[xc_restore] " + join(cmd)) + + dominfo = xd.restore_(vmconfig) + + assert dominfo.store_channel + assert dominfo.console_channel + assert dominfo.getDomainPath() + + try: + l = read_exact(fd, sizeof_unsigned_long, + "not a valid guest state file: pfn count read") + nr_pfns = unpack("=L", l)[0] # XXX endianess + if nr_pfns > 1024*1024: # XXX + raise XendError( + "not a valid guest state file: pfn count out of range") + + store_evtchn = dominfo.store_channel + console_evtchn = dominfo.console_channel + + cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd), + str(dominfo.getDomid()), str(nr_pfns), + str(store_evtchn), str(console_evtchn)] + log.debug("[xc_restore]: %s", string.join(cmd)) + + def restoreInputHandler(line, _): + m = re.match(r"^(store-mfn) (\d+)$", line) + if m: + store_mfn = int(m.group(2)) + dominfo.setStoreRef(store_mfn) + log.debug("IntroduceDomain %d %d %d %s", + dominfo.getDomid(), + store_mfn, + dominfo.store_channel, + dominfo.getDomainPath()) + IntroduceDomain(dominfo.getDomid(), + store_mfn, + dominfo.store_channel, + dominfo.getDomainPath()) + else: + m = re.match(r"^(console-mfn) (\d+)$", line) + if m: + dominfo.setConsoleRef(int(m.group(2))) + + forkHelper(cmd, fd, restoreInputHandler, True) + + return dominfo + except: + dominfo.destroy() + raise + + +def forkHelper(cmd, fd, inputHandler, closeToChild): child = xPopen3(cmd, True, -1, [fd, xc.handle()]) - child.tochild.close() - - lasterr = "" - p = select.poll() - p.register(child.fromchild.fileno()) - p.register(child.childerr.fileno()) - while True: - r = p.poll() - for (fd, event) in r: - if not event & select.POLLIN: - continue - if fd == child.childerr.fileno(): - l = child.childerr.readline() - log.error(l.rstrip()) - lasterr = l.rstrip() - if fd == child.fromchild.fileno(): - l = child.fromchild.readline() - while l: - log.info(l.rstrip()) - m = re.match(r"^(store-mfn) (\d+)\n$", l) - if m: - if dominfo.store_channel: - 
dominfo.setStoreRef(int(m.group(2))) - if dominfo.store_mfn >= 0: - IntroduceDomain(dominfo.domid, - dominfo.store_mfn, - dominfo.store_channel.port1, - dominfo.path) - m = re.match(r"^(console-mfn) (\d+)\n$", l) - if m: - dominfo.setConsoleRef(int(m.group(2))) - try: - l = child.fromchild.readline() - except: - l = None - if filter(lambda (fd, event): event & select.POLLHUP, r): - break + + if closeToChild: + child.tochild.close() + + try: + fds = [child.fromchild.fileno(), + child.childerr.fileno()] + p = select.poll() + map(p.register, fds) + while len(fds) > 0: + r = p.poll() + for (fd, event) in r: + if event & select.POLLIN: + if fd == child.childerr.fileno(): + lasterr = child.childerr.readline().rstrip() + log.error('%s', lasterr) + else: + l = child.fromchild.readline().rstrip() + while l: + log.debug('%s', l) + inputHandler(l, child.tochild) + try: + l = child.fromchild.readline().rstrip() + except: + l = None + + if event & select.POLLERR: + raise XendError('Error reading from child process for %s', + cmd) + + if event & select.POLLHUP: + fds.remove(fd) + p.unregister(fd) + finally: + child.fromchild.close() + child.childerr.close() + if not closeToChild: + child.tochild.close() if child.wait() >> 8 == 127: - lasterr = "popen %s failed" % PATH_XC_RESTORE + lasterr = "popen failed" if child.wait() != 0: - raise XendError("xc_restore failed: %s" % lasterr) - - return dominfo + raise XendError("%s failed: %s" % (string.join(cmd), lasterr)) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendClient.py --- a/tools/python/xen/xend/XendClient.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendClient.py Sat Oct 8 20:28:24 2005 @@ -232,10 +232,9 @@ {'op' : 'sysrq', 'key' : key}) - def xend_domain_destroy(self, id, reason): - return self.xendPost(self.domainurl(id), - {'op' : 'destroy', - 'reason' : reason }) + def xend_domain_destroy(self, id): + return self.xendPost(self.domainurl(id), + {'op' : 'destroy' }) def xend_domain_save(self, id, filename): return self.xendPost(self.domainurl(id), @@ -307,22 +306,22 @@ {'op' : 'device_create', 'config' : fileof(config) }) - def xend_domain_device_refresh(self, id, type, idx): + def xend_domain_device_refresh(self, id, type, dev): return self.xendPost(self.domainurl(id), {'op' : 'device_refresh', 'type' : type, - 'idx' : idx }) - - def xend_domain_device_destroy(self, id, type, idx): + 'dev' : dev }) + + def xend_domain_device_destroy(self, id, type, dev): return self.xendPost(self.domainurl(id), {'op' : 'device_destroy', 'type' : type, - 'idx' : idx }) - - def xend_domain_device_configure(self, id, config, idx): + 'dev' : dev }) + + def xend_domain_device_configure(self, id, config, dev): return self.xendPost(self.domainurl(id), {'op' : 'device_configure', - 'idx' : idx, + 'dev' : dev, 'config' : fileof(config) }) def xend_vnets(self): diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendDomain.py Sat Oct 8 20:28:24 2005 @@ -22,14 +22,15 @@ Needs to be persistent for one uptime. 
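save() and restore() now share forkHelper(), which runs the helper binary and hands each line of its stdout to a callback along with the child's stdin. A handler in the same shape as saveInputHandler above, as a sketch (the handler name is made up; cmd and fd are assumed bound as in save()):

    def suspendHandler(line, tochild):
        # forkHelper calls this once per line of child stdout; xc_save
        # prints "suspend" and then waits for "done" on its stdin.
        if line == "suspend":
            tochild.write("done\n")
            tochild.flush()

    forkHelper(cmd, fd, suspendHandler, False)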
""" import os +import logging +import threading import xen.lowlevel.xc -from xen.xend import sxp +import XendDomainInfo + from xen.xend import XendRoot from xen.xend import XendCheckpoint -from xen.xend.XendDomainInfo import XendDomainInfo -from xen.xend import EventServer from xen.xend.XendError import XendError from xen.xend.XendLogging import log from xen.xend.server import relocate @@ -37,26 +38,17 @@ xc = xen.lowlevel.xc.new() xroot = XendRoot.instance() -eserver = EventServer.instance() __all__ = [ "XendDomain" ] PRIV_DOMAIN = 0 - -class XendDomainDict(dict): - def get_by_name(self, name): - try: - return filter(lambda d: d.getName() == name, self.values())[0] - except IndexError, err: - return None class XendDomain: """Index of all domains. Singleton. """ - """Dict of domain info indexed by domain id.""" - domains = None + ## public: def __init__(self): # Hack alert. Python does not support mutual imports, but XendDomainInfo @@ -64,18 +56,30 @@ # to import XendDomain from XendDomainInfo causes unbounded recursion. # So we stuff the XendDomain instance (self) into xroot's components. xroot.add_component("xen.xend.XendDomain", self) - self.domains = XendDomainDict() + self.domains = {} + self.domains_lock = threading.Condition() self.watchReleaseDomain() - self.refresh() - self.dom0_setup() + + self.domains_lock.acquire() + try: + self.refresh(True) + self.dom0_setup() + finally: + self.domains_lock.release() + def list(self): """Get list of domain objects. @return: domain objects """ - self.refresh() - return self.domains.values() + self.domains_lock.acquire() + try: + self.refresh() + return self.domains.values() + finally: + self.domains_lock.release() + def list_sorted(self): """Get list of domain objects, sorted by name. @@ -94,15 +98,25 @@ doms = self.list_sorted() return map(lambda x: x.getName(), doms) + + ## private: + def onReleaseDomain(self): - self.refresh() + self.domains_lock.acquire() + try: + self.refresh() + finally: + self.domains_lock.release() + def watchReleaseDomain(self): from xen.xend.xenstore.xswatch import xswatch self.releaseDomain = xswatch("@releaseDomain", self.onReleaseDomain) + def xen_domains(self): - """Get table of domains indexed by id from xc. + """Get table of domains indexed by id from xc. Expects to be + protected by the domains_lock. """ domlist = xc.domain_getinfo() doms = {} @@ -111,68 +125,37 @@ doms[domid] = d return doms - def xen_domain(self, dom): - """Get info about a single domain from xc. - Returns None if not found. - - @param dom domain id (int) - """ - dominfo = xc.domain_getinfo(dom, 1) - if dominfo == [] or dominfo[0]['dom'] != dom: - dominfo = None - else: - dominfo = dominfo[0] - return dominfo - - - def recreate_domain(self, xeninfo): - """Refresh initial domain info from db.""" - - dominfo = XendDomainInfo.recreate(xeninfo) - self._add_domain(dominfo) - return dominfo - def dom0_setup(self): - dom0 = self.domain_lookup(PRIV_DOMAIN) - if not dom0: - dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN)) - dom0.dom0_init_store() + """Expects to be protected by the domains_lock.""" + dom0 = self.domains[PRIV_DOMAIN] dom0.dom0_enforce_vcpus() - def _add_domain(self, info, notify=True): - """Add a domain entry to the tables. - - @param info: domain info object - @param notify: send a domain created event if true - """ - if info.getDomid() in self.domains: - notify = False + def _add_domain(self, info): + """Add the given domain entry to this instance's internal cache. + Expects to be protected by the domains_lock. 
+ """ self.domains[info.getDomid()] = info - info.exportToDB() - if notify: - eserver.inject('xend.domain.create', [info.getName(), - info.getDomid()]) - - def _delete_domain(self, domid, notify=True): - """Remove a domain from the tables. - - @param id: domain id - @param notify: send a domain died event if true + + + def _delete_domain(self, domid): + """Remove the given domain from this instance's internal cache. + Expects to be protected by the domains_lock. """ info = self.domains.get(domid) if info: del self.domains[domid] - info.cleanup() - info.delete() - if notify: - eserver.inject('xend.domain.died', [info.getName(), - info.getDomid()]) - - - def refresh(self): - """Refresh domain list from Xen. + info.cleanupDomain() + + + def refresh(self, initialising = False): + """Refresh domain list from Xen. Expects to be protected by the + domains_lock. + + @param initialising True if this is the first refresh after starting + Xend. This does not change this method's behaviour, except for + logging. """ doms = self.xen_domains() for d in self.domains.values(): @@ -183,30 +166,33 @@ self._delete_domain(d.getDomid()) for d in doms: if d not in self.domains: - try: - self.recreate_domain(doms[d]) - except: - log.exception( - "Failed to recreate information for domain %d. " - "Destroying it in the hope of recovery.", d) + if doms[d]['dying']: + log.log(initialising and logging.ERROR or logging.DEBUG, + 'Cannot recreate information for dying domain %d.' + ' Xend will ignore this domain from now on.', + doms[d]['dom']) + else: try: - xc.domain_destroy(dom = d) + dominfo = XendDomainInfo.recreate(doms[d]) + self._add_domain(dominfo) except: - log.exception('Destruction of %d failed.', d) - - - def update_domain(self, id): - """Update information for a single domain. - - @param id: domain id - """ - dominfo = self.xen_domain(id) - if dominfo: - d = self.domains.get(id) - if d: - d.update(dominfo) - else: - self._delete_domain(id) + if d == PRIV_DOMAIN: + log.exception( + "Failed to recreate information for domain " + "%d. Doing nothing except crossing my " + "fingers.", d) + else: + log.exception( + "Failed to recreate information for domain " + "%d. Destroying it in the hope of " + "recovery.", d) + try: + xc.domain_destroy(dom = d) + except: + log.exception('Destruction of %d failed.', d) + + + ## public: def domain_create(self, config): """Create a domain from a configuration. @@ -214,24 +200,22 @@ @param config: configuration @return: domain """ - dominfo = XendDomainInfo.create(config) - self._add_domain(dominfo) - return dominfo + self.domains_lock.acquire() + try: + dominfo = XendDomainInfo.create(config) + self._add_domain(dominfo) + return dominfo + finally: + self.domains_lock.release() + def domain_configure(self, config): - """Configure an existing domain. This is intended for internal - use by domain restore and migrate. + """Configure an existing domain. @param vmconfig: vm configuration """ - # We accept our configuration specified as ['config' [...]], which - # some tools or configuration files may be using. For save-restore, - # we use the value of XendDomainInfo.sxpr() directly, which has no - # such item. - nested = sxp.child_value(config, 'config') - if nested: - config = nested - return XendDomainInfo.restore(config) + # !!! + raise XendError("Unsupported") def domain_restore(self, src): """Restore a domain from file. 
@@ -241,89 +225,156 @@ try: fd = os.open(src, os.O_RDONLY) - dominfo = XendCheckpoint.restore(self, fd) - self._add_domain(dominfo) - return dominfo + try: + return self.domain_restore_fd(fd) + finally: + os.close(fd) except OSError, ex: raise XendError("can't read guest state file %s: %s" % (src, ex[1])) - def domain_get(self, id): - """Get up-to-date info about a domain. - - @param id: domain id - @return: domain object (or None) - """ - self.update_domain(id) - return self.domains.get(id) - - - def domain_lookup(self, id): - return self.domains.get(id) - - def domain_lookup_by_name(self, name): - dominfo = self.domains.get_by_name(name) - if not dominfo: - try: - id = int(name) - dominfo = self.domain_lookup(id) - except ValueError: - pass - return dominfo - - def domain_unpause(self, id): - """Unpause domain execution. - - @param id: domain id - """ - dominfo = self.domain_lookup(id) - eserver.inject('xend.domain.unpause', [dominfo.getName(), - dominfo.getDomid()]) - try: + def domain_restore_fd(self, fd): + """Restore a domain from the given file descriptor.""" + + try: + return XendCheckpoint.restore(self, fd) + except: + # I don't really want to log this exception here, but the error + # handling in the relocation-socket handling code (relocate.py) is + # poor, so we need to log this for debugging. + log.exception("Restore failed") + raise + + + def restore_(self, config): + """Create a domain as part of the restore process. This is called + only from {@link XendCheckpoint}. + + A restore request comes into XendDomain through {@link + #domain_restore} or {@link #domain_restore_fd}. That request is + forwarded immediately to XendCheckpoint which, when it is ready, will + call this method. It is necessary to come through here rather than go + directly to {@link XendDomainInfo.restore} because we need to + serialise the domain creation process, but cannot lock + domain_restore_fd as a whole, otherwise we will deadlock waiting for + the old domain to die. + """ + self.domains_lock.acquire() + try: + dominfo = XendDomainInfo.restore(config) + self._add_domain(dominfo) + return dominfo + finally: + self.domains_lock.release() + + + def domain_lookup(self, domid): + self.domains_lock.acquire() + try: + self.refresh() + return self.domains.get(domid) + finally: + self.domains_lock.release() + + + def domain_lookup_nr(self, domid): + self.domains_lock.acquire() + try: + return self.domains.get(domid) + finally: + self.domains_lock.release() + + + def domain_lookup_by_name_or_id(self, name): + self.domains_lock.acquire() + try: + self.refresh() + return self.domain_lookup_by_name_or_id_nr(name) + finally: + self.domains_lock.release() + + + def domain_lookup_by_name_or_id_nr(self, name): + self.domains_lock.acquire() + try: + dominfo = self.domain_lookup_by_name_nr(name) + + if dominfo: + return dominfo + else: + try: + return self.domains.get(int(name)) + except ValueError: + return None + finally: + self.domains_lock.release() + + + def domain_lookup_by_name_nr(self, name): + self.domains_lock.acquire() + try: + matching = filter(lambda d: d.getName() == name, + self.domains.values()) + n = len(matching) + if n == 1: + return matching[0] + elif n > 1: + log.error('Name uniqueness has been violated for name %s! 
' + 'Recovering by renaming:', name) + for d in matching: + d.renameUniquely() + + return None + finally: + self.domains_lock.release() + + + def privilegedDomain(self): + self.domains_lock.acquire() + try: + return self.domains[PRIV_DOMAIN] + finally: + self.domains_lock.release() + + + def domain_unpause(self, domid): + """Unpause domain execution.""" + try: + dominfo = self.domain_lookup(domid) + log.info("Domain %s (%d) unpaused.", dominfo.getName(), + dominfo.getDomid()) return xc.domain_unpause(dom=dominfo.getDomid()) except Exception, ex: raise XendError(str(ex)) - - def domain_pause(self, id): - """Pause domain execution. - - @param id: domain id - """ - dominfo = self.domain_lookup(id) - eserver.inject('xend.domain.pause', [dominfo.getName(), - dominfo.getDomid()]) - try: + + + def domain_pause(self, domid): + """Pause domain execution.""" + try: + dominfo = self.domain_lookup(domid) + log.info("Domain %s (%d) paused.", dominfo.getName(), + dominfo.getDomid()) return xc.domain_pause(dom=dominfo.getDomid()) except Exception, ex: raise XendError(str(ex)) - def domain_shutdown(self, domid, reason='poweroff'): + def domain_shutdown(self, domid, reason = 'poweroff'): """Shutdown domain (nicely). - - poweroff: restart according to exit code and restart mode - - reboot: restart on exit - - halt: do not restart - - Returns immediately. - - @param id: domain id - @param reason: shutdown type: poweroff, reboot, suspend, halt - """ - self.callInfo(domid, XendDomainInfo.shutdown, reason) + + @param reason: shutdown reason: poweroff, reboot, suspend, halt + """ + self.callInfo(domid, XendDomainInfo.XendDomainInfo.shutdown, reason) def domain_sysrq(self, domid, key): """Send a SysRq to the specified domain.""" - return self.callInfo(domid, XendDomainInfo.send_sysrq, key) - - - def domain_destroy(self, domid, reason='halt'): - """Terminate domain immediately. - - halt: cancel any restart for the domain - - reboot schedule a restart for the domain - - @param domid: domain id - """ + return self.callInfo(domid, XendDomainInfo.XendDomainInfo.send_sysrq, + key) + + + def domain_destroy(self, domid): + """Terminate domain immediately.""" if domid == PRIV_DOMAIN: raise XendError("Cannot destroy privileged domain %i" % domid) @@ -338,68 +389,52 @@ raise XendError(str(ex)) return val - def domain_migrate(self, id, dst, live=False, resource=0): - """Start domain migration. - - @param id: domain id - """ - # Need a cancel too? - # Don't forget to cancel restart for it. - dominfo = self.domain_lookup(id) + def domain_migrate(self, domid, dst, live=False, resource=0): + """Start domain migration.""" + + dominfo = self.domain_lookup(domid) port = xroot.get_xend_relocation_port() sock = relocate.setupRelocation(dst, port) - # temporarily rename domain for localhost migration - if dst == "localhost": - dominfo.setName("tmp-" + dominfo.getName()) - - try: - XendCheckpoint.save(self, sock.fileno(), dominfo, live) - except: - if dst == "localhost": - dominfo.setName( - string.replace(dominfo.getName(), "tmp-", "", 1)) - raise + XendCheckpoint.save(sock.fileno(), dominfo, live) - return None - - def domain_save(self, id, dst): + + def domain_save(self, domid, dst): """Start saving a domain to file. 
- @param id: domain id @param dst: destination file """ try: - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - - # For now we don't support 'live checkpoint' - return XendCheckpoint.save(self, fd, dominfo, False) - + try: + # For now we don't support 'live checkpoint' + return XendCheckpoint.save(fd, dominfo, False) + finally: + os.close(fd) except OSError, ex: raise XendError("can't write guest state file %s: %s" % (dst, ex[1])) - def domain_pincpu(self, id, vcpu, cpumap): + def domain_pincpu(self, domid, vcpu, cpumap): """Set which cpus vcpu can use - @param id: domain - @param vcpu: vcpu number - @param cpumap: bitmap of usbale cpus - """ - dominfo = self.domain_lookup(id) + @param cpumap: bitmap of usable cpus + """ + dominfo = self.domain_lookup(domid) try: return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap) except Exception, ex: raise XendError(str(ex)) - def domain_cpu_bvt_set(self, id, mcuadv, warpback, warpvalue, warpl, warpu): + def domain_cpu_bvt_set(self, domid, mcuadv, warpback, warpvalue, warpl, + warpu): """Set BVT (Borrowed Virtual Time) scheduler parameters for a domain. """ - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) try: return xc.bvtsched_domain_set(dom=dominfo.getDomid(), mcuadv=mcuadv, @@ -409,30 +444,31 @@ except Exception, ex: raise XendError(str(ex)) - def domain_cpu_bvt_get(self, id): + def domain_cpu_bvt_get(self, domid): """Get BVT (Borrowed Virtual Time) scheduler parameters for a domain. """ - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) try: return xc.bvtsched_domain_get(dominfo.getDomid()) except Exception, ex: raise XendError(str(ex)) - def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight): + def domain_cpu_sedf_set(self, domid, period, slice_, latency, extratime, + weight): """Set Simple EDF scheduler parameters for a domain. """ - dominfo = self.domain_lookup(id) - try: - return xc.sedf_domain_set(dominfo.getDomid(), period, slice, + dominfo = self.domain_lookup(domid) + try: + return xc.sedf_domain_set(dominfo.getDomid(), period, slice_, latency, extratime, weight) except Exception, ex: raise XendError(str(ex)) - def domain_cpu_sedf_get(self, id): + def domain_cpu_sedf_get(self, domid): """Get Simple EDF scheduler parameters for a domain. """ - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) try: return xc.sedf_domain_get(dominfo.getDomid()) except Exception, ex: @@ -442,63 +478,58 @@ def domain_device_create(self, domid, devconfig): """Create a new device for the specified domain. """ - return self.callInfo(domid, XendDomainInfo.device_create, devconfig) + return self.callInfo(domid, + XendDomainInfo.XendDomainInfo.device_create, + devconfig) def domain_device_configure(self, domid, devconfig, devid): """Configure an existing device in the specified domain. 
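The callInfo() calls above now pass unbound methods (XendDomainInfo.XendDomainInfo.shutdown and friends) because the import at the top of the file changed from the class to the module. callInfo itself is not visible in this hunk; a plausible shape, purely as a sketch, is:

    def callInfo(self, domid, fn, *args, **kwargs):
        # Hypothetical sketch: look the domain up and apply the unbound
        # method to the instance, where fn is something like
        # XendDomainInfo.XendDomainInfo.shutdown.
        dominfo = self.domain_lookup(domid)
        if dominfo:
            return fn(dominfo, *args, **kwargs)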
@return: updated device configuration """ - return self.callInfo(domid, XendDomainInfo.device_configure, + return self.callInfo(domid, + XendDomainInfo.XendDomainInfo.device_configure, devconfig, devid) - def domain_device_refresh(self, domid, devtype, devid): - """Refresh a device.""" - return self.callInfo(domid, XendDomainInfo.device_refresh, devtype, - devid) - - def domain_device_destroy(self, domid, devtype, devid): """Destroy a device.""" - return self.callInfo(domid, XendDomainInfo.destroyDevice, devtype, - devid) + return self.callInfo(domid, + XendDomainInfo.XendDomainInfo.destroyDevice, + devtype, devid) def domain_devtype_ls(self, domid, devtype): """Get list of device sxprs for the specified domain.""" - return self.callInfo(domid, XendDomainInfo.getDeviceSxprs, devtype) - - - def domain_vif_limit_set(self, id, vif, credit, period): + return self.callInfo(domid, + XendDomainInfo.XendDomainInfo.getDeviceSxprs, + devtype) + + + def domain_vif_limit_set(self, domid, vif, credit, period): """Limit the vif's transmission rate """ - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) dev = dominfo.getDevice('vif', vif) if not dev: raise XendError("invalid vif") return dev.setCreditLimit(credit, period) - def domain_shadow_control(self, id, op): - """Shadow page control. - - @param id: domain - @param op: operation - """ - dominfo = self.domain_lookup(id) + def domain_shadow_control(self, domid, op): + """Shadow page control.""" + dominfo = self.domain_lookup(domid) try: return xc.shadow_control(dominfo.getDomid(), op) except Exception, ex: raise XendError(str(ex)) - def domain_maxmem_set(self, id, mem): + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. - @param id: domain @param mem: memory limit (in MiB) @return: 0 on success, -1 on error """ - dominfo = self.domain_lookup(id) + dominfo = self.domain_lookup(domid) maxmem = int(mem) * 1024 try: return xc.domain_setmaxmem(dominfo.getDomid(), @@ -511,7 +542,8 @@ @param mem: memory target (in MiB) """ - self.callInfo(domid, XendDomainInfo.setMemoryTarget, mem << 10) + self.callInfo(domid, XendDomainInfo.XendDomainInfo.setMemoryTarget, + mem << 10) def domain_vcpu_hotplug(self, domid, vcpu, state): @@ -520,12 +552,13 @@ @param vcpu: target VCPU in domain @param state: which state VCPU will become """ - self.callInfo(domid, XendDomainInfo.vcpu_hotplug, vcpu, state) + self.callInfo(domid, XendDomainInfo.XendDomainInfo.vcpu_hotplug, vcpu, + state) def domain_dumpcore(self, domid): """Save a core dump for a crashed domain.""" - self.callInfo(domid, XendDomainInfo.dumpCore) + self.callInfo(domid, XendDomainInfo.XendDomainInfo.dumpCore) ## private: diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Sat Oct 8 20:28:24 2005 @@ -32,8 +32,6 @@ import xen.lowlevel.xc from xen.util.blkif import blkdev_uname_to_file -from xen.xend.server.channel import EventChannel - from xen.xend import image from xen.xend import scheduler from xen.xend import sxp @@ -59,27 +57,27 @@ """Shutdown code for crash.""" DOMAIN_CRASH = 3 +"""Shutdown code for halt.""" +DOMAIN_HALT = 4 + """Map shutdown codes to strings.""" shutdown_reasons = { DOMAIN_POWEROFF: "poweroff", DOMAIN_REBOOT : "reboot", DOMAIN_SUSPEND : "suspend", DOMAIN_CRASH : "crash", + DOMAIN_HALT : "halt" } -RESTART_ALWAYS = 'always' -RESTART_ONREBOOT = 'onreboot' -RESTART_NEVER = 'never' - restart_modes = [ - 
RESTART_ALWAYS, - RESTART_ONREBOOT, - RESTART_NEVER, + "restart", + "destroy", + "preserve", + "rename-restart" ] STATE_VM_OK = "ok" STATE_VM_TERMINATED = "terminated" -STATE_VM_SUSPENDED = "suspended" """Flag for a block device backend domain.""" SIF_BLK_BE_DOMAIN = (1<<4) @@ -94,9 +92,10 @@ SHUTDOWN_TIMEOUT = 30 -DOMROOT = '/domain' -VMROOT = '/domain' - +DOMROOT = '/local/domain/' +VMROOT = '/vm/' + +ZOMBIE_PREFIX = 'Zombie-' xc = xen.lowlevel.xc.new() xroot = XendRoot.instance() @@ -111,15 +110,219 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [ ('name', str), ('ssidref', int), + ('vcpus', int), + ('vcpu_avail', int), ('cpu_weight', float), - ('bootloader', str) + ('bootloader', str), + ('on_poweroff', str), + ('on_reboot', str), + ('on_crash', str) ] -def domain_exists(name): +# +# There are a number of CPU-related fields: +# +# vcpus: the number of virtual CPUs this domain is configured to use. +# vcpu_avail: a bitmap telling the guest domain whether it may use each of +# its VCPUs. This is translated to +# <dompath>/cpu/<id>/availability = {online,offline} for use +# by the guest domain. +# vcpu_to_cpu: the current mapping between virtual CPUs and the physical +# CPU it is using. +# cpumap: a list of bitmaps, one for each VCPU, giving the physical +# CPUs that that VCPU may use. +# cpu: a configuration setting requesting that VCPU 0 is pinned to +# the specified physical CPU. +# +# vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent +# across save, restore, migrate, and restart). The other settings are only +# specific to the domain, so are lost when the VM moves. +# + + +def create(config): + """Create a VM from a configuration. + + @param config configuration + @raise: VmError for invalid configuration + """ + + log.debug("XendDomainInfo.create(%s)", config) + + vm = XendDomainInfo(getUuid(), parseConfig(config)) + try: + vm.construct() + vm.initDomain() + vm.construct_image() + vm.configure() + vm.storeVmDetails() + vm.storeDomDetails() + vm.refreshShutdown() + return vm + except: + log.exception('Domain construction failed') + vm.destroy() + raise + + +def recreate(xeninfo): + """Create the VM object for an existing domain. The domain must not + be dying, as the paths in the store should already have been removed, + and asking us to recreate them causes problems.""" + + log.debug("XendDomainInfo.recreate(%s)", xeninfo) + + assert not xeninfo['dying'] + + domid = xeninfo['dom'] + try: + dompath = GetDomainPath(domid) + if not dompath: + raise XendError( + 'No domain path in store for existing domain %d' % domid) + vmpath = xstransact.Read(dompath, "vm") + if not vmpath: + raise XendError( + 'No vm path in store for existing domain %d' % domid) + uuid = xstransact.Read(vmpath, "uuid") + if not uuid: + raise XendError( + 'No vm/uuid path in store for existing domain %d' % domid) + + log.info("Recreating domain %d, UUID %s.", domid, uuid) + + vm = XendDomainInfo(uuid, xeninfo, domid, True) + + except Exception, exn: + log.warn(str(exn)) + + uuid = getUuid() + + log.info("Recreating domain %d with new UUID %s.", domid, uuid) + + vm = XendDomainInfo(uuid, xeninfo, domid, True) + vm.removeDom() + vm.storeVmDetails() + vm.storeDomDetails() + + vm.create_channel() + if domid == 0: + vm.initStoreConnection() + + vm.refreshShutdown(xeninfo) + return vm + + +def restore(config): + """Create a domain and a VM object to do a restore. 
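To make the vcpu_avail bitmap described in the comment above concrete: validateInfo() further below defaults it to (1 << vcpus) - 1, i.e. every configured VCPU usable. For example:

    vcpus = 4
    vcpu_avail = (1 << vcpus) - 1       # 0xf: all four VCPUs usable

    def vcpu_usable(avail, n):
        # bit n of the map says whether the guest may use VCPU n
        return bool(avail & (1 << n))

    print vcpu_usable(vcpu_avail, 2)              # True
    print vcpu_usable(vcpu_avail & ~(1 << 2), 2)  # False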
+ + @param config: domain configuration + """ + + log.debug("XendDomainInfo.restore(%s)", config) + + try: + uuid = sxp.child_value(config, 'uuid') + ssidref = int(sxp.child_value(config, 'ssidref')) + except TypeError, exn: + raise VmError('Invalid ssidref in config: %s' % exn) + + vm = XendDomainInfo(uuid, parseConfig(config)) + try: + vm.construct() + vm.configure() + vm.create_channel() + vm.storeVmDetails() + vm.storeDomDetails() + vm.refreshShutdown() + return vm + except: + vm.destroy() + raise + + +def parseConfig(config): + def get_cfg(name, conv = None): + val = sxp.child_value(config, name) + + if conv and not val is None: + try: + return conv(val) + except TypeError, exn: + raise VmError( + 'Invalid setting %s = %s in configuration: %s' % + (name, val, str(exn))) + else: + return val + + + log.debug("parseConfig: config is %s" % str(config)) + + result = {} + + for e in ROUNDTRIPPING_CONFIG_ENTRIES: + result[e[0]] = get_cfg(e[0], e[1]) + + result['memory'] = get_cfg('memory', int) + result['mem_kb'] = get_cfg('mem_kb', int) + result['maxmem'] = get_cfg('maxmem', int) + result['maxmem_kb'] = get_cfg('maxmem_kb', int) + result['cpu'] = get_cfg('cpu', int) + result['image'] = get_cfg('image') + + try: + if result['image']: + result['vcpus'] = int(sxp.child_value(result['image'], + 'vcpus', 1)) + else: + result['vcpus'] = 1 + except TypeError, exn: + raise VmError( + 'Invalid configuration setting: vcpus = %s: %s' % + (sxp.child_value(result['image'], 'vcpus', 1), str(exn))) + + result['backend'] = [] + for c in sxp.children(config, 'backend'): + result['backend'].append(sxp.name(sxp.child0(c))) + + result['device'] = [] + for d in sxp.children(config, 'device'): + c = sxp.child0(d) + result['device'].append((sxp.name(c), c)) + + # Configuration option "restart" is deprecated. Parse it, but + # let on_xyz override it if they are present. + restart = get_cfg('restart') + if restart: + def handle_restart(event, val): + if not event in result: + result[event] = val + + if restart == "onreboot": + handle_restart('on_poweroff', 'destroy') + handle_restart('on_reboot', 'restart') + handle_restart('on_crash', 'destroy') + elif restart == "always": + handle_restart('on_poweroff', 'restart') + handle_restart('on_reboot', 'restart') + handle_restart('on_crash', 'restart') + elif restart == "never": + handle_restart('on_poweroff', 'destroy') + handle_restart('on_reboot', 'destroy') + handle_restart('on_crash', 'destroy') + else: + log.warn("Ignoring malformed and deprecated config option " + "restart = %s", restart) + + log.debug("parseConfig: result is %s" % str(result)) + return result + + +def domain_by_name(name): # See comment in XendDomain constructor. xd = get_component('xen.xend.XendDomain') - return xd.domain_lookup_by_name(name) + return xd.domain_lookup_by_name_nr(name) def shutdown_reason(code): """Get a shutdown reason from a code. @@ -143,7 +346,7 @@ return domlist[0] except Exception, err: # ignore missing domain - log.exception("domain_getinfo(%d) failed, ignoring", dom) + log.debug("domain_getinfo(%d) failed, ignoring: %s", dom, str(err)) return None class XendDomainInfo: @@ -154,155 +357,24 @@ MINIMUM_RESTART_TIME = 20 - def create(cls, config): - """Create a VM from a configuration. 
- - @param config configuration - @raise: VmError for invalid configuration - """ - - log.debug("XendDomainInfo.create(...)") - - vm = cls(getUuid(), cls.parseConfig(config)) - vm.construct() - vm.refreshShutdown() - return vm - - create = classmethod(create) - - - def recreate(cls, xeninfo): - """Create the VM object for an existing domain.""" - - log.debug("XendDomainInfo.recreate(%s)", xeninfo) - - domid = xeninfo['dom'] - try: - dompath = GetDomainPath(domid) - if not dompath: - raise XendError( - 'No domain path in store for existing domain %d' % domid) - vmpath = xstransact.Read(dompath, "vm") - if not vmpath: - raise XendError( - 'No vm path in store for existing domain %d' % domid) - uuid = xstransact.Read(vmpath, "uuid") - if not uuid: - raise XendError( - 'No vm/uuid path in store for existing domain %d' % domid) - - except Exception, exn: - log.warn(str(exn)) - uuid = getUuid() - - log.info("Recreating domain %d, uuid %s", domid, uuid) - - vm = cls(uuid, xeninfo, domid, True) - vm.refreshShutdown(xeninfo) - return vm - - recreate = classmethod(recreate) - - - def restore(cls, config, uuid = None): - """Create a domain and a VM object to do a restore. - - @param config: domain configuration - @param uuid: uuid to use - """ - - log.debug("XendDomainInfo.restore(%s, %s)", config, uuid) - - if not uuid: - uuid = getUuid() - - try: - ssidref = int(sxp.child_value(config, 'ssidref')) - except TypeError, exn: - raise VmError('Invalid ssidref in config: %s' % exn) - - vm = cls(uuid, cls.parseConfig(config), - xc.domain_create(ssidref = ssidref)) - vm.create_channel() - vm.configure() - vm.exportToDB() - vm.refreshShutdown() - return vm - - restore = classmethod(restore) - - - def parseConfig(cls, config): - def get_cfg(name, conv = None): - val = sxp.child_value(config, name) - - if conv and not val is None: - try: - return conv(val) - except TypeError, exn: - raise VmError( - 'Invalid setting %s = %s in configuration: %s' % - (name, val, str(exn))) - else: - return val - - - log.debug("parseConfig: config is %s" % str(config)) - - result = {} - - for e in ROUNDTRIPPING_CONFIG_ENTRIES: - result[e[0]] = get_cfg(e[0], e[1]) - - result['memory'] = get_cfg('memory', int) - result['mem_kb'] = get_cfg('mem_kb', int) - result['maxmem'] = get_cfg('maxmem', int) - result['maxmem_kb'] = get_cfg('maxmem_kb', int) - result['cpu'] = get_cfg('cpu', int) - result['restart_mode'] = get_cfg('restart') - result['image'] = get_cfg('image') - - try: - if result['image']: - result['vcpus'] = int(sxp.child_value(result['image'], - 'vcpus', 1)) - else: - result['vcpus'] = 1 - except TypeError, exn: - raise VmError( - 'Invalid configuration setting: vcpus = %s: %s' % - (sxp.child_value(result['image'], 'vcpus', 1), str(exn))) - - result['backend'] = [] - for c in sxp.children(config, 'backend'): - result['backend'].append(sxp.name(sxp.child0(c))) - - result['device'] = [] - for d in sxp.children(config, 'device'): - c = sxp.child0(d) - result['device'].append((sxp.name(c), c)) - - log.debug("parseConfig: result is %s" % str(result)) - return result - - - parseConfig = classmethod(parseConfig) - - def __init__(self, uuid, info, domid = None, augment = False): self.uuid = uuid self.info = info - self.path = DOMROOT + "/" + uuid - - if domid: + if domid is not None: self.domid = domid elif 'dom' in info: self.domid = int(info['dom']) else: self.domid = None + self.vmpath = VMROOT + uuid + if self.domid is None: + self.dompath = None + else: + self.dompath = DOMROOT + str(self.domid) + if augment: 
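With DOMROOT and VMROOT redefined above, the constructor derives both store paths directly from the UUID and domain id (values below are illustrative):

    DOMROOT = '/local/domain/'
    VMROOT  = '/vm/'

    domid = 3
    uuid  = '9f1b2c3d-0000-0000-0000-000000000000'

    vmpath  = VMROOT + uuid          # '/vm/9f1b2c3d-...'
    dompath = DOMROOT + str(domid)   # '/local/domain/3'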
self.augmentInfo() @@ -317,9 +389,7 @@ self.state = STATE_VM_OK self.state_updated = threading.Condition() - - self.writeVm("uuid", self.uuid) - self.storeDom("vm", self.path) + self.refresh_shutdown_lock = threading.Condition() def augmentInfo(self): @@ -332,18 +402,30 @@ self.info[name] = val params = (("name", str), - ("restart-mode", str), + ("on_poweroff", str), + ("on_reboot", str), + ("on_crash", str), ("image", str), - ("start-time", float)) + ("vcpus", int), + ("vcpu_avail", int), + ("start_time", float)) from_store = self.gatherVm(*params) map(lambda x, y: useIfNeeded(x[0], y), params, from_store) + + device = [] + for c in controllerClasses: + devconfig = self.getDeviceConfigurations(c) + if devconfig: + device.extend(map(lambda x: (c, x), devconfig)) + useIfNeeded('device', device) def validateInfo(self): """Validate and normalise the info block. This has either been parsed - by parseConfig, or received from xc through recreate. + by parseConfig, or received from xc through recreate and augmented by + the current store contents. """ def defaultInfo(name, val): if not self.infoIsSet(name): @@ -352,9 +434,13 @@ try: defaultInfo('name', lambda: "Domain-%d" % self.domid) defaultInfo('ssidref', lambda: 0) - defaultInfo('restart_mode', lambda: RESTART_ONREBOOT) + defaultInfo('on_poweroff', lambda: "destroy") + defaultInfo('on_reboot', lambda: "restart") + defaultInfo('on_crash', lambda: "restart") defaultInfo('cpu', lambda: None) defaultInfo('cpu_weight', lambda: 1.0) + defaultInfo('vcpus', lambda: 1) + defaultInfo('vcpu_avail', lambda: (1 << self.info['vcpus']) - 1) defaultInfo('bootloader', lambda: None) defaultInfo('backend', lambda: []) defaultInfo('device', lambda: []) @@ -377,7 +463,7 @@ # mem_kb. def discard_negatives(name): - if self.infoIsSet(name) and self.info[name] <= 0: + if self.infoIsSet(name) and self.info[name] < 0: del self.info[name] def valid_KiB_(mb_name, kb_name): @@ -403,7 +489,7 @@ def valid_KiB(mb_name, kb_name): result = valid_KiB_(mb_name, kb_name) - if result <= 0: + if result is None or result < 0: raise VmError('Invalid %s / %s: %s' % (mb_name, kb_name, result)) else: @@ -436,15 +522,10 @@ raise VmError('invalid device (%s, %s)' % (str(n), str(c))) - if self.info['restart_mode'] not in restart_modes: - raise VmError('invalid restart mode: ' + - str(self.info['restart_mode'])) - - if 'cpumap' not in self.info: - if [self.info['vcpus'] == 1]: - self.info['cpumap'] = [1]; - else: - raise VmError('Cannot create CPU map') + for event in ['on_poweroff', 'on_reboot', 'on_crash']: + if self.info[event] not in restart_modes: + raise VmError('invalid restart event: %s = %s' % + (event, str(self.info[event]))) except KeyError, exn: log.exception(exn) @@ -452,42 +533,61 @@ def readVm(self, *args): - return xstransact.Read(self.path, *args) + return xstransact.Read(self.vmpath, *args) def writeVm(self, *args): - return xstransact.Write(self.path, *args) + return xstransact.Write(self.vmpath, *args) def removeVm(self, *args): - return xstransact.Remove(self.path, *args) + return xstransact.Remove(self.vmpath, *args) def gatherVm(self, *args): - return xstransact.Gather(self.path, *args) + return xstransact.Gather(self.vmpath, *args) def storeVm(self, *args): - return xstransact.Store(self.path, *args) + return xstransact.Store(self.vmpath, *args) def readDom(self, *args): - return xstransact.Read(self.path, *args) + return xstransact.Read(self.dompath, *args) def writeDom(self, *args): - return xstransact.Write(self.path, *args) + return xstransact.Write(self.dompath, 
*args) def removeDom(self, *args): - return xstransact.Remove(self.path, *args) + return xstransact.Remove(self.dompath, *args) def gatherDom(self, *args): - return xstransact.Gather(self.path, *args) + return xstransact.Gather(self.dompath, *args) def storeDom(self, *args): - return xstransact.Store(self.path, *args) - - - def exportToDB(self): + return xstransact.Store(self.dompath, *args) + + + def storeVmDetails(self): + to_store = { + 'uuid': self.uuid, + + # XXX + 'memory/target': str(self.info['memory_KiB']) + } + + if self.infoIsSet('image'): + to_store['image'] = sxp.to_string(self.info['image']) + + for k in ['name', 'ssidref', 'on_poweroff', 'on_reboot', 'on_crash', + 'vcpus', 'vcpu_avail']: + if self.infoIsSet(k): + to_store[k] = str(self.info[k]) + + log.debug("Storing VM details: %s" % str(to_store)) + + self.writeVm(to_store) + + + def storeDomDetails(self): to_store = { 'domid': str(self.domid), - 'uuid': self.uuid, - - 'xend/restart_mode': str(self.info['restart_mode']), + 'vm': self.vmpath, 'memory/target': str(self.info['memory_KiB']) } @@ -496,11 +596,18 @@ if v: to_store[k] = str(v) - to_store['image'] = sxp.to_string(self.info['image']) - - log.debug("Storing %s" % str(to_store)) - - self.writeVm(to_store) + def availability(n): + if self.info['vcpu_avail'] & (1 << n): + return 'online' + else: + return 'offline' + + for v in range(0, self.info['vcpus']): + to_store["cpu/%d/availability" % v] = availability(v) + + log.debug("Storing domain details: %s" % str(to_store)) + + self.writeDom(to_store) def setDomid(self, domid): @@ -522,8 +629,8 @@ def getName(self): return self.info['name'] - def getPath(self): - return self.path + def getDomainPath(self): + return self.dompath def getUuid(self): return self.uuid @@ -549,85 +656,119 @@ def refreshShutdown(self, xeninfo = None): - if xeninfo is None: - xeninfo = dom_get(self.domid) + # If set at the end of this method, a restart is required, with the + # given reason. This restart has to be done out of the scope of + # refresh_shutdown_lock. + restart_reason = None + + self.refresh_shutdown_lock.acquire() + try: if xeninfo is None: - # The domain no longer exists. This will occur if we have - # scheduled a timer to check for shutdown timeouts and the - # shutdown succeeded. + xeninfo = dom_get(self.domid) + if xeninfo is None: + # The domain no longer exists. This will occur if we have + # scheduled a timer to check for shutdown timeouts and the + # shutdown succeeded. It will also occur if someone + # destroys a domain beneath us. We clean up the domain, + # just in case, but we can't clean up the VM, because that + # VM may have migrated to a different domain on this + # machine. + self.cleanupDomain() + return + + if xeninfo['dying']: + # Dying means that a domain has been destroyed, but has not + # yet been cleaned up by Xen. This state could persist + # indefinitely if, for example, another domain has some of its + # pages mapped. We might like to diagnose this problem in the + # future, but for now all we do is make sure that it's not us + # holding the pages, by calling cleanupDomain. We can't + # clean up the VM, as above. + self.cleanupDomain() return - if xeninfo['dying']: - # Dying means that a domain has been destroyed, but has not yet - # been cleaned up by Xen. This could persist indefinitely if, - # for example, another domain has some of its pages mapped. - # We might like to diagnose this problem in the future, but for - # now all we can sensibly do is ignore it. 
- pass - - elif xeninfo['crashed']: - log.warn('Domain has crashed: name=%s id=%d.', - self.info['name'], self.domid) - - if xroot.get_enable_dump(): - self.dumpCore() - - self.maybeRestart('crashed') - - elif xeninfo['shutdown']: - reason = shutdown_reason(xeninfo['shutdown_reason']) - - log.info('Domain has shutdown: name=%s id=%d reason=%s.', - self.info['name'], self.domid, reason) - - self.clearRestart() - - if reason == 'suspend': - self.state_set(STATE_VM_SUSPENDED) - # Don't destroy the domain. XendCheckpoint will do this once - # it has finished. - elif reason in ['poweroff', 'reboot']: - self.maybeRestart(reason) + elif xeninfo['crashed']: + log.warn('Domain has crashed: name=%s id=%d.', + self.info['name'], self.domid) + + if xroot.get_enable_dump(): + self.dumpCore() + + restart_reason = 'crash' + + elif xeninfo['shutdown']: + if self.readDom('xend/shutdown_completed'): + # We've seen this shutdown already, but we are preserving + # the domain for debugging. Leave it alone. + return + + else: + reason = shutdown_reason(xeninfo['shutdown_reason']) + + log.info('Domain has shutdown: name=%s id=%d reason=%s.', + self.info['name'], self.domid, reason) + + self.clearRestart() + + if reason == 'suspend': + self.state_set(STATE_VM_TERMINATED) + # Don't destroy the domain. XendCheckpoint will do + # this once it has finished. + elif reason in ['poweroff', 'reboot']: + restart_reason = reason + else: + self.destroy() + else: - self.destroy() - - else: - # Domain is alive. If we are shutting it down, then check - # the timeout on that, and destroy it if necessary. - - sst = self.readVm('xend/shutdown_start_time') - if sst: - sst = float(sst) - timeout = SHUTDOWN_TIMEOUT - time.time() + sst - if timeout < 0: - log.info( - "Domain shutdown timeout expired: name=%s id=%s", - self.info['name'], self.domid) - self.destroy() - else: - log.debug( - "Scheduling refreshShutdown on domain %d in %ds.", - self.domid, timeout) - scheduler.later(timeout, self.refreshShutdown) + # Domain is alive. If we are shutting it down, then check + # the timeout on that, and destroy it if necessary. + + sst = self.readDom('xend/shutdown_start_time') + if sst: + sst = float(sst) + timeout = SHUTDOWN_TIMEOUT - time.time() + sst + if timeout < 0: + log.info( + "Domain shutdown timeout expired: name=%s id=%s", + self.info['name'], self.domid) + self.destroy() + else: + log.debug( + "Scheduling refreshShutdown on domain %d in %ds.", + self.domid, timeout) + scheduler.later(timeout, self.refreshShutdown) + finally: + self.refresh_shutdown_lock.release() + + if restart_reason: + self.maybeRestart(restart_reason) def shutdown(self, reason): if not reason in shutdown_reasons.values(): raise XendError('invalid reason:' + reason) - self.storeVm("control/shutdown", reason) - if not reason == 'suspend': - self.storeVm('xend/shutdown_start_time', time.time()) - + self.storeDom("control/shutdown", reason) + if reason != 'suspend': + self.storeDom('xend/shutdown_start_time', time.time()) + + + ## private: def clearRestart(self): - self.removeVm("xend/shutdown_start_time") + self.removeDom("xend/shutdown_start_time") def maybeRestart(self, reason): - if self.restart_needed(reason): - self.restart() - else: - self.destroy() + # Dispatch to the correct method based upon the configured on_{reason} + # behaviour. 
+ {"destroy" : self.destroy, + "restart" : self.restart, + "preserve" : self.preserve, + "rename-restart" : self.renameRestart}[self.info['on_' + reason]]() + + + def renameRestart(self): + self.restart(True) def dumpCore(self): @@ -638,38 +779,12 @@ self.domid) xc.domain_dumpcore(dom = self.domid, corefile = corefile) - except Exception, exn: - log.error("XendDomainInfo.dumpCore failed: id = %s name = %s: %s", - self.domid, self.info['name'], str(exn)) - - - def closeChannel(self, channel, entry): - """Close the given channel, if set, and remove the given entry in the - store. Nothrow guarantee.""" - - try: - try: - if channel: - channel.close() - finally: - self.removeDom(entry) - except Exception, exn: - log.exception(exn) - - - def closeStoreChannel(self): - """Close the store channel, if any. Nothrow guarantee.""" - - self.closeChannel(self.store_channel, "store/port") - self.store_channel = None - - - def closeConsoleChannel(self): - """Close the console channel, if any. Nothrow guarantee.""" - - self.closeChannel(self.console_channel, "console/port") - self.console_channel = None - + except: + log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s", + self.domid, self.info['name']) + + + ## public: def setConsoleRef(self, ref): self.console_mfn = ref @@ -715,9 +830,9 @@ ## public: - def state_wait(self, state): + def waitForShutdown(self): self.state_updated.acquire() - while self.state != state: + while self.state == STATE_VM_OK: self.state_updated.wait() self.state_updated.release() @@ -734,24 +849,32 @@ __repr__ = __str__ + ## private: + def createDevice(self, deviceClass, devconfig): return self.getDeviceController(deviceClass).createDevice(devconfig) - def configureDevice(self, deviceClass, devid, devconfig): - return self.getDeviceController(deviceClass).configureDevice( + def reconfigureDevice(self, deviceClass, devid, devconfig): + return self.getDeviceController(deviceClass).reconfigureDevice( devid, devconfig) + + ## public: def destroyDevice(self, deviceClass, devid): return self.getDeviceController(deviceClass).destroyDevice(devid) + ## private: + def getDeviceSxprs(self, deviceClass): return self.getDeviceController(deviceClass).sxprs() - ## private: + def getDeviceConfigurations(self, deviceClass): + return self.getDeviceController(deviceClass).configurations() + def getDeviceController(self, name): if name not in controllerClasses: @@ -803,7 +926,8 @@ if self.infoIsSet('cpu_time'): sxpr.append(['cpu_time', self.info['cpu_time']/1e9]) sxpr.append(['vcpus', self.info['vcpus']]) - sxpr.append(['cpumap', self.info['cpumap']]) + if self.infoIsSet('cpumap'): + sxpr.append(['cpumap', self.info['cpumap']]) if self.infoIsSet('vcpu_to_cpu'): sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]]) sxpr.append(['vcpu_to_cpu', self.prettyVCpuMap()]) @@ -813,12 +937,8 @@ sxpr.append(['up_time', str(up_time) ]) sxpr.append(['start_time', str(self.info['start_time']) ]) - if self.store_channel: - sxpr.append(self.store_channel.sxpr()) if self.store_mfn: sxpr.append(['store_mfn', self.store_mfn]) - if self.console_channel: - sxpr.append(['console_channel', self.console_channel.sxpr()]) if self.console_mfn: sxpr.append(['console_mfn', self.console_mfn]) @@ -847,13 +967,9 @@ if c in '_-.:/+': continue if c in string.ascii_letters: continue raise VmError('invalid vm name') - dominfo = domain_exists(name) - # When creating or rebooting, a domain with my name should not exist. - # When restoring, a domain with my name will exist, but it should have - # my domain id. 
+ + dominfo = domain_by_name(name) if not dominfo: - return - if dominfo.is_terminated(): return if self.domid is None: raise VmError("VM name '%s' already in use by domain %d" % @@ -864,9 +980,8 @@ def construct(self): - """Construct the vm instance from its configuration. - - @param config: configuration + """Construct the domain. + @raise: VmError on error """ @@ -876,22 +991,15 @@ self.domid = xc.domain_create(dom = 0, ssidref = self.info['ssidref']) - if self.domid <= 0: + if self.domid < 0: raise VmError('Creating domain failed: name=%s' % self.info['name']) - try: - self.initDomain() - self.construct_image() - self.configure() - self.exportToDB() - except Exception, ex: - # Catch errors, cleanup and re-raise. - print 'Domain construction error:', ex - import traceback - traceback.print_exc() - self.destroy() - raise + self.dompath = DOMROOT + str(self.domid) + + # Ensure that the domain entry is clean. This prevents a stale + # shutdown_start_time from killing the domain, for example. + self.removeDom() def initDomain(self): @@ -926,43 +1034,22 @@ self.domid, self.info['name'], self.info['memory_KiB']) - def configure_vcpus(self, vcpus): - d = {} - for v in range(0, vcpus): - d["cpu/%d/availability" % v] = "online" - self.writeVm(d) - def construct_image(self): """Construct the boot image for the domain. """ self.create_channel() self.image.createImage() - self.exportToDB() - if self.store_channel and self.store_mfn >= 0: - IntroduceDomain(self.domid, self.store_mfn, - self.store_channel.port1, self.path) - # get the configured value of vcpus and update store - self.configure_vcpus(self.info['vcpus']) + IntroduceDomain(self.domid, self.store_mfn, + self.store_channel, self.dompath) ## public: - def delete(self): - """Delete the vm's db. - """ - try: - xstransact.Remove(self.path, 'domid') - except Exception, ex: - log.warning("error in domain db delete: %s", ex) - - - def cleanup(self): - """Cleanup vm resources: release devices. Nothrow guarantee.""" - - self.state_set(STATE_VM_TERMINATED) + def cleanupDomain(self): + """Cleanup domain resources; release devices. Idempotent. Nothrow + guarantee.""" + self.release_devices() - self.closeStoreChannel() - self.closeConsoleChannel() if self.image: try: @@ -972,46 +1059,62 @@ "XendDomainInfo.cleanup: image.destroy() failed.") self.image = None - - def destroy(self): - """Cleanup vm and destroy domain. Nothrow guarantee.""" - - log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid)) - - self.cleanup() + try: + self.removeDom() + except: + log.exception("Removing domain path failed.") + + try: + if not self.info['name'].startswith(ZOMBIE_PREFIX): + self.info['name'] = self.generateZombieName() + except: + log.exception("Renaming Zombie failed.") + + self.state_set(STATE_VM_TERMINATED) + + + def cleanupVm(self): + """Cleanup VM resources. Idempotent. Nothrow guarantee.""" try: self.removeVm() - except Exception: + except: log.exception("Removing VM path failed.") - try: - self.removeDom() - except Exception: - log.exception("Removing domain path failed.") + + def destroy(self): + """Cleanup VM and destroy domain. 
Nothrow guarantee.""" + + log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid)) + + self.cleanupVm() + self.destroyDomain() + + + def destroyDomain(self): + log.debug("XendDomainInfo.destroyDomain(%s)", str(self.domid)) try: if self.domid is not None: xc.domain_destroy(dom=self.domid) - except Exception: + except: log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.") - - def is_terminated(self): - """Check if a domain has been terminated. - """ - return self.state == STATE_VM_TERMINATED + self.cleanupDomain() + + + ## private: def release_devices(self): - """Release all vm devices. Nothrow guarantee.""" + """Release all domain's devices. Nothrow guarantee.""" while True: - t = xstransact("%s/device" % self.path) + t = xstransact("%s/device" % self.dompath) for n in controllerClasses.keys(): for d in t.list(n): try: t.remove(d) - except ex: + except: # Log and swallow any exceptions in removal -- # there's nothing more we can do. log.exception( @@ -1020,22 +1123,29 @@ if t.commit(): break + def eventChannel(self, path=None): """Create an event channel to the domain. @param path under which port is stored in db """ - port = 0 if path: try: - port = int(self.readDom(path)) + return int(self.readDom(path)) except: - # if anything goes wrong, assume the port was not yet set + # The port is not yet set, i.e. the channel has not yet been + # created. pass - ret = EventChannel.interdomain(0, self.domid, port1=port, port2=0) - self.storeDom(path, ret.port1) - return ret - + + try: + port = xc.evtchn_alloc_unbound(dom=self.domid, remote_dom=0) + except: + log.exception("Exception in alloc_unbound(%d)", self.domid) + raise + + self.storeDom(path, port) + return port + def create_channel(self): """Create the channels to the domain. """ @@ -1055,6 +1165,9 @@ self.create_configured_devices() if self.image: self.image.createDeviceModel() + + + ## public: def device_create(self, dev_config): """Create a new device. @@ -1073,24 +1186,10 @@ @param devid: device id """ deviceClass = sxp.name(dev_config) - self.configureDevice(deviceClass, devid, dev_config) - - - def restart_needed(self, reason): - """Determine if the vm needs to be restarted when shutdown - for the given reason. - - @param reason: shutdown reason - @return True if needs restart, False otherwise - """ - if self.info['restart_mode'] == RESTART_NEVER: - return False - if self.info['restart_mode'] == RESTART_ALWAYS: - return True - if self.info['restart_mode'] == RESTART_ONREBOOT: - return reason == 'reboot' - return False - + self.reconfigureDevice(deviceClass, devid, dev_config) + + + ## private: def restart_check(self): """Check if domain restart is OK. @@ -1109,11 +1208,14 @@ self.restart_count += 1 - def restart(self): - """Restart the domain after it has exited. """ + def restart(self, rename = False): + """Restart the domain after it has exited. + + @param rename True if the old domain is to be renamed and preserved, + False if it is to be destroyed. 
+ """ # self.restart_check() - self.cleanup() config = self.sxpr() @@ -1127,17 +1229,91 @@ self.writeVm('xend/restart_in_progress', 'True') try: - self.destroy() + if rename: + self.preserveForRestart() + else: + self.destroy() + try: xd = get_component('xen.xend.XendDomain') - xd.domain_unpause(xd.domain_create(config).getDomid()) - except Exception, exn: + new_dom = xd.domain_create(config) + try: + xc.domain_unpause(new_dom.getDomid()) + except: + new_dom.destroy() + raise + except: log.exception('Failed to restart domain %d.', self.domid) finally: self.removeVm('xend/restart_in_progress') # self.configure_bootloader() # self.exportToDB() + + + def preserveForRestart(self): + """Preserve a domain that has been shut down, by giving it a new UUID, + cloning the VM details, and giving it a new name. This allows us to + keep this domain for debugging, but restart a new one in its place + preserving the restart semantics (name and UUID preserved). + """ + + new_name = self.generateUniqueName() + new_uuid = getUuid() + log.info("Renaming dead domain %s (%d, %s) to %s (%s).", + self.info['name'], self.domid, self.uuid, new_name, new_uuid) + self.release_devices() + self.info['name'] = new_name + self.uuid = new_uuid + self.vmpath = VMROOT + new_uuid + self.storeVmDetails() + self.preserve() + + + def preserve(self): + log.info("Preserving dead domain %s (%d).", self.info['name'], + self.domid) + self.storeDom('xend/shutdown_completed', 'True') + self.state_set(STATE_VM_TERMINATED) + + + ## public: + + def renameUniquely(self): + """Rename this domain so that it has a unique name. This is used by + XendDomain to recover from non-uniqueness errors; we should never have + allowed the system to reach this state in the first place.""" + new_name = self.generateUniqueName() + + log.error('Renaming %s (%d, %s) to %s', self.info['name'], self.domid, + self.uuid, new_name) + + self.setName(new_name) + + + # private: + + def generateUniqueName(self): + n = 1 + while True: + name = "%s-%d" % (self.info['name'], n) + try: + self.check_name(name) + return name + except VmError: + n += 1 + + + def generateZombieName(self): + n = 0 + name = ZOMBIE_PREFIX + self.info['name'] + while True: + try: + self.check_name(name) + return name + except VmError: + n += 1 + name = "%s%d-%s" % (ZOMBIE_PREFIX, n, self.info['name']) def configure_bootloader(self): @@ -1182,32 +1358,31 @@ log.error("Invalid VCPU %d" % vcpu) return if int(state) == 0: + self.info['vcpu_avail'] &= ~(1 << vcpu) availability = "offline" else: + self.info['vcpu_avail'] &= (1 << vcpu) availability = "online" - self.storeVm("cpu/%d/availability" % vcpu, availability) + self.storeVm('vcpu_avail', self.info['vcpu_avail']) + self.storeDom("cpu/%d/availability" % vcpu, availability) def send_sysrq(self, key=0): - self.storeVm("control/sysrq", '%c' % key) - - def dom0_init_store(self): - if not self.store_channel: - self.store_channel = self.eventChannel("store/port") - if not self.store_channel: - return - ref = xc.init_store(self.store_channel.port2) + self.storeDom("control/sysrq", '%c' % key) + + + def initStoreConnection(self): + ref = xc.init_store(self.store_channel) if ref and ref >= 0: self.setStoreRef(ref) try: - IntroduceDomain(self.domid, ref, self.store_channel.port1, - self.path) + IntroduceDomain(self.domid, ref, self.store_channel, + self.dompath) except RuntimeError, ex: if ex.args[0] == errno.EISCONN: pass else: raise - # get run-time value of vcpus and update store - self.configure_vcpus(dom_get(self.domid)['vcpus']) + def 
dom0_enforce_vcpus(self): dom = 0 diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendRoot.py Sat Oct 8 20:28:24 2005 @@ -26,16 +26,14 @@ import os import os.path +import string import sys -import EventServer from XendLogging import XendLogging from XendError import XendError -# Initial create of the event server. -eserver = EventServer.instance() - import sxp + class XendRoot: """Root of the management classes.""" @@ -96,9 +94,7 @@ self.config = None self.logging = None self.configure() - eserver.subscribe('xend.*', self.event_handler) - #eserver.subscribe('xend.domain.created', self.event_handler) - #eserver.subscribe('xend.domain.died', self.event_handler) + def add_component(self, name, val): """Add a xend component. @@ -117,9 +113,6 @@ @return: component object (or None) """ return self.components.get(name) - - def start(self): - eserver.inject('xend.start', 0) def _format(self, msg, args): if args: @@ -246,10 +239,10 @@ return sxp.child_value(self.config, name, val=val) def get_config_bool(self, name, val=None): - v = self.get_config_value(name, val) - if v in ['yes', '1', 'on', 'true', 1, True]: + v = string.lower(str(self.get_config_value(name, val))) + if v in ['yes', 'y', '1', 'on', 'true', 't']: return True - if v in ['no', '0', 'off', 'false', 0, False]: + if v in ['no', 'n', '0', 'off', 'false', 'f']: return False raise XendError("invalid xend config %s: expected bool: %s" % (name, v)) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/XendVnet.py --- a/tools/python/xen/xend/XendVnet.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/XendVnet.py Sat Oct 8 20:28:24 2005 @@ -13,6 +13,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd #============================================================================ """Handler for vnet operations. 
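A note on the get_config_bool() change in XendRoot.py above: the value is now str()'d and lowercased before comparison, and the single-letter and numeric spellings are accepted. A minimal sketch of how a caller sees the new behaviour (the option name and default here are illustrative only, not taken from this changeset):

    from xen.xend import XendRoot

    xroot = XendRoot.instance()

    # 'Yes', 'TRUE', 'T', 'y' and '1' all parse as True; 'No', 'OFF',
    # 'F', 'n' and '0' as False.  Anything else raises XendError rather
    # than being silently coerced.
    if xroot.get_config_bool('enable-dump', 'no'):
        print 'guest core dumps are enabled'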
@@ -22,7 +23,8 @@ from xen.xend import sxp from xen.xend.XendError import XendError from xen.xend.XendLogging import log -from xen.xend.xenstore import XenNode, DBMap, DBVar +from xen.xend.xenstore.xstransact import xstransact + def vnet_cmd(cmd): out = None @@ -39,39 +41,40 @@ vifctl_ops = {'up': 'vif.add', 'down': 'vif.del'} - __exports__ = [ - DBVar('id', ty='str'), - DBVar('dbid', ty='str'), - DBVar('config', ty='sxpr'), - ] - - def __init__(self, db, config=None): + def __init__(self, dbpath, config=None): if config: - self.id = sxp.child_value(config, 'id') - self.id = str(self.id) + self.id = str(sxp.child_value(config, 'id')) self.dbid = self.id.replace(':', '-') - self.db = db.addChild(self.dbid) + self.dbpath = dbpath + '/' + self.dbid self.config = config else: - self.db = db + self.dbpath = dbpath self.importFromDB() - config = self.config - self.bridge = sxp.child_value(config, 'bridge') + self.bridge = sxp.child_value(self.config, 'bridge') if not self.bridge: self.bridge = "vnet%s" % self.id - self.vnetif = sxp.child_value(config, 'vnetif') + self.vnetif = sxp.child_value(self.config, 'vnetif') if not self.vnetif: self.vnetif = "vnif%s" % self.id - def saveToDB(self, save=False, sync=False): - self.db.saveDB(save=save, sync=sync) def exportToDB(self, save=False, sync=False): - self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync) + to_store = { + 'id' : self.id, + 'dbid' : self.dbid, + 'config' : sxp.to_string(self.config) + } + xstransact.Write(self.dbpath, to_store) + def importFromDB(self): - self.db.importFromDB(self, fields=self.__exports__) + (self.id, self.dbid, c) = xstransact.Gather(self.dbpath, + ('id', str), + ('dbid', str), + ('config', str)) + self.config = sxp.from_string(c) + def sxpr(self): return self.config @@ -88,7 +91,7 @@ Brctl.vif_bridge_rem({'bridge': self.bridge, 'vif': self.vnetif}) Brctl.bridge_del(self.bridge) val = vnet_cmd(['vnet.del', self.id]) - self.db.delete() + xstransact.Remove(self.dbpath) return val def vifctl(self, op, vif, vmac): @@ -107,18 +110,17 @@ def __init__(self): # Table of vnet info indexed by vnet id. self.vnet = {} - self.db = DBMap(db=XenNode(self.dbpath)) - self.db.readDB() - for vnetdb in self.db.values(): + listing = xstransact.List(self.dbpath) + for entry in listing: try: - info = XendVnetInfo(vnetdb) + info = XendVnetInfo(self.dbpath + '/' + entry) self.vnet[info.id] = info info.configure() except XendError, ex: log.warning("Failed to configure vnet %s: %s", str(info.id), str(ex)) except Exception, ex: log.exception("Vnet error") - vnetdb.delete() + xstransact.Remove(self.dbpath + '/' + entry) def vnet_of_bridge(self, bridge): """Get the vnet for a bridge (if any). 
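XendVnetInfo above now persists itself through the plain xstransact helpers instead of the old DBMap layer: Write() takes a dict mapping leaf names to string values, and Gather() reads several leaves back, applying a conversion to each. A minimal sketch of that round trip, with an illustrative store path that is not part of the patch:

    from xen.xend import sxp
    from xen.xend.xenstore.xstransact import xstransact

    path = '/vnet/example'          # illustrative path only

    # Everything written must already be a string, hence sxp.to_string().
    xstransact.Write(path, {'id'     : '42',
                            'config' : sxp.to_string(['vnet', ['id', '42']])})

    (ident, conf) = xstransact.Gather(path, ('id', str), ('config', str))
    config = sxp.from_string(conf)  # back to an s-expression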
@@ -155,9 +157,9 @@ @param config: config """ - info = XendVnetInfo(self.db, config=config) + info = XendVnetInfo(self.dbpath, config=config) self.vnet[info.id] = info - info.saveToDB() + info.exportToDB() info.configure() def vnet_delete(self, id): diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/image.py Sat Oct 8 20:28:24 2005 @@ -25,8 +25,6 @@ from xen.xend.XendError import VmError from xen.xend.XendLogging import log -from xen.xend.server import channel - xc = xen.lowlevel.xc.new() @@ -168,11 +166,11 @@ def buildDomain(self): if self.vm.store_channel: - store_evtchn = self.vm.store_channel.port2 + store_evtchn = self.vm.store_channel else: store_evtchn = 0 if self.vm.console_channel: - console_evtchn = self.vm.console_channel.port2 + console_evtchn = self.vm.console_channel else: console_evtchn = 0 @@ -228,16 +226,17 @@ def buildDomain(self): # Create an event channel - self.device_channel = channel.eventChannel(0, self.vm.getDomid()) - log.info("VMX device model port: %d", self.device_channel.port2) + self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(), + remote_dom=0) + log.info("VMX device model port: %d", self.device_channel) if self.vm.store_channel: - store_evtchn = self.vm.store_channel.port2 + store_evtchn = self.vm.store_channel else: store_evtchn = 0 log.debug("dom = %d", self.vm.getDomid()) log.debug("image = %s", self.kernel) - log.debug("control_evtchn = %d", self.device_channel.port2) + log.debug("control_evtchn = %d", self.device_channel) log.debug("store_evtchn = %d", store_evtchn) log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024) log.debug("flags = %d", self.flags) @@ -245,7 +244,7 @@ ret = xc.vmx_build(dom = self.vm.getDomid(), image = self.kernel, - control_evtchn = self.device_channel.port2, + control_evtchn = self.device_channel, store_evtchn = store_evtchn, memsize = self.vm.getMemoryTarget() / 1024, flags = self.flags, @@ -334,7 +333,7 @@ if len(vnc): args = args + vnc args = args + ([ "-d", "%d" % self.vm.getDomid(), - "-p", "%d" % self.device_channel.port1, + "-p", "%d" % self.device_channel, "-m", "%s" % (self.vm.getMemoryTarget() / 1024)]) args = args + self.dmargs env = dict(os.environ) @@ -358,7 +357,6 @@ return vncconnect def destroy(self): - channel.eventChannelClose(self.device_channel) import signal if not self.pid: return diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/DevController.py Sat Oct 8 20:28:24 2005 @@ -75,10 +75,29 @@ """ frontpath = self.frontendPath(devid) - backpath = xstransact.Read("%s/backend" % frontpath) + backpath = xstransact.Read(frontpath, "backend") xstransact.Remove(frontpath) - xstransact.Remove(backpath) + + if backpath: + xstransact.Remove(backpath) + else: + raise VmError("Device not connected") + + + def configurations(self): + return map(self.configuration, self.deviceIDs()) + + + def configuration(self, devid): + """@return an s-expression giving the current configuration of the + specified device. 
This would be suitable for giving to {@link + #createDevice} in order to recreate that device.""" + + backdomid = int(xstransact.Read(self.frontendPath(devid), + "backend-id")) + + return [self.deviceClass, ['backend', backdomid]] def sxprs(self): @@ -150,7 +169,20 @@ raise - ## private: + def readBackend(self, devid, *args): + frontpath = self.frontendPath(devid) + backpath = xstransact.Read(frontpath, "backend") + return xstransact.Read(backpath, *args) + + + def deviceIDs(self): + """@return The IDs of each of the devices currently configured for + this instance's deviceClass. + """ + return map(int, xstransact.List(self.frontendRoot())) + + +## private: def writeDetails(self, config, devid, backDetails, frontDetails): """Write the details in the store to trigger creation of a device. @@ -167,8 +199,17 @@ """ import xen.xend.XendDomain - backdom = xen.xend.XendDomain.instance().domain_lookup_by_name( - sxp.child_value(config, 'backend', '0')) + xd = xen.xend.XendDomain.instance() + + backdom_name = sxp.child_value(config, 'backend') + if backdom_name: + backdom = xd.domain_lookup_by_name_or_id_nr(backdom_name) + else: + backdom = xd.privilegedDomain() + + if not backdom: + raise VmError("Cannot configure device for unknown backend %s" % + backdom_name) frontpath = self.frontendPath(devid) backpath = self.backendPath(backdom, devid) @@ -197,9 +238,9 @@ def backendPath(self, backdom, devid): """@param backdom [XendDomainInfo] The backend domain info.""" - return "%s/backend/%s/%s/%d" % (backdom.getPath(), + return "%s/backend/%s/%s/%d" % (backdom.getDomainPath(), self.deviceClass, - self.vm.getUuid(), devid) + self.vm.getDomid(), devid) def frontendPath(self, devid): @@ -207,8 +248,9 @@ def frontendRoot(self): - return "%s/device/%s" % (self.vm.getPath(), self.deviceClass) + return "%s/device/%s" % (self.vm.getDomainPath(), self.deviceClass) def frontendMiscPath(self): - return "%s/device-misc/%s" % (self.vm.getPath(), self.deviceClass) + return "%s/device-misc/%s" % (self.vm.getDomainPath(), + self.deviceClass) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/SrvDaemon.py Sat Oct 8 20:28:24 2005 @@ -2,6 +2,7 @@ ## Xen controller daemon ## Copyright (c) 2004, K A Fraser (University of Cambridge) ## Copyright (C) 2004, Mike Wray <mike.wray@xxxxxx> +## Copyright (C) 2005, XenSource Ltd ########################################################### import os @@ -13,18 +14,12 @@ import re import traceback -from xen.xend import EventServer from xen.xend.server import SrvServer from xen.xend.XendLogging import log -from xen.xend import XendRoot import event import relocate from params import * - - -eserver = EventServer.instance() -xroot = XendRoot.instance() class Daemon: @@ -71,14 +66,17 @@ @param pidfile: file to read @return pid or 0 """ - pid = 0 if os.path.isfile(pidfile) and os.path.getsize(pidfile): try: - pid = open(pidfile, 'r').read() - pid = int(pid) + f = open(pidfile, 'r') + try: + return int(f.read()) + finally: + f.close() except: - pid = 0 - return pid + return 0 + else: + return 0 def find_process(self, pid, name): """Search for a process. 
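The read_pid() rewrite above uses the shape that recurs throughout this changeset: open the file, read inside try, close in finally, so the descriptor is released even when int() raises. A minimal standalone sketch of the same pattern (the helper name is illustrative):

    def read_int_file(path):
        # Return the integer contained in the file, or 0 if the file is
        # missing, unreadable or malformed.
        try:
            f = open(path, 'r')
            try:
                return int(f.read())
            finally:
                f.close()
        except:
            return 0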
@@ -146,8 +144,10 @@ if self.child: # Parent pidfile = open(pidfile, 'w') - pidfile.write(str(self.child)) - pidfile.close() + try: + pidfile.write(str(self.child)) + finally: + pidfile.close() return self.child @@ -200,8 +200,10 @@ if self.fork_pid(XEND_PID_FILE): os.close(w) r = os.fdopen(r, 'r') - s = r.read() - r.close() + try: + s = r.read() + finally: + r.close() if not len(s): ret = 1 else: diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/SrvDomain.py Sat Oct 8 20:28:24 2005 @@ -71,8 +71,7 @@ def op_destroy(self, op, req): fn = FormFn(self.xd.domain_destroy, - [['dom', 'int'], - ['reason', 'str']]) + [['dom', 'int']]) val = fn(req.args, {'dom': self.dom.domid}) req.setHeader("Location", "%s/.." % req.prePathURL()) return val @@ -157,19 +156,11 @@ val = fn(req.args, {'dom': self.dom.domid}) return val - def op_device_refresh(self, op, req): - fn = FormFn(self.xd.domain_device_refresh, - [['dom', 'int'], - ['type', 'str'], - ['idx', 'int']]) - val = fn(req.args, {'dom': self.dom.domid}) - return val - def op_device_destroy(self, op, req): fn = FormFn(self.xd.domain_device_destroy, [['dom', 'int'], ['type', 'str'], - ['idx', 'int']]) + ['dev', 'str']]) val = fn(req.args, {'dom': self.dom.domid}) return val @@ -177,7 +168,7 @@ fn = FormFn(self.xd.domain_device_configure, [['dom', 'int'], ['config', 'sxpr'], - ['idx', 'int']]) + ['dev', 'str']]) val = fn(req.args, {'dom': self.dom.domid}) return val @@ -232,8 +223,6 @@ req.write('<form method="post" action="%s">' % url) req.write('<input type="submit" name="op" value="destroy">') - req.write('<input type="radio" name="reason" value="halt" checked>Halt') - req.write('<input type="radio" name="reason" value="reboot">Reboot') req.write('</form>') req.write('<form method="post" action="%s">' % url) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/SrvDomainDir.py --- a/tools/python/xen/xend/server/SrvDomainDir.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/SrvDomainDir.py Sat Oct 8 20:28:24 2005 @@ -38,7 +38,7 @@ self.xd = XendDomain.instance() def domain(self, x): - dom = self.xd.domain_lookup_by_name(x) + dom = self.xd.domain_lookup_by_name_or_id(x) if not dom: raise XendError('No such domain ' + str(x)) return SrvDomain(dom) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/SrvRoot.py --- a/tools/python/xen/xend/server/SrvRoot.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/SrvRoot.py Sat Oct 8 20:28:24 2005 @@ -13,10 +13,9 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd #============================================================================ -from xen.xend import XendRoot -xroot = XendRoot.instance() from xen.web.SrvDir import SrvDir class SrvRoot(SrvDir): @@ -39,8 +38,6 @@ self.add(name, klass) for (name, klass) in self.subdirs: self.get(name) - xroot.start() def __repr__(self): return "<SrvRoot %x %s>" %(id(self), self.table.keys()) - diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/blkif.py Sat Oct 8 20:28:24 2005 @@ -23,7 +23,7 @@ from xen.util import blkif from xen.xend import sxp -from 
xen.xend.server.DevController import DevController +from DevController import DevController class BlkifController(DevController): @@ -40,14 +40,15 @@ def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - typedev = sxp.child_value(config, 'dev') - if re.match('^ioemu:', typedev): + dev = sxp.child_value(config, 'dev') + if re.match('^ioemu:', dev): return (0,{},{}) - devid = blkif.blkdev_name_to_number(sxp.child_value(config, 'dev')) + devid = blkif.blkdev_name_to_number(dev) (typ, params) = string.split(sxp.child_value(config, 'uname'), ':', 1) - back = { 'type' : typ, + back = { 'dev' : dev, + 'type' : typ, 'params' : params } @@ -57,3 +58,44 @@ front = { 'virtual-device' : "%i" % devid } return (devid, back, front) + + + def configuration(self, devid): + """@see DevController.configuration""" + + result = DevController.configuration(self, devid) + + (dev, typ, params, ro) = self.readBackend(devid, + 'dev', 'type', 'params', + 'read-only') + + if dev: + result.append(['dev', dev]) + if typ and params: + result.append(['uname', typ + ":" + params]) + if ro: + result.append(['mode', 'r']) + else: + result.append(['mode', 'w']) + + return result + + + def destroyDevice(self, devid): + """@see DevController.destroyDevice""" + + # If we are given a device name, then look up the device ID from it, + # and destroy that ID instead. If what we are given is an integer, + # then assume it's a device ID and pass it straight through to our + # superclass's method. + + try: + DevController.destroyDevice(self, int(devid)) + except ValueError: + for i in self.deviceIDs(): + if self.readBackend(i, 'dev') == devid: + DevController.destroyDevice(self, i) + return + # Try this, but it's almost certainly going to throw VmError, + # since we can't find the device. + DevController.destroyDevice(self, int(devid)) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/event.py --- a/tools/python/xen/xend/server/event.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/event.py Sat Oct 8 20:28:24 2005 @@ -13,19 +13,23 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd #============================================================================ import sys import StringIO -from xen.web import reactor, protocol +from xen.web import protocol, tcp, unix from xen.xend import scheduler from xen.xend import sxp from xen.xend import PrettyPrint -from xen.xend import EventServer; eserver = EventServer.instance() from xen.xend.XendError import XendError -from xen.xend import XendRoot; xroot = XendRoot.instance() +from xen.xend import XendRoot + + +xroot = XendRoot.instance() + DEBUG = 0 @@ -38,13 +42,7 @@ self.daemon = daemon # Event queue. self.queue = [] - # Subscribed events. - self.events = [] self.parser = sxp.Parser() - self.pretty = 0 - - # For debugging subscribe to everything and make output pretty. 
- #self.subscribe(['*']) self.pretty = 1 def dataReceived(self, data): @@ -68,7 +66,7 @@ scheduler.now(self.connectionLost) def connectionLost(self, reason=None): - self.unsubscribe() + pass def send_reply(self, sxpr): io = StringIO.StringIO() @@ -99,16 +97,6 @@ def send_event(self, val): return self.send_reply(['event', val[0], val[1]]) - def unsubscribe(self): - for event in self.events: - eserver.unsubscribe(event, self.queue_event) - - def subscribe(self, events): - self.unsubscribe() - for event in events: - eserver.subscribe(event, self.queue_event) - self.events = events - def queue_event(self, name, v): # Despite the name we don't queue the event here. # We send it because the transport will queue it. @@ -126,7 +114,7 @@ op_method = getattr(self, op_method_name, self.operror) return op_method(op_name, req) - def op_help(self, name, req): + def op_help(self, _1, _2): def nameop(x): if x.startswith('op_'): return x[3:].replace('_', '.') @@ -136,37 +124,27 @@ l = [ nameop(k) for k in dir(self) if k.startswith('op_') ] return l - def op_quit(self, name, req): + def op_quit(self, _1, _2): self.loseConnection() - def op_exit(self, name, req): + def op_exit(self, _1, _2): sys.exit(0) - def op_pretty(self, name, req): + def op_pretty(self, _1, _2): self.pretty = 1 - def op_info(self, name, req): + def op_info(self, _1, _2): val = ['info'] #val += self.daemon.blkifs() #val += self.daemon.netifs() #val += self.daemon.usbifs() return val - def op_sys_subscribe(self, name, v): - # (sys.subscribe event*) - # Subscribe to the events: - self.subscribe(v[1:]) - - def op_sys_inject(self, name, v): - # (sys.inject event) - event = v[1] - eserver.inject(sxp.name(event), event) - - def op_trace(self, name, v): + def op_trace(self, _, v): mode = (v[1] == 'on') self.daemon.tracing(mode) - def op_log_stderr(self, name, v): + def op_log_stderr(self, _, v): mode = v[1] logging = xroot.get_logging() if mode == 'on': @@ -174,11 +152,11 @@ else: logging.removeLogStderr() - def op_domain_ls(self, name, v): + def op_domain_ls(self, _1, _2): xd = xroot.get_component("xen.xend.XendDomain") return xd.list_names() - def op_domain_configure(self, name, v): + def op_domain_configure(self, _, v): domid = sxp.child_value(v, "dom") config = sxp.child_value(v, "config") if domid is None: @@ -188,7 +166,7 @@ xd = xroot.get_component("xen.xend.XendDomain") xd.domain_configure(domid, config) - def op_domain_unpause(self, name, v): + def op_domain_unpause(self, _, v): domid = sxp.child_value(v, "dom") if domid is None: raise XendError("missing domain id") @@ -200,18 +178,18 @@ """ def __init__(self, daemon): - #protocol.ServerFactory.__init__(self) + protocol.ServerFactory.__init__(self) self.daemon = daemon - def buildProtocol(self, addr): + def buildProtocol(self, _): return EventProtocol(self.daemon) def listenEvent(daemon): factory = EventFactory(daemon) if xroot.get_xend_unix_server(): path = '/var/lib/xend/event-socket' - reactor.listenUNIX(path, factory) + unix.listenUNIX(path, factory) if xroot.get_xend_http_server(): port = xroot.get_xend_event_port() interface = xroot.get_xend_address() - reactor.listenTCP(port, factory, interface=interface) + tcp.listenTCP(port, factory, interface=interface) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/netif.py Sat Oct 8 20:28:24 2005 @@ -23,8 +23,12 @@ import os from xen.xend import sxp +from xen.xend import XendRoot from 
xen.xend.server.DevController import DevController + + +xroot = XendRoot.instance() class NetifController(DevController): @@ -37,9 +41,6 @@ def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - - from xen.xend import XendRoot - xroot = XendRoot.instance() def _get_config_ipaddr(config): val = [] @@ -68,3 +69,26 @@ 'mac' : mac } return (devid, back, front) + + + def configuration(self, devid): + """@see DevController.configuration""" + + result = DevController.configuration(self, devid) + + (script, ip, bridge, mac) = self.readBackend(devid, + 'script', 'ip', 'bridge', + 'mac') + + if script: + result.append(['script', + script.replace(xroot.network_script_dir + os.sep, + "")]) + if ip: + result.append(['ip', ip.split(" ")]) + if bridge: + result.append(['bridge', bridge]) + if mac: + result.append(['mac', mac]) + + return result diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/relocate.py --- a/tools/python/xen/xend/server/relocate.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/relocate.py Sat Oct 8 20:28:24 2005 @@ -13,21 +13,24 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd #============================================================================ import socket import sys import StringIO -from xen.web import reactor, protocol +from xen.web import protocol, tcp, unix from xen.xend import scheduler from xen.xend import sxp -from xen.xend import EventServer; eserver = EventServer.instance() from xen.xend.XendError import XendError -from xen.xend import XendRoot; xroot = XendRoot.instance() +from xen.xend import XendRoot from xen.xend.XendLogging import log -from xen.xend import XendCheckpoint + + +xroot = XendRoot.instance() + DEBUG = 0 @@ -115,7 +118,7 @@ self.send_reply(["ready", name]) self.transport.sock.setblocking(1) xd = xroot.get_component("xen.xend.XendDomain") - XendCheckpoint.restore(xd, self.transport.sock.fileno()) + xd.domain_restore_fd(self.transport.sock.fileno()) self.transport.sock.setblocking(0) else: log.error(name + ": no transport") @@ -136,11 +139,11 @@ factory = RelocationFactory() if xroot.get_xend_unix_server(): path = '/var/lib/xend/relocation-socket' - reactor.listenUNIX(path, factory) + unix.listenUNIX(path, factory) if xroot.get_xend_relocation_server(): port = xroot.get_xend_relocation_port() interface = xroot.get_xend_relocation_address() - l = reactor.listenTCP(port, factory, interface=interface) + l = tcp.listenTCP(port, factory, interface=interface) l.setCloExec() def setupRelocation(dst, port): diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/tpmif.py --- a/tools/python/xen/xend/server/tpmif.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/server/tpmif.py Sat Oct 8 20:28:24 2005 @@ -37,11 +37,24 @@ def getDeviceDetails(self, config): """@see DevController.getDeviceDetails""" - + devid = int(sxp.child_value(config, 'instance', '0')) - log.debug("The domain has a TPM with instance %d." % devid) + log.info("The domain has a TPM with instance %d." 
% devid) back = { 'instance' : "%i" % devid } front = { 'handle' : "%i" % devid } return (devid, back, front) + + def configuration(self, devid): + + result = DevController.configuration(self, devid) + + instance = self.readBackend(devid, + 'instance') + + if instance: + result.append(['instance', instance]) + log.info("configuration: instance=%s." % instance) + + return result diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/__init__.py --- a/tools/python/xen/xend/xenstore/__init__.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/xenstore/__init__.py Sat Oct 8 20:28:24 2005 @@ -14,5 +14,3 @@ #============================================================================ # Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> #============================================================================ -from xsnode import * -from xsobj import * diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/xsutil.py --- a/tools/python/xen/xend/xenstore/xsutil.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/xenstore/xsutil.py Sat Oct 8 20:28:24 2005 @@ -7,14 +7,17 @@ import threading from xen.lowlevel import xs -handles = {} +xs_lock = threading.Lock() +xs_handle = None -# XXX need to g/c handles from dead threads def xshandle(): - if not handles.has_key(threading.currentThread()): - handles[threading.currentThread()] = xs.open() - return handles[threading.currentThread()] - + global xs_handle, xs_lock + if not xs_handle: + xs_lock.acquire() + if not xs_handle: + xs_handle = xs.open() + xs_lock.release() + return xs_handle def IntroduceDomain(domid, page, port, path): return xshandle().introduce_domain(domid, page, port, path) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/xswatch.py --- a/tools/python/xen/xend/xenstore/xswatch.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/xenstore/xswatch.py Sat Oct 8 20:28:24 2005 @@ -12,7 +12,6 @@ class xswatch: watchThread = None - threadcond = threading.Condition() xs = None xslock = threading.Lock() @@ -21,43 +20,31 @@ self.args = args self.kwargs = kwargs xswatch.watchStart() - xswatch.xslock.acquire() xswatch.xs.watch(path, self) - xswatch.xslock.release() def watchStart(cls): - cls.threadcond.acquire() + cls.xslock.acquire() if cls.watchThread: - cls.threadcond.release() + cls.xslock.release() return + # XXX: When we fix xenstored to have better watch semantics, + # this can change to shared xshandle(). Currently that would result + # in duplicate watch firings, thus failed extra xs.acknowledge_watch. 
+ cls.xs = xs.open() cls.watchThread = threading.Thread(name="Watcher", target=cls.watchMain) cls.watchThread.setDaemon(True) cls.watchThread.start() - while cls.xs == None: - cls.threadcond.wait() - cls.threadcond.release() + cls.xslock.release() watchStart = classmethod(watchStart) def watchMain(cls): - cls.threadcond.acquire() - cls.xs = xs.open() - cls.threadcond.notifyAll() - cls.threadcond.release() while True: try: - (fd, _1, _2) = select.select([ cls.xs ], [], []) - cls.xslock.acquire() - # reconfirm ready to read with lock - (fd, _1, _2) = select.select([ cls.xs ], [], [], 0.001) - if not cls.xs in fd: - cls.xslock.release() - continue we = cls.xs.read_watch() watch = we[1] cls.xs.acknowledge_watch(watch) - cls.xslock.release() except RuntimeError, ex: print ex raise diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xm/create.py Sat Oct 8 20:28:24 2005 @@ -163,10 +163,46 @@ gopts.var('restart', val='onreboot|always|never', fn=set_value, default=None, - use="""Whether the domain should be restarted on exit. + use="""Deprecated. Use on_poweroff, on_reboot, and on_crash + instead. + + Whether the domain should be restarted on exit. - onreboot: restart on exit with shutdown code reboot - always: always restart on exit, ignore exit code - never: never restart on exit, ignore exit code""") + +gopts.var('on_poweroff', val='destroy|restart|preserve|rename-restart', + fn=set_value, default=None, + use="""Behaviour when a domain exits with reason 'poweroff'. + - destroy: the domain is cleaned up as normal; + - restart: a new domain is started in place of the old one; + - preserve: no clean-up is done until the domain is manually + destroyed (using xm destroy, for example); + - rename-restart: the old domain is not cleaned up, but is + renamed and a new domain started in its place. + """) + +gopts.var('on_reboot', val='destroy|restart|preserve|rename-restart', + fn=set_value, default=None, + use="""Behaviour when a domain exits with reason 'reboot'. + - destroy: the domain is cleaned up as normal; + - restart: a new domain is started in place of the old one; + - preserve: no clean-up is done until the domain is manually + destroyed (using xm destroy, for example); + - rename-restart: the old domain is not cleaned up, but is + renamed and a new domain started in its place. + """) + +gopts.var('on_crash', val='destroy|restart|preserve|rename-restart', + fn=set_value, default=None, + use="""Behaviour when a domain exits with reason 'crash'. + - destroy: the domain is cleaned up as normal; + - restart: a new domain is started in place of the old one; + - preserve: no clean-up is done until the domain is manually + destroyed (using xm destroy, for example); + - rename-restart: the old domain is not cleaned up, but is + renamed and a new domain started in its place. 
+ """) gopts.var('blkif', val='no|yes', fn=set_bool, default=0, @@ -536,6 +572,12 @@ config.append(['backend', ['tpmif']]) if vals.restart: config.append(['restart', vals.restart]) + if vals.on_poweroff: + config.append(['on_poweroff', vals.on_poweroff]) + if vals.on_reboot: + config.append(['on_reboot', vals.on_reboot]) + if vals.on_crash: + config.append(['on_crash', vals.on_crash]) if vals.bootloader: config.append(['bootloader', vals.bootloader]) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xm/destroy.py --- a/tools/python/xen/xm/destroy.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xm/destroy.py Sat Oct 8 20:28:24 2005 @@ -21,18 +21,14 @@ from xen.xend.XendClient import server from xen.xm.opts import * -gopts = Opts(use="""[options] [DOM] +gopts = Opts(use="""[DOM] -Destroy a domain, optionally restarting it. +Destroy a domain. """) gopts.opt('help', short='h', fn=set_true, default=0, use="Print this help.") - -gopts.opt('reboot', short='R', - fn=set_true, default=0, - use='Destroy and restart.') def main(argv): opts = gopts @@ -42,10 +38,4 @@ return if len(args) < 1: opts.err('Missing domain') dom = args[0] - if opts.vals.reboot: - mode = 'reboot' - else: - mode = 'halt' - server.xend_domain_destroy(dom, mode) - - + server.xend_domain_destroy(dom) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xm/main.py Sat Oct 8 20:28:24 2005 @@ -30,9 +30,15 @@ import socket import warnings warnings.filterwarnings('ignore', category=FutureWarning) + +import xen.xend.XendError +import xen.xend.XendProtocol + from xen.xend import PrettyPrint from xen.xend import sxp from xen.xm.opts import * + + shorthelp = """Usage: xm <subcommand> [args] Control, list, and manipulate Xen guest instances @@ -80,8 +86,8 @@ shutdown [-w|-a] <DomId> shutdown a domain sysrq <DomId> <letter> send a sysrq to a domain unpause <DomId> unpause a paused domain - vcpu-enable <DomId> <VCPU> disable VCPU in a domain - vcpu-disable <DomId> <VCPU> enable VCPU in a domain + vcpu-enable <DomId> <VCPU> enable VCPU in a domain + vcpu-disable <DomId> <VCPU> disable VCPU in a domain vcpu-list <DomId> get the list of VCPUs for a domain vcpu-pin <DomId> <VCpu> <CPUS> set which cpus a VCPU can use. @@ -100,9 +106,10 @@ Virtual Device Commands: block-attach <DomId> <BackDev> <FrontDev> <Mode> [BackDomId] Create a new virtual block device - block-detach <DomId> <DevId> Destroy a domain's virtual block device + block-detach <DomId> <DevId> Destroy a domain's virtual block device, + where <DevId> may either be the device ID + or the device name as mounted in the guest. 
block-list <DomId> List virtual block devices for a domain - block-refresh <DomId> <DevId> Refresh a virtual block device for a domain network-limit <DomId> <Vif> <Credit> <Period> Limit the transmission rate of a virtual network interface network-list <DomId> List virtual network interfaces for a domain @@ -159,6 +166,9 @@ error = str(ex) if error == "Not found" and dom != None: err("Domain '%s' not found when running 'xm %s'" % (dom, cmd)) + sys.exit(1) + elif error == "Exception: Device not connected": + err("Device not connected") sys.exit(1) else: raise ex @@ -513,15 +523,6 @@ from xen.xend.XendClient import server server.xend_domain_device_create(dom, vbd) -def xm_block_refresh(args): - arg_check(args,2,"block-refresh") - - dom = args[0] - dev = args[1] - - from xen.xend.XendClient import server - server.xend_domain_device_refresh(dom, 'vbd', dev) - def xm_block_detach(args): arg_check(args,2,"block-detach") @@ -608,7 +609,6 @@ "block-attach": xm_block_attach, "block-detach": xm_block_detach, "block-list": xm_block_list, - "block-refresh": xm_block_refresh, # network "network-limit": xm_network_limit, "network-list": xm_network_list, @@ -637,7 +637,6 @@ "vbd-create": "block-create", "vbd-destroy": "block-destroy", "vbd-list": "block-list", - "vbd-refresh": "block-refresh", } help = { @@ -687,7 +686,6 @@ args = argv[2:] if cmd: try: - from xen.xend.XendClient import XendError rc = cmd(args) if rc: usage() @@ -698,7 +696,15 @@ except IOError: err("Most commands need root access. Please try again as root") sys.exit(1) - except XendError, ex: + except xen.xend.XendError.XendError, ex: + if len(args) > 0: + handle_xend_error(argv[1], args[0], ex) + else: + print "Unexpected error:", sys.exc_info()[0] + print + print "Please report to xen-devel@xxxxxxxxxxxxxxxxxxx" + raise + except xen.xend.XendProtocol.XendError, ex: if len(args) > 0: handle_xend_error(argv[1], args[0], ex) else: @@ -719,6 +725,3 @@ if __name__ == "__main__": main() - - - diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xm/shutdown.py --- a/tools/python/xen/xm/shutdown.py Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xm/shutdown.py Sat Oct 8 20:28:24 2005 @@ -73,19 +73,15 @@ opts.info("All domains terminated") def shutdown_mode(opts): - mode = 'poweroff' - if opts.vals.wait: - mode = 'halt' - if opts.vals.reboot: - opts.err("Can't specify wait and reboot") + if opts.vals.halt and opts.vals.reboot: + opts.err("Can't specify halt and reboot") + + if opts.vals.halt: + return 'halt' + elif opts.vals.reboot: + return 'reboot' else: - if opts.vals.halt and opts.vals.reboot: - opts.err("Can't specify halt and reboot") - if opts.vals.halt: - mode = 'halt' - elif opts.vals.reboot: - mode = 'reboot' - return mode + return 'poweroff' def main_all(opts, args): mode = shutdown_mode(opts) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Sat Oct 8 17:37:45 2005 +++ b/tools/tests/test_x86_emulator.c Sat Oct 8 20:28:24 2005 @@ -78,7 +78,7 @@ int main(int argc, char **argv) { - struct xen_regs regs; + struct cpu_user_regs regs; char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ unsigned int res = 0x7FFFFFFF; u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 }; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/xenstat/libxenstat/Makefile Sat Oct 8 20:28:24 2005 @@ -38,9 +38,7 @@ WARN_FLAGS=-Wall -Werror -CFLAGS+=-Isrc -CFLAGS+=-I$(XEN_ROOT)/xen/include/public 
-CFLAGS+=-I$(LINUX_ROOT)/include/asm-xen/linux-public/ +CFLAGS+=-Isrc -I$(XEN_LIBXC) LDFLAGS+=-Lsrc all: $(LIB) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstat/libxenstat/src/xen-interface.c --- a/tools/xenstat/libxenstat/src/xen-interface.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstat/libxenstat/src/xen-interface.c Sat Oct 8 20:28:24 2005 @@ -23,9 +23,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include "version.h" -#include "privcmd.h" -#include "xen.h" +#include <xen/linux/privcmd.h> struct xi_handle { int fd; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstat/libxenstat/src/xen-interface.h --- a/tools/xenstat/libxenstat/src/xen-interface.h Sat Oct 8 17:37:45 2005 +++ b/tools/xenstat/libxenstat/src/xen-interface.h Sat Oct 8 20:28:24 2005 @@ -26,8 +26,10 @@ typedef uint32_t u32; typedef uint64_t u64; -#include "dom0_ops.h" -#include "version.h" +#include <xen/xen.h> +#include <xen/dom0_ops.h> +#include <xen/sched.h> +#include <xen/version.h> /* Opaque handles */ typedef struct xi_handle xi_handle; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstat/libxenstat/src/xenstat.c Sat Oct 8 20:28:24 2005 @@ -22,7 +22,6 @@ #include <unistd.h> #include <xen-interface.h> #include "xenstat.h" -#include "version.h" /* * Types diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/Makefile Sat Oct 8 20:28:24 2005 @@ -15,7 +15,6 @@ BASECFLAGS+= -O3 $(PROFILE) #BASECFLAGS+= -I$(XEN_ROOT)/tools BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc -BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public BASECFLAGS+= -I. CFLAGS += $(BASECFLAGS) @@ -30,12 +29,11 @@ all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump -testcode: xs_test xenstored_test xs_random xs_dom0_test +testcode: xs_test xenstored_test xs_random xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ -$(CLIENTS): libxenstore.so $(CLIENTS): xenstore-%: xenstore_%.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -L. -lxenstore -o $@ @@ -48,6 +46,7 @@ xs_tdb_dump: xs_tdb_dump.o utils.o tdb.o talloc.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ +xs_test xs_random xs_stress xs_crashme: LDFLAGS+=-lpthread xs_test: xs_test.o xs_lib.o utils.o xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o @@ -70,12 +69,12 @@ $(COMPILE.c) -o $@ $< libxenstore.so: xs.opic xs_lib.opic - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so -shared -o $@ $^ -lpthread clean: testsuite-clean rm -f *.o *.opic *.so rm -f xenstored xs_random xs_stress xs_crashme - rm -f xs_test xenstored_test xs_dom0_test + rm -f xs_test xenstored_test $(RM) $(PROG_DEP) print-dir: @@ -121,9 +120,6 @@ rm -rf $(TESTDIR)/store $(TESTDIR)/transactions export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret -xs_dom0_test: xs_dom0_test.o utils.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ - TAGS: etags `find . 
-name '*.[ch]'` diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/fake_libxc.c --- a/tools/xenstore/fake_libxc.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/fake_libxc.c Sat Oct 8 20:28:24 2005 @@ -36,12 +36,11 @@ static u16 port; /* The event channel maps to a signal, shared page to an mmapped file. */ -int xc_evtchn_send(int xc_handle __attribute__((unused)), int local_port) +void evtchn_notify(int local_port) { assert(local_port == port); if (kill(xs_test_pid, SIGUSR2) != 0) barf_perror("fake event channel failed"); - return 0; } void *xc_map_foreign_range(int xc_handle, u32 dom __attribute__((unused)), @@ -107,15 +106,6 @@ return 1; } -int xc_evtchn_bind_virq(int xc_handle __attribute__((unused)), - int virq __attribute__((unused)), - int *port) -{ - if (port) - *port = 0; - return 0; -} - static void send_to_fd(int signo __attribute__((unused))) { int saved_errno = errno; diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/speedtest.c --- a/tools/xenstore/speedtest.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/speedtest.c Sat Oct 8 20:28:24 2005 @@ -98,7 +98,7 @@ if (i % print == 0) write(1, ".", 1); - if (!xs_transaction_start(h, "/")) { + if (!xs_transaction_start(h)) { kill_daemon(pid); barf_perror("Starting transaction"); } diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/testsuite/12readonly.test --- a/tools/xenstore/testsuite/12readonly.test Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/testsuite/12readonly.test Sat Oct 8 20:28:24 2005 @@ -27,8 +27,6 @@ setperm /test 100 NONE expect setperm failed: Permission denied setperm /test 100 NONE -expect shutdown failed: Permission denied -shutdown expect introduce failed: Permission denied introduce 1 100 7 /home diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/testsuite/test.sh --- a/tools/xenstore/testsuite/test.sh Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/testsuite/test.sh Sat Oct 8 20:28:24 2005 @@ -23,7 +23,8 @@ cat testsuite/tmp/xenstored_errors return 1 fi - echo shutdown | ./xs_test + kill $PID + sleep 1 return 0 else # In case daemon is wedged. diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xenstored_core.c Sat Oct 8 20:28:24 2005 @@ -44,13 +44,14 @@ #include "list.h" #include "talloc.h" #include "xs_lib.h" -#include "xenstored.h" #include "xenstored_core.h" #include "xenstored_watch.h" #include "xenstored_transaction.h" #include "xenstored_domain.h" #include "xenctrl.h" #include "tdb.h" + +int event_fd; static bool verbose; LIST_HEAD(connections); @@ -149,7 +150,6 @@ { switch (type) { case XS_DEBUG: return "DEBUG"; - case XS_SHUTDOWN: return "SHUTDOWN"; case XS_DIRECTORY: return "DIRECTORY"; case XS_READ: return "READ"; case XS_GET_PERMS: return "GET_PERMS"; @@ -309,8 +309,7 @@ return 0; } -static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock, - int event_fd) +static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock) { struct connection *i; int max; @@ -1082,17 +1081,6 @@ case XS_SET_PERMS: do_set_perms(conn, in); break; - - case XS_SHUTDOWN: - /* FIXME: Implement gentle shutdown too. */ - /* Only tools can do this. */ - if (conn->id != 0 || !conn->can_write) { - send_error(conn, EACCES); - break; - } - send_ack(conn, XS_SHUTDOWN); - /* Everything hangs off auto-free context, freed at exit. 
*/ - exit(0); case XS_DEBUG: if (streq(in->buffer, "print")) @@ -1464,7 +1452,7 @@ int main(int argc, char *argv[]) { - int opt, *sock, *ro_sock, event_fd, max; + int opt, *sock, *ro_sock, max; struct sockaddr_un addr; fd_set inset, outset; bool dofork = true; @@ -1568,7 +1556,7 @@ #endif /* Get ready to listen to the tools. */ - max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd); + max = initialize_set(&inset, &outset, *sock, *ro_sock); /* Main loop. */ /* FIXME: Rewrite so noone can starve. */ @@ -1588,7 +1576,7 @@ accept_connection(*ro_sock, false); if (FD_ISSET(event_fd, &inset)) - handle_event(event_fd); + handle_event(); list_for_each_entry(i, &connections, list) { if (i->domain) @@ -1624,7 +1612,6 @@ } } - max = initialize_set(&inset, &outset, *sock, *ro_sock, - event_fd); - } -} + max = initialize_set(&inset, &outset, *sock, *ro_sock); + } +} diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xenstored_core.h Sat Oct 8 20:28:24 2005 @@ -26,7 +26,6 @@ #include <stdint.h> #include <errno.h> #include "xs_lib.h" -#include "xenstored.h" #include "list.h" #include "tdb.h" @@ -173,4 +172,6 @@ void trace_watch_timeout(const struct connection *conn, const char *node, const char *token); void trace(const char *fmt, ...); +extern int event_fd; + #endif /* _XENSTORED_CORE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xenstored_domain.c Sat Oct 8 20:28:24 2005 @@ -36,6 +36,8 @@ #include "xenstored_watch.h" #include "xenstored_test.h" +#include <xen/linux/evtchn.h> + static int *xc_handle; static int eventchn_fd; static int virq_port; @@ -77,8 +79,16 @@ char buf[0]; } __attribute__((packed)); -#define EVENTCHN_BIND _IO('E', 2) -#define EVENTCHN_UNBIND _IO('E', 3) +#ifndef TESTING +static void evtchn_notify(int port) +{ + struct ioctl_evtchn_notify notify; + notify.port = port; + (void)ioctl(event_fd, IOCTL_EVTCHN_NOTIFY, &notify); +} +#else +extern void evtchn_notify(int port); +#endif /* FIXME: Mark connection as broken (close it?) when this happens. */ static bool check_buffer(const struct ringbuf_head *h) @@ -165,9 +175,7 @@ memcpy(dest, data, len); mb(); update_output_chunk(conn->domain->output, len); - /* FIXME: Probably not neccessary. */ - mb(); - xc_evtchn_send(*xc_handle, conn->domain->port); + evtchn_notify(conn->domain->port); return len; } @@ -200,21 +208,24 @@ /* If it was full, tell them we've taken some. */ if (was_full) - xc_evtchn_send(*xc_handle, conn->domain->port); + evtchn_notify(conn->domain->port); return len; } static int destroy_domain(void *_domain) { struct domain *domain = _domain; + struct ioctl_evtchn_unbind unbind; list_del(&domain->list); - if (domain->port && - (ioctl(eventchn_fd, EVENTCHN_UNBIND, domain->port) != 0)) - eprintf("> Unbinding port %i failed!\n", domain->port); - - if(domain->page) + if (domain->port) { + unbind.port = domain->port; + if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1) + eprintf("> Unbinding port %i failed!\n", domain->port); + } + + if (domain->page) munmap(domain->page, getpagesize()); return 0; @@ -247,7 +258,7 @@ } /* We scan all domains rather than use the information given here.
*/ -void handle_event(int event_fd) +void handle_event(void) { u16 port; @@ -278,6 +289,9 @@ const char *path) { struct domain *domain; + struct ioctl_evtchn_bind_interdomain bind; + int rc; + domain = talloc(context, struct domain); domain->port = 0; domain->shutdown = 0; @@ -298,10 +312,13 @@ domain->output = domain->page + getpagesize()/2; /* Tell kernel we're interested in this event. */ - if (ioctl(eventchn_fd, EVENTCHN_BIND, port) != 0) + bind.remote_domain = domid; + bind.remote_port = port; + rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); + if (rc == -1) return NULL; - domain->port = port; + domain->port = rc; domain->conn = new_connection(writechn, readchn); domain->conn->domain = domain; return domain; @@ -445,6 +462,8 @@ int domain_init(void) { struct stat st; + struct ioctl_evtchn_bind_virq bind; + int rc; /* The size of the ringbuffer: half a page minus head structure. */ ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head); @@ -482,11 +501,11 @@ if (eventchn_fd < 0) barf_perror("Failed to open evtchn device"); - if (xc_evtchn_bind_virq(*xc_handle, VIRQ_DOM_EXC, &virq_port)) - barf_perror("Failed to bind to domain exception virq"); - - if (ioctl(eventchn_fd, EVENTCHN_BIND, virq_port) != 0) + bind.virq = VIRQ_DOM_EXC; + rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind); + if (rc == -1) barf_perror("Failed to bind to domain exception virq port"); + virq_port = rc; return eventchn_fd; } diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xenstored_domain.h --- a/tools/xenstore/xenstored_domain.h Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xenstored_domain.h Sat Oct 8 20:28:24 2005 @@ -20,7 +20,7 @@ #ifndef _XENSTORED_DOMAIN_H #define _XENSTORED_DOMAIN_H -void handle_event(int event_fd); +void handle_event(void); /* domid, mfn, eventchn, path */ void do_introduce(struct connection *conn, struct buffered_data *in); diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs.c --- a/tools/xenstore/xs.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs.c Sat Oct 8 20:28:24 2005 @@ -32,84 +32,153 @@ #include <stdint.h> #include <errno.h> #include <sys/ioctl.h> +#include <pthread.h> #include "xs.h" -#include "xenstored.h" -#include "xs_lib.h" +#include "list.h" #include "utils.h" -struct xs_handle -{ +struct xs_stored_msg { + struct list_head list; + struct xsd_sockmsg hdr; + char *body; +}; + +struct xs_handle { + /* Communications channel to xenstore daemon. */ int fd; + + /* + * A read thread which pulls messages off the comms channel and + * signals waiters. + */ + pthread_t read_thr; + + /* + * A list of fired watch messages, protected by a mutex. Users can + * wait on the condition variable until a watch is pending. + */ + struct list_head watch_list; + pthread_mutex_t watch_mutex; + pthread_cond_t watch_condvar; + + /* Clients can select() on this pipe to wait for a watch to fire. */ + int watch_pipe[2]; + + /* + * A list of replies. Currently only one will ever be outstanding + * because we serialise requests. The requester can wait on the + * condition variable for its response. + */ + struct list_head reply_list; + pthread_mutex_t reply_mutex; + pthread_cond_t reply_condvar; + + /* One request at a time. */ + pthread_mutex_t request_mutex; + + /* One transaction at a time. */ + pthread_mutex_t transaction_mutex; }; -/* Get the socket from the store daemon handle.
- */ +static void *read_thread(void *arg); + int xs_fileno(struct xs_handle *h) { - return h->fd; -} - -static struct xs_handle *get_socket(const char *connect_to) + char c = 0; + + pthread_mutex_lock(&h->watch_mutex); + + if ((h->watch_pipe[0] == -1) && (pipe(h->watch_pipe) != -1)) { + /* Kick things off if the watch list is already non-empty. */ + if (!list_empty(&h->watch_list)) + while (write(h->watch_pipe[1], &c, 1) != 1) + continue; + } + + pthread_mutex_unlock(&h->watch_mutex); + + return h->watch_pipe[0]; +} + +static int get_socket(const char *connect_to) { struct sockaddr_un addr; int sock, saved_errno; - struct xs_handle *h = NULL; sock = socket(PF_UNIX, SOCK_STREAM, 0); if (sock < 0) - return NULL; + return -1; addr.sun_family = AF_UNIX; strcpy(addr.sun_path, connect_to); - if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) { - h = malloc(sizeof(*h)); - if (h) { - h->fd = sock; - return h; - } - } - + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) != 0) { + saved_errno = errno; + close(sock); + errno = saved_errno; + return -1; + } + + return sock; +} + +static int get_dev(const char *connect_to) +{ + return open(connect_to, O_RDWR); +} + +static struct xs_handle *get_handle(const char *connect_to) +{ + struct stat buf; + struct xs_handle *h = NULL; + int fd = -1, saved_errno; + + if (stat(connect_to, &buf) != 0) + goto error; + + if (S_ISSOCK(buf.st_mode)) + fd = get_socket(connect_to); + else + fd = get_dev(connect_to); + + if (fd == -1) + goto error; + + h = malloc(sizeof(*h)); + if (h == NULL) + goto error; + + h->fd = fd; + + /* Watch pipe is allocated on demand in xs_fileno(). */ + h->watch_pipe[0] = h->watch_pipe[1] = -1; + + INIT_LIST_HEAD(&h->watch_list); + pthread_mutex_init(&h->watch_mutex, NULL); + pthread_cond_init(&h->watch_condvar, NULL); + + INIT_LIST_HEAD(&h->reply_list); + pthread_mutex_init(&h->reply_mutex, NULL); + pthread_cond_init(&h->reply_condvar, NULL); + + pthread_mutex_init(&h->request_mutex, NULL); + pthread_mutex_init(&h->transaction_mutex, NULL); + + if (pthread_create(&h->read_thr, NULL, read_thread, h) != 0) + goto error; + + return h; + + error: saved_errno = errno; - close(sock); + if (h != NULL) + free(h); + if (fd != -1) + close(fd); errno = saved_errno; return NULL; } -static struct xs_handle *get_dev(const char *connect_to) -{ - int fd, saved_errno; - struct xs_handle *h; - - fd = open(connect_to, O_RDWR); - if (fd < 0) - return NULL; - - h = malloc(sizeof(*h)); - if (h) { - h->fd = fd; - return h; - } - - saved_errno = errno; - close(fd); - errno = saved_errno; - return NULL; -} - -static struct xs_handle *get_handle(const char *connect_to) -{ - struct stat buf; - - if (stat(connect_to, &buf) != 0) - return NULL; - - if (S_ISSOCK(buf.st_mode)) - return get_socket(connect_to); - else - return get_dev(connect_to); -} - struct xs_handle *xs_daemon_open(void) { return get_handle(xs_daemon_socket()); @@ -127,8 +196,39 @@ void xs_daemon_close(struct xs_handle *h) { - if (h->fd >= 0) - close(h->fd); + struct xs_stored_msg *msg, *tmsg; + + pthread_mutex_lock(&h->transaction_mutex); + pthread_mutex_lock(&h->request_mutex); + pthread_mutex_lock(&h->reply_mutex); + pthread_mutex_lock(&h->watch_mutex); + + /* XXX FIXME: May leak an unpublished message buffer. 
*/ + pthread_cancel(h->read_thr); + pthread_join(h->read_thr, NULL); + + list_for_each_entry_safe(msg, tmsg, &h->reply_list, list) { + free(msg->body); + free(msg); + } + + list_for_each_entry_safe(msg, tmsg, &h->watch_list, list) { + free(msg->body); + free(msg); + } + + pthread_mutex_unlock(&h->transaction_mutex); + pthread_mutex_unlock(&h->request_mutex); + pthread_mutex_unlock(&h->reply_mutex); + pthread_mutex_unlock(&h->watch_mutex); + + if (h->watch_pipe[0] != -1) { + close(h->watch_pipe[0]); + close(h->watch_pipe[1]); + } + + close(h->fd); + free(h); } @@ -171,31 +271,28 @@ } /* Adds extra nul terminator, because we generally (always?) hold strings. */ -static void *read_reply(int fd, enum xsd_sockmsg_type *type, unsigned int *len) -{ - struct xsd_sockmsg msg; - void *ret; - int saved_errno; - - if (!read_all(fd, &msg, sizeof(msg))) - return NULL; - - ret = malloc(msg.len + 1); - if (!ret) - return NULL; - - if (!read_all(fd, ret, msg.len)) { - saved_errno = errno; - free(ret); - errno = saved_errno; - return NULL; - } - - *type = msg.type; +static void *read_reply( + struct xs_handle *h, enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + + pthread_mutex_lock(&h->reply_mutex); + while (list_empty(&h->reply_list)) + pthread_cond_wait(&h->reply_condvar, &h->reply_mutex); + msg = list_top(&h->reply_list, struct xs_stored_msg, list); + list_del(&msg->list); + assert(list_empty(&h->reply_list)); + pthread_mutex_unlock(&h->reply_mutex); + + *type = msg->hdr.type; if (len) - *len = msg.len; - ((char *)ret)[msg.len] = '\0'; - return ret; + *len = msg->hdr.len; + body = msg->body; + + free(msg); + + return body; } /* Send message to xs, get malloc'ed reply. NULL and set errno on error. */ @@ -219,6 +316,8 @@ ignorepipe.sa_flags = 0; sigaction(SIGPIPE, &ignorepipe, &oldact); + pthread_mutex_lock(&h->request_mutex); + if (!xs_write_all(h->fd, &msg, sizeof(msg))) goto fail; @@ -226,14 +325,11 @@ if (!xs_write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len)) goto fail; - /* Watches can have fired before reply comes: daemon detects - * and re-transmits, so we can ignore this. */ - do { - free(ret); - ret = read_reply(h->fd, &msg.type, len); - if (!ret) - goto fail; - } while (msg.type == XS_WATCH_EVENT); + ret = read_reply(h, &msg.type, len); + if (!ret) + goto fail; + + pthread_mutex_unlock(&h->request_mutex); sigaction(SIGPIPE, &oldact, NULL); if (msg.type == XS_ERROR) { @@ -254,6 +350,7 @@ fail: /* We're in a bad state, so close fd. */ saved_errno = errno; + pthread_mutex_unlock(&h->request_mutex); sigaction(SIGPIPE, &oldact, NULL); close_fd: close(h->fd); @@ -449,25 +546,50 @@ * Returns array of two pointers: path and token, or NULL. * Call free() after use. */ -char **xs_read_watch(struct xs_handle *h) -{ - struct xsd_sockmsg msg; - char **ret; - - if (!read_all(h->fd, &msg, sizeof(msg))) +char **xs_read_watch(struct xs_handle *h, unsigned int *num) +{ + struct xs_stored_msg *msg; + char **ret, *strings, c = 0; + unsigned int num_strings, i; + + pthread_mutex_lock(&h->watch_mutex); + + /* Wait on the condition variable for a watch to fire. */ + while (list_empty(&h->watch_list)) + pthread_cond_wait(&h->watch_condvar, &h->watch_mutex); + msg = list_top(&h->watch_list, struct xs_stored_msg, list); + list_del(&msg->list); + + /* Clear the pipe token if there are no more pending watches. 
*/ + if (list_empty(&h->watch_list) && (h->watch_pipe[0] != -1)) + while (read(h->watch_pipe[0], &c, 1) != 1) + continue; + + pthread_mutex_unlock(&h->watch_mutex); + + assert(msg->hdr.type == XS_WATCH_EVENT); + + strings = msg->body; + num_strings = xs_count_strings(strings, msg->hdr.len); + + ret = malloc(sizeof(char*) * num_strings + msg->hdr.len); + if (!ret) { + free_no_errno(strings); + free_no_errno(msg); return NULL; - - assert(msg.type == XS_WATCH_EVENT); - ret = malloc(sizeof(char *)*2 + msg.len); - if (!ret) - return NULL; - - ret[0] = (char *)(ret + 2); - if (!read_all(h->fd, ret[0], msg.len)) { - free_no_errno(ret); - return NULL; - } - ret[1] = ret[0] + strlen(ret[0]) + 1; + } + + ret[0] = (char *)(ret + num_strings); + memcpy(ret[0], strings, msg->hdr.len); + + free(strings); + free(msg); + + for (i = 1; i < num_strings; i++) + ret[i] = ret[i - 1] + strlen(ret[i - 1]) + 1; + + *num = num_strings; + return ret; } @@ -502,6 +624,7 @@ */ bool xs_transaction_start(struct xs_handle *h) { + pthread_mutex_lock(&h->transaction_mutex); return xs_bool(xs_single(h, XS_TRANSACTION_START, "", NULL)); } @@ -513,12 +636,18 @@ bool xs_transaction_end(struct xs_handle *h, bool abort) { char abortstr[2]; + bool rc; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); - return xs_bool(xs_single(h, XS_TRANSACTION_END, abortstr, NULL)); + + rc = xs_bool(xs_single(h, XS_TRANSACTION_END, abortstr, NULL)); + + pthread_mutex_unlock(&h->transaction_mutex); + + return rc; } /* Introduce a new domain. @@ -567,18 +696,6 @@ return xs_single(h, XS_GET_DOMAIN_PATH, domid_str, NULL); } -bool xs_shutdown(struct xs_handle *h) -{ - bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL)); - if (ret) { - char c; - /* Wait for it to actually shutdown. */ - while ((read(h->fd, &c, 1) < 0) && (errno == EINTR)) - continue; - } - return ret; -} - /* Only useful for DEBUG versions */ char *xs_debug_command(struct xs_handle *h, const char *cmd, void *data, unsigned int len) @@ -592,3 +709,75 @@ return xs_talkv(h, XS_DEBUG, iov, ARRAY_SIZE(iov), NULL); } + +static void *read_thread(void *arg) +{ + struct xs_handle *h = arg; + struct xs_stored_msg *msg = NULL; + char *body = NULL; + + for (;;) { + msg = NULL; + body = NULL; + + /* Allocate message structure and read the message header. */ + msg = malloc(sizeof(*msg)); + if (msg == NULL) + goto error; + if (!read_all(h->fd, &msg->hdr, sizeof(msg->hdr))) + goto error; + + /* Allocate and read the message body. */ + body = msg->body = malloc(msg->hdr.len + 1); + if (body == NULL) + goto error; + if (!read_all(h->fd, body, msg->hdr.len)) + goto error; + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + pthread_mutex_lock(&h->watch_mutex); + + /* Kick users out of their select() loop. */ + if (list_empty(&h->watch_list) && + (h->watch_pipe[1] != -1)) + while (write(h->watch_pipe[1], body, 1) != 1) + continue; + + list_add_tail(&msg->list, &h->watch_list); + pthread_cond_signal(&h->watch_condvar); + + pthread_mutex_unlock(&h->watch_mutex); + } else { + pthread_mutex_lock(&h->reply_mutex); + + /* There should only ever be one response pending! 
*/ + if (!list_empty(&h->reply_list)) { + pthread_mutex_unlock(&h->reply_mutex); + goto error; + } + + list_add_tail(&msg->list, &h->reply_list); + pthread_cond_signal(&h->reply_condvar); + + pthread_mutex_unlock(&h->reply_mutex); + } + } + + error: + if (body != NULL) + free(body); + if (msg != NULL) + free(msg); + return NULL; +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs.h --- a/tools/xenstore/xs.h Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs.h Sat Oct 8 20:28:24 2005 @@ -20,7 +20,7 @@ #ifndef _XS_H #define _XS_H -#include "xs_lib.h" +#include <xs_lib.h> struct xs_handle; @@ -91,10 +91,10 @@ int xs_fileno(struct xs_handle *h); /* Find out what node change was on (will block if nothing pending). - * Returns array of two pointers: path and token, or NULL. - * Call free() after use. + * Returns array containing the path and token. Use XS_WATCH_* to access these + * elements. Call free() after use. */ -char **xs_read_watch(struct xs_handle *h); +char **xs_read_watch(struct xs_handle *h, unsigned int *num); /* Acknowledge watch on node. Watches must be acknowledged before * any other watches can be read. @@ -141,7 +141,4 @@ char *xs_debug_command(struct xs_handle *h, const char *cmd, void *data, unsigned int len); -/* Shut down the daemon. */ -bool xs_shutdown(struct xs_handle *h); - #endif /* _XS_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs_crashme.c --- a/tools/xenstore/xs_crashme.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs_crashme.c Sat Oct 8 20:28:24 2005 @@ -24,7 +24,6 @@ #include "xs.h" #include "talloc.h" #include <errno.h> -#include "xenstored.h" #define XSTEST #define RAND_FREQ 128 /* One char in 32 is corrupted. */ @@ -231,20 +230,6 @@ return ret; } -static int random_flags(int *state) -{ - switch (get_randomness(state) % 4) { - case 0: - return 0; - case 1: - return O_CREAT; - case 2: - return O_CREAT|O_EXCL; - default: - return get_randomness(state); - } -} - /* Do the next operation, return the results. */ static void do_next_op(struct xs_handle *h, bool verbose) { @@ -315,7 +300,7 @@ case 7: { if (verbose) printf("START %s\n", name); - xs_transaction_start(h, name); + xs_transaction_start(h); break; } case 8: { diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs_lib.h --- a/tools/xenstore/xs_lib.h Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs_lib.h Sat Oct 8 20:28:24 2005 @@ -23,6 +23,8 @@ #include <stdbool.h> #include <limits.h> #include <xenctrl.h> +#include <errno.h> +#include <xen/io/xs_wire.h> /* Bitmask of permissions. 
*/ enum xs_perm_type { diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs_random.c --- a/tools/xenstore/xs_random.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs_random.c Sat Oct 8 20:28:24 2005 @@ -879,20 +879,11 @@ static void cleanup_xs_ops(void) { char *cmd; + if (daemon_pid) { - struct xs_handle *h; - h = xs_daemon_open(); - if (h) { - if (xs_shutdown(h)) { - waitpid(daemon_pid, NULL, 0); - daemon_pid = 0; - } - xs_daemon_close(h); - } - if (daemon_pid) { - kill(daemon_pid, SIGTERM); - waitpid(daemon_pid, NULL, 0); - } + kill(daemon_pid, SIGTERM); + waitpid(daemon_pid, NULL, 0); + daemon_pid = 0; } cmd = talloc_asprintf(NULL, "rm -rf testsuite/tmp/*"); diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs_test.c --- a/tools/xenstore/xs_test.c Sat Oct 8 17:37:45 2005 +++ b/tools/xenstore/xs_test.c Sat Oct 8 20:28:24 2005 @@ -198,7 +198,6 @@ " rm <path>\n" " getperm <path>\n" " setperm <path> <id> <flags> ...\n" - " shutdown\n" " watch <path> <token>\n" " watchnoack <path> <token>\n" " waitwatch\n" @@ -214,8 +213,6 @@ " notimeout\n" " readonly\n" " readwrite\n" - " noackwrite <path> <value>...\n" - " readack\n" " dump\n"); } @@ -353,37 +350,6 @@ { if (!xs_write(handles[handle], path, data, strlen(data))) failed(handle); -} - -static void do_noackwrite(unsigned int handle, - char *path, char *data) -{ - struct xsd_sockmsg msg; - - msg.len = strlen(path) + 1 + strlen(data); - msg.type = XS_WRITE; - if (!write_all_choice(handles[handle]->fd, &msg, sizeof(msg))) - failed(handle); - if (!write_all_choice(handles[handle]->fd, path, strlen(path) + 1)) - failed(handle); - if (!write_all_choice(handles[handle]->fd, data, strlen(data))) - failed(handle); - /* Do not wait for ack. */ -} - -static void do_readack(unsigned int handle) -{ - enum xsd_sockmsg_type type; - char *ret = NULL; - - /* Watches can have fired before reply comes: daemon detects - * and re-transmits, so we can ignore this. */ - do { - free(ret); - ret = read_reply(handles[handle]->fd, &type, NULL); - if (!ret) - failed(handle); - } while (type == XS_WATCH_EVENT); } static void do_setid(unsigned int handle, char *id) @@ -475,12 +441,6 @@ failed(handle); } -static void do_shutdown(unsigned int handle) -{ - if (!xs_shutdown(handles[handle])) - failed(handle); -} - static void do_watch(unsigned int handle, const char *node, const char *token, bool swallow_event) { @@ -489,8 +449,11 @@ /* Convenient for testing... */ if (swallow_event) { - char **vec = xs_read_watch(handles[handle]); - if (!vec || !streq(vec[0], node) || !streq(vec[1], token)) + unsigned int num; + char **vec = xs_read_watch(handles[handle], &num); + if (!vec || + !streq(vec[XS_WATCH_PATH], node) || + !streq(vec[XS_WATCH_TOKEN], token)) failed(handle); if (!xs_acknowledge_watch(handles[handle], token)) failed(handle); @@ -522,6 +485,7 @@ struct timeval tv = {.tv_sec = timeout_ms/1000, .tv_usec = (timeout_ms*1000)%1000000 }; fd_set set; + unsigned int num; if (xs_fileno(handles[handle]) != -2) { /* Manually select here so we can time out gracefully. 
*/ @@ -537,16 +501,17 @@ set_timeout(); } - vec = xs_read_watch(handles[handle]); + vec = xs_read_watch(handles[handle], &num); if (!vec) { failed(handle); return; } if (handle) - output("%i:%s:%s\n", handle, vec[0], vec[1]); + output("%i:%s:%s\n", handle, + vec[XS_WATCH_PATH], vec[XS_WATCH_TOKEN]); else - output("%s:%s\n", vec[0], vec[1]); + output("%s:%s\n", vec[XS_WATCH_PATH], vec[XS_WATCH_TOKEN]); free(vec); } @@ -775,8 +740,6 @@ do_getperm(handle, arg(line, 1)); else if (streq(command, "setperm")) do_setperm(handle, arg(line, 1), line); - else if (streq(command, "shutdown")) - do_shutdown(handle); else if (streq(command, "watch")) do_watch(handle, arg(line, 1), arg(line, 2), true); else if (streq(command, "watchnoack")) @@ -818,11 +781,7 @@ readonly = false; xs_daemon_close(handles[handle]); handles[handle] = NULL; - } else if (streq(command, "noackwrite")) - do_noackwrite(handle, arg(line,1), arg(line,2)); - else if (streq(command, "readack")) - do_readack(handle); - else + } else barf("Unknown command %s", command); fflush(stdout); disarm_timeout(); diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/Makefile --- a/xen/Makefile Sat Oct 8 17:37:45 2005 +++ b/xen/Makefile Sat Oct 8 20:28:24 2005 @@ -1,7 +1,3 @@ -# Default is to install to local 'dist' directory. -DISTDIR ?= ../dist -DESTDIR ?= $(DISTDIR)/install - INSTALL = install INSTALL_DATA = $(INSTALL) -m0644 INSTALL_DIR = $(INSTALL) -d -m0755 diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/Rules.mk --- a/xen/Rules.mk Sat Oct 8 17:37:45 2005 +++ b/xen/Rules.mk Sat Oct 8 20:28:24 2005 @@ -10,7 +10,8 @@ domu_debug ?= n crash_debug ?= n -include $(BASEDIR)/../Config.mk +XEN_ROOT=$(BASEDIR)/.. +include $(XEN_ROOT)/Config.mk # Set ARCH/SUBARCH appropriately. override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH) diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/dom0_ops.c Sat Oct 8 20:28:24 2005 @@ -186,7 +186,7 @@ pi->sockets_per_node = num_online_cpus() / (pi->threads_per_core * pi->cores_per_socket); pi->nr_nodes = 1; - pi->total_pages = max_page; + pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/domain.c Sat Oct 8 20:28:24 2005 @@ -323,47 +323,16 @@ #ifdef CONFIG_VMX static int vmx_switch_on; -static int vmx_final_setup_guest( - struct vcpu *v, struct vcpu_guest_context *ctxt) -{ - int error; - struct cpu_user_regs *regs; - struct vmcs_struct *vmcs; - - regs = &ctxt->user_regs; - - /* - * Create a new VMCS - */ - if (!(vmcs = alloc_vmcs())) { - printk("Failed to create a new VMCS\n"); - return -ENOMEM; - } - - memset(&v->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct)); - - v->arch.arch_vmx.vmcs = vmcs; - error = construct_vmcs( - &v->arch.arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV); - if ( error < 0 ) - { - printk("Failed to construct a new VMCS\n"); - goto out; - } - +static void vmx_final_setup_guest(struct vcpu *v) +{ v->arch.schedule_tail = arch_vmx_do_launch; -#if defined (__i386__) - v->domain->arch.vmx_platform.real_mode_data = - (unsigned long *) regs->esi; -#endif - if (v == v->domain->vcpu[0]) { - /* + /* * Required to do this once per domain * XXX todo: add a seperate function to do these. 
*/ - memset(&v->domain->shared_info->evtchn_mask[0], 0xff, + memset(&v->domain->shared_info->evtchn_mask[0], 0xff, sizeof(v->domain->shared_info->evtchn_mask)); /* Put the domain in shadow mode even though we're going to be using @@ -375,23 +344,6 @@ if (!vmx_switch_on) vmx_switch_on = 1; - - return 0; - -out: - free_vmcs(vmcs); - if(v->arch.arch_vmx.io_bitmap_a != 0) { - free_xenheap_pages( - v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); - v->arch.arch_vmx.io_bitmap_a = 0; - } - if(v->arch.arch_vmx.io_bitmap_b != 0) { - free_xenheap_pages( - v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); - v->arch.arch_vmx.io_bitmap_b = 0; - } - v->arch.arch_vmx.vmcs = 0; - return error; } #endif @@ -480,8 +432,7 @@ if ( !pagetable_get_paddr(d->arch.phys_table) ) d->arch.phys_table = v->arch.guest_table; - if ( (rc = vmx_final_setup_guest(v, c)) != 0 ) - return rc; + vmx_final_setup_guest(v); } update_pagetables(v); @@ -968,20 +919,7 @@ if ( !VMX_DOMAIN(v) ) return; - BUG_ON(v->arch.arch_vmx.vmcs == NULL); - free_vmcs(v->arch.arch_vmx.vmcs); - if(v->arch.arch_vmx.io_bitmap_a != 0) { - free_xenheap_pages( - v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); - v->arch.arch_vmx.io_bitmap_a = 0; - } - if(v->arch.arch_vmx.io_bitmap_b != 0) { - free_xenheap_pages( - v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); - v->arch.arch_vmx.io_bitmap_b = 0; - } - v->arch.arch_vmx.vmcs = 0; - + destroy_vmcs(&v->arch.arch_vmx); free_monitor_pagetable(v); rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer); } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/mm.c Sat Oct 8 20:28:24 2005 @@ -147,6 +147,7 @@ /* Frame table and its size in pages. */ struct pfn_info *frame_table; unsigned long max_page; +unsigned long total_pages; void __init init_frametable(void) { diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/setup.c Sat Oct 8 20:28:24 2005 @@ -418,6 +418,7 @@ printk("System RAM: %luMB (%lukB)\n", nr_pages >> (20 - PAGE_SHIFT), nr_pages << (PAGE_SHIFT - 10)); + total_pages = nr_pages; init_frametable(); @@ -474,13 +475,14 @@ { static char dom0_cmdline[MAX_GUEST_CMDLINE]; - /* Skip past the image name. */ + /* Skip past the image name and copy to a local buffer. */ while ( *cmdline == ' ' ) cmdline++; if ( (cmdline = strchr(cmdline, ' ')) != NULL ) + { while ( *cmdline == ' ' ) cmdline++; - - /* Copy the command line to a local buffer. */ - strcpy(dom0_cmdline, cmdline); + strcpy(dom0_cmdline, cmdline); + } + cmdline = dom0_cmdline; /* Append any extra parameters. */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/traps.c Sat Oct 8 20:28:24 2005 @@ -895,6 +895,14 @@ *reg = pagetable_get_paddr(v->arch.guest_table); break; + case 4: /* Read CR4 */ + /* + * Guests can read CR4 to see what features Xen has enabled. We + * therefore lie about PGE & PSE as they are unavailable to guests. 
+ */ + *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE); + break; + default: goto fail; } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/vmx.c Sat Oct 8 20:28:24 2005 @@ -41,14 +41,12 @@ #if CONFIG_PAGING_LEVELS >= 3 #include <asm/shadow_64.h> #endif - +#include <public/sched.h> #include <public/io/ioreq.h> int hvm_enabled; #ifdef CONFIG_VMX - -int vmcs_size; unsigned int opt_vmx_debug_level = 0; integer_param("vmx_debug", opt_vmx_debug_level); @@ -71,7 +69,7 @@ /* * To avoid MSR save/restore at every VM exit/entry time, we restore * the x86_64 specific MSRs at domain switch time. Since those MSRs are - * are not modified once set for generic domains, we don't save them, + * are not modified once set for generic domains, we don't save them, * but simply reset them to the values set at percpu_traps_init(). */ void vmx_load_msrs(struct vcpu *n) @@ -162,13 +160,13 @@ static inline int long_mode_do_msr_write(struct cpu_user_regs *regs) { - u64 msr_content = regs->eax | ((u64)regs->edx << 32); + u64 msr_content = regs->eax | ((u64)regs->edx << 32); struct vcpu *vc = current; struct msr_state * msr = &vc->arch.arch_vmx.msr_content; - struct msr_state * host_state = + struct msr_state * host_state = &percpu_msr[smp_processor_id()]; - VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n", + VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n", regs->ecx, msr_content); switch (regs->ecx){ @@ -191,11 +189,11 @@ msr_content; if (msr_content & ~(EFER_LME | EFER_LMA)){ msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content; - if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){ + if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){ rdmsrl(MSR_EFER, host_state->msr_items[VMX_INDEX_MSR_EFER]); set_bit(VMX_INDEX_MSR_EFER, &host_state->flags); - set_bit(VMX_INDEX_MSR_EFER, &msr->flags); + set_bit(VMX_INDEX_MSR_EFER, &msr->flags); wrmsrl(MSR_EFER, msr_content); } } @@ -211,7 +209,7 @@ } if (regs->ecx == MSR_FS_BASE) __vmwrite(GUEST_FS_BASE, msr_content); - else + else __vmwrite(GUEST_GS_BASE, msr_content); break; @@ -233,14 +231,14 @@ } void -vmx_restore_msrs(struct vcpu *d) +vmx_restore_msrs(struct vcpu *v) { int i = 0; struct msr_state *guest_state; struct msr_state *host_state; unsigned long guest_flags ; - guest_state = &d->arch.arch_vmx.msr_content;; + guest_state = &v->arch.arch_vmx.msr_content;; host_state = &percpu_msr[smp_processor_id()]; wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs); @@ -276,13 +274,13 @@ void do_nmi(struct cpu_user_regs *, unsigned long); static int check_vmx_controls(ctrls, msr) -{ - u32 vmx_msr_low, vmx_msr_high; +{ + u32 vmx_msr_low, vmx_msr_high; rdmsr(msr, vmx_msr_low, vmx_msr_high); if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) { printk("Insufficient VMX capability 0x%x, " - "msr=0x%x,low=0x%8x,high=0x%x\n", + "msr=0x%x,low=0x%8x,high=0x%x\n", ctrls, msr, vmx_msr_low, vmx_msr_high); return 0; } @@ -304,7 +302,7 @@ if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability))) return 0; - + rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx); if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) { @@ -314,28 +312,28 @@ } } else { - wrmsr(IA32_FEATURE_CONTROL_MSR, + wrmsr(IA32_FEATURE_CONTROL_MSR, IA32_FEATURE_CONTROL_MSR_LOCK | IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0); } - if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, + if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, MSR_IA32_VMX_PINBASED_CTLS_MSR)) return 0; - if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, + 
if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, MSR_IA32_VMX_PROCBASED_CTLS_MSR)) return 0; - if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS, + if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS, MSR_IA32_VMX_EXIT_CTLS_MSR)) return 0; - if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, + if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, MSR_IA32_VMX_ENTRY_CTLS_MSR)) return 0; set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */ if (!(vmcs = alloc_vmcs())) { - printk("Failed to allocate VMCS\n"); + printk("Failed to allocate VMCS\n"); return 0; } @@ -366,7 +364,7 @@ if ((len) < 1 || (len) > 15) \ __vmx_bug(&regs); -static void inline __update_guest_eip(unsigned long inst_len) +static void inline __update_guest_eip(unsigned long inst_len) { unsigned long current_eip; @@ -375,7 +373,7 @@ } -static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) +static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) { unsigned long gpa; /* FIXME: PAE */ int result; @@ -385,7 +383,7 @@ unsigned long eip; __vmread(GUEST_RIP, &eip); - VMX_DBG_LOG(DBG_LEVEL_VMMU, + VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", va, eip, (unsigned long)regs->error_code); } @@ -427,12 +425,13 @@ static void vmx_do_no_device_fault(void) { unsigned long cr0; - + struct vcpu *v = current; + clts(); setup_fpu(current); - __vmread_vcpu(CR0_READ_SHADOW, &cr0); + __vmread_vcpu(v, CR0_READ_SHADOW, &cr0); if (!(cr0 & X86_CR0_TS)) { - __vmread_vcpu(GUEST_CR0, &cr0); + __vmread_vcpu(v, GUEST_CR0, &cr0); cr0 &= ~X86_CR0_TS; __vmwrite(GUEST_CR0, cr0); } @@ -440,14 +439,14 @@ } -static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs) +static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs) { unsigned int eax, ebx, ecx, edx; unsigned long eip; __vmread(GUEST_RIP, &eip); - VMX_DBG_LOG(DBG_LEVEL_1, + VMX_DBG_LOG(DBG_LEVEL_1, "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx," " (esi) %lx, (edi) %lx", (unsigned long)regs->eax, (unsigned long)regs->ebx, @@ -462,8 +461,8 @@ clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); #else - struct vcpu *d = current; - if (d->domain->arch.ops->guest_paging_levels == PAGING_L2) + struct vcpu *v = current; + if (v->domain->arch.ops->guest_paging_levels == PAGING_L2) { clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PAE, &edx); @@ -480,7 +479,7 @@ regs->ecx = (unsigned long) ecx; regs->edx = (unsigned long) edx; - VMX_DBG_LOG(DBG_LEVEL_1, + VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x", eip, input, eax, ebx, ecx, edx); @@ -500,7 +499,7 @@ reg = exit_qualification & DEBUG_REG_ACCESS_NUM; - VMX_DBG_LOG(DBG_LEVEL_1, + VMX_DBG_LOG(DBG_LEVEL_1, "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx", eip, reg, exit_qualification); @@ -513,16 +512,16 @@ CASE_GET_REG_P(ESI, esi); CASE_GET_REG_P(EDI, edi); case REG_ESP: - break; + break; default: __vmx_bug(regs); } - + switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) { - case TYPE_MOV_TO_DR: + case TYPE_MOV_TO_DR: /* don't need to check the range */ if (reg != REG_ESP) - v->arch.guest_context.debugreg[reg] = *reg_p; + v->arch.guest_context.debugreg[reg] = *reg_p; else { unsigned long value; __vmread(GUEST_RSP, &value); @@ -543,7 +542,7 @@ * Invalidate the TLB for va. Invalidate the shadow page corresponding * the address va.
*/ -static void vmx_vmexit_do_invlpg(unsigned long va) +static void vmx_vmexit_do_invlpg(unsigned long va) { unsigned long eip; struct vcpu *v = current; @@ -658,24 +657,24 @@ vmx_wait_io(); } -static void vmx_io_instruction(struct cpu_user_regs *regs, - unsigned long exit_qualification, unsigned long inst_len) -{ - struct mi_per_cpu_info *mpcip; +static void vmx_io_instruction(struct cpu_user_regs *regs, + unsigned long exit_qualification, unsigned long inst_len) +{ + struct mmio_op *mmio_opp; unsigned long eip, cs, eflags; unsigned long port, size, dir; int vm86; - mpcip = &current->domain->arch.vmx_platform.mpci; - mpcip->instr = INSTR_PIO; - mpcip->flags = 0; + mmio_opp = &current->arch.arch_vmx.mmio_op; + mmio_opp->instr = INSTR_PIO; + mmio_opp->flags = 0; __vmread(GUEST_RIP, &eip); __vmread(GUEST_CS_SELECTOR, &cs); __vmread(GUEST_RFLAGS, &eflags); vm86 = eflags & X86_EFLAGS_VM ? 1 : 0; - VMX_DBG_LOG(DBG_LEVEL_1, + VMX_DBG_LOG(DBG_LEVEL_1, "vmx_io_instruction: vm86 %d, eip=%lx:%lx, " "exit_qualification = %lx", vm86, cs, eip, exit_qualification); @@ -702,7 +701,7 @@ addr = dir == IOREQ_WRITE ? regs->esi : regs->edi; if (test_bit(5, &exit_qualification)) { /* "rep" prefix */ - mpcip->flags |= REPZ; + mmio_opp->flags |= REPZ; count = vm86 ? regs->ecx & 0xFFFF : regs->ecx; } @@ -713,7 +712,7 @@ if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { unsigned long value = 0; - mpcip->flags |= OVERLAP; + mmio_opp->flags |= OVERLAP; if (dir == IOREQ_WRITE) vmx_copy(&value, addr, size, VMX_COPY_IN); send_pio_req(regs, port, 1, size, value, dir, 0); @@ -772,7 +771,7 @@ } int -vmx_world_save(struct vcpu *d, struct vmx_assist_context *c) +vmx_world_save(struct vcpu *v, struct vmx_assist_context *c) { unsigned long inst_len; int error = 0; @@ -784,7 +783,7 @@ error |= __vmread(GUEST_RFLAGS, &c->eflags); error |= __vmread(CR0_READ_SHADOW, &c->cr0); - c->cr3 = d->arch.arch_vmx.cpu_cr3; + c->cr3 = v->arch.arch_vmx.cpu_cr3; error |= __vmread(CR4_READ_SHADOW, &c->cr4); error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit); @@ -837,7 +836,7 @@ } int -vmx_world_restore(struct vcpu *d, struct vmx_assist_context *c) +vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c) { unsigned long mfn, old_cr4; int error = 0; @@ -848,45 +847,45 @@ error |= __vmwrite(CR0_READ_SHADOW, c->cr0); - if (!vmx_paging_enabled(d)) { + if (!vmx_paging_enabled(v)) { VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table"); - __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table)); + __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table)); goto skip_cr3; } - if (c->cr3 == d->arch.arch_vmx.cpu_cr3) { - /* - * This is simple TLB flush, implying the guest has + if (c->cr3 == v->arch.arch_vmx.cpu_cr3) { + /* + * This is a simple TLB flush, implying the guest has * removed some translation or changed page attributes. * We simply invalidate the shadow. */ mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT); - if (mfn != pagetable_get_pfn(d->arch.guest_table)) { + if (mfn != pagetable_get_pfn(v->arch.guest_table)) { printk("Invalid CR3 value=%x", c->cr3); domain_crash_synchronous(); return 0; } - shadow_sync_all(d->domain); + shadow_sync_all(v->domain); } else { /* * If different, make a shadow. Check if the PDBR is valid * first.
*/ VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3); - if ((c->cr3 >> PAGE_SHIFT) > d->domain->max_pages) { + if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) { printk("Invalid CR3 value=%x", c->cr3); - domain_crash_synchronous(); + domain_crash_synchronous(); return 0; } mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT); - d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); - update_pagetables(d); - /* + v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + update_pagetables(v); + /* * arch.shadow_table should now hold the next CR3 for shadow */ - d->arch.arch_vmx.cpu_cr3 = c->cr3; + v->arch.arch_vmx.cpu_cr3 = c->cr3; VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3); - __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table)); + __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); } skip_cr3: @@ -947,7 +946,7 @@ enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE }; int -vmx_assist(struct vcpu *d, int mode) +vmx_assist(struct vcpu *v, int mode) { struct vmx_assist_context c; u32 magic; @@ -971,7 +970,7 @@ if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN)) goto error; if (cp != 0) { - if (!vmx_world_save(d, &c)) + if (!vmx_world_save(v, &c)) goto error; if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT)) goto error; @@ -983,7 +982,7 @@ if (cp != 0) { if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN)) goto error; - if (!vmx_world_restore(d, &c)) + if (!vmx_world_restore(v, &c)) goto error; return 1; } @@ -1000,7 +999,7 @@ if (cp != 0) { if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN)) goto error; - if (!vmx_world_restore(d, &c)) + if (!vmx_world_restore(v, &c)) goto error; return 1; } @@ -1009,21 +1008,21 @@ error: printf("Failed to transfer to vmxassist\n"); - domain_crash_synchronous(); + domain_crash_synchronous(); return 0; } static int vmx_set_cr0(unsigned long value) { - struct vcpu *d = current; + struct vcpu *v = current; unsigned long mfn; unsigned long eip; int paging_enabled; unsigned long vm_entry_value; - /* + /* * CR0: We don't want to lose PE and PG. */ - paging_enabled = vmx_paging_enabled(d); + paging_enabled = vmx_paging_enabled(v); __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE); __vmwrite(CR0_READ_SHADOW, value); @@ -1034,33 +1033,33 @@ * The guest CR3 must be pointing to the guest physical. 
*/ if ( !VALID_MFN(mfn = get_mfn_from_pfn( - d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) || - !get_page(pfn_to_page(mfn), d->domain) ) + v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) || + !get_page(pfn_to_page(mfn), v->domain) ) { - printk("Invalid CR3 value = %lx", d->arch.arch_vmx.cpu_cr3); + printk("Invalid CR3 value = %lx", v->arch.arch_vmx.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ } #if defined(__x86_64__) if (test_bit(VMX_CPU_STATE_LME_ENABLED, - &d->arch.arch_vmx.cpu_state) && + &v->arch.arch_vmx.cpu_state) && !test_bit(VMX_CPU_STATE_PAE_ENABLED, - &d->arch.arch_vmx.cpu_state)){ + &v->arch.arch_vmx.cpu_state)){ VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n"); - vmx_inject_exception(d, TRAP_gp_fault, 0); + vmx_inject_exception(v, TRAP_gp_fault, 0); } if (test_bit(VMX_CPU_STATE_LME_ENABLED, - &d->arch.arch_vmx.cpu_state)){ + &v->arch.arch_vmx.cpu_state)){ /* Here the PAE is should to be opened */ VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n"); set_bit(VMX_CPU_STATE_LMA_ENABLED, - &d->arch.arch_vmx.cpu_state); + &v->arch.arch_vmx.cpu_state); __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); -#if CONFIG_PAGING_LEVELS >= 4 - if(!shadow_set_guest_paging_levels(d->domain, 4)) { +#if CONFIG_PAGING_LEVELS >= 4 + if(!shadow_set_guest_paging_levels(v->domain, 4)) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } @@ -1069,7 +1068,7 @@ else { #if CONFIG_PAGING_LEVELS >= 4 - if(!shadow_set_guest_paging_levels(d->domain, 2)) { + if(!shadow_set_guest_paging_levels(v->domain, 2)) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } @@ -1081,7 +1080,7 @@ __vmread(GUEST_CR4, &crn); if ( (!(crn & X86_CR4_PAE)) && test_bit(VMX_CPU_STATE_PAE_ENABLED, - &d->arch.arch_vmx.cpu_state)){ + &v->arch.arch_vmx.cpu_state)){ VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n"); __vmwrite(GUEST_CR4, crn | X86_CR4_PAE); } @@ -1089,24 +1088,24 @@ /* * Now arch.guest_table points to machine physical. */ - d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); - update_pagetables(d); - - VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", + v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + update_pagetables(v); + + VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table)); - /* + __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); + /* * arch->shadow_table should hold the next CR3 for shadow */ - VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", - d->arch.arch_vmx.cpu_cr3, mfn); + VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", + v->arch.arch_vmx.cpu_cr3, mfn); } if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled) - if(d->arch.arch_vmx.cpu_cr3) + if(v->arch.arch_vmx.cpu_cr3) put_page(pfn_to_page(get_mfn_from_pfn( - d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT))); + v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT))); /* * VMX does not implement real-mode virtualization. We emulate @@ -1116,38 +1115,38 @@ if ((value & X86_CR0_PE) == 0) { if ( value & X86_CR0_PG ) { /* inject GP here */ - vmx_inject_exception(d, TRAP_gp_fault, 0); + vmx_inject_exception(v, TRAP_gp_fault, 0); return 0; } else { - /* + /* * Disable paging here. 
* Same to PE == 1 && PG == 0 */ if (test_bit(VMX_CPU_STATE_LMA_ENABLED, - &d->arch.arch_vmx.cpu_state)){ + &v->arch.arch_vmx.cpu_state)){ clear_bit(VMX_CPU_STATE_LMA_ENABLED, - &d->arch.arch_vmx.cpu_state); + &v->arch.arch_vmx.cpu_state); __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); } } - if (vmx_assist(d, VMX_ASSIST_INVOKE)) { - set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &d->arch.arch_vmx.cpu_state); + if (vmx_assist(v, VMX_ASSIST_INVOKE)) { + set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.arch_vmx.cpu_state); __vmread(GUEST_RIP, &eip); VMX_DBG_LOG(DBG_LEVEL_1, "Transfering control to vmxassist %%eip 0x%lx\n", eip); return 0; /* do not update eip! */ } } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED, - &d->arch.arch_vmx.cpu_state)) { + &v->arch.arch_vmx.cpu_state)) { __vmread(GUEST_RIP, &eip); VMX_DBG_LOG(DBG_LEVEL_1, "Enabling CR0.PE at %%eip 0x%lx\n", eip); - if (vmx_assist(d, VMX_ASSIST_RESTORE)) { + if (vmx_assist(v, VMX_ASSIST_RESTORE)) { clear_bit(VMX_CPU_STATE_ASSIST_ENABLED, - &d->arch.arch_vmx.cpu_state); + &v->arch.arch_vmx.cpu_state); __vmread(GUEST_RIP, &eip); VMX_DBG_LOG(DBG_LEVEL_1, "Restoring to %%eip 0x%lx\n", eip); @@ -1188,7 +1187,7 @@ { unsigned long value; unsigned long old_cr; - struct vcpu *d = current; + struct vcpu *v = current; switch (gp) { CASE_GET_REG(EAX, eax); @@ -1206,82 +1205,82 @@ printk("invalid gp: %d\n", gp); __vmx_bug(regs); } - + VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value); VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current); switch(cr) { - case 0: + case 0: { return vmx_set_cr0(value); } - case 3: + case 3: { unsigned long old_base_mfn, mfn; /* * If paging is not enabled yet, simply copy the value to CR3. */ - if (!vmx_paging_enabled(d)) { - d->arch.arch_vmx.cpu_cr3 = value; + if (!vmx_paging_enabled(v)) { + v->arch.arch_vmx.cpu_cr3 = value; break; } - + /* * We make a new one if the shadow does not exist. */ - if (value == d->arch.arch_vmx.cpu_cr3) { - /* - * This is simple TLB flush, implying the guest has + if (value == v->arch.arch_vmx.cpu_cr3) { + /* + * This is simple TLB flush, implying the guest has * removed some translation or changed page attributes. * We simply invalidate the shadow. */ mfn = get_mfn_from_pfn(value >> PAGE_SHIFT); - if (mfn != pagetable_get_pfn(d->arch.guest_table)) + if (mfn != pagetable_get_pfn(v->arch.guest_table)) __vmx_bug(regs); - shadow_sync_all(d->domain); + shadow_sync_all(v->domain); } else { /* * If different, make a shadow. Check if the PDBR is valid * first. 
*/ VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - if ( ((value >> PAGE_SHIFT) > d->domain->max_pages ) || + if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) || !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) || - !get_page(pfn_to_page(mfn), d->domain) ) + !get_page(pfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value=%lx", value); domain_crash_synchronous(); /* need to take a clean path */ } - old_base_mfn = pagetable_get_pfn(d->arch.guest_table); - d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); if (old_base_mfn) put_page(pfn_to_page(old_base_mfn)); - update_pagetables(d); - /* + update_pagetables(v); + /* * arch.shadow_table should now hold the next CR3 for shadow */ - d->arch.arch_vmx.cpu_cr3 = value; + v->arch.arch_vmx.cpu_cr3 = value; VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table)); - } - break; - } - case 4: + __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); + } + break; + } + case 4: { /* CR4 */ unsigned long old_guest_cr; __vmread(GUEST_CR4, &old_guest_cr); if (value & X86_CR4_PAE){ - set_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state); + set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state); } else { if (test_bit(VMX_CPU_STATE_LMA_ENABLED, - &d->arch.arch_vmx.cpu_state)){ - vmx_inject_exception(d, TRAP_gp_fault, 0); + &v->arch.arch_vmx.cpu_state)){ + vmx_inject_exception(v, TRAP_gp_fault, 0); } - clear_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state); + clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state); } __vmread(CR4_READ_SHADOW, &old_cr); @@ -1294,7 +1293,7 @@ * all TLB entries except global entries. 
*/ if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) { - shadow_sync_all(d->domain); + shadow_sync_all(v->domain); } break; } @@ -1317,12 +1316,12 @@ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) { unsigned long value; - struct vcpu *d = current; + struct vcpu *v = current; if (cr != 3) __vmx_bug(regs); - value = (unsigned long) d->arch.arch_vmx.cpu_cr3; + value = (unsigned long) v->arch.arch_vmx.cpu_cr3; switch (gp) { CASE_SET_REG(EAX, eax); @@ -1349,6 +1348,7 @@ { unsigned int gp, cr; unsigned long value; + struct vcpu *v = current; switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) { case TYPE_MOV_TO_CR: @@ -1371,17 +1371,17 @@ clts(); setup_fpu(current); - __vmread_vcpu(GUEST_CR0, &value); + __vmread_vcpu(v, GUEST_CR0, &value); value &= ~X86_CR0_TS; /* clear TS */ __vmwrite(GUEST_CR0, value); - __vmread_vcpu(CR0_READ_SHADOW, &value); + __vmread_vcpu(v, CR0_READ_SHADOW, &value); value &= ~X86_CR0_TS; /* clear TS */ __vmwrite(CR0_READ_SHADOW, value); break; case TYPE_LMSW: TRACE_VMEXIT(1,TYPE_LMSW); - __vmread_vcpu(CR0_READ_SHADOW, &value); + __vmread_vcpu(v, CR0_READ_SHADOW, &value); value = (value & ~0xF) | (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); return vmx_set_cr0(value); @@ -1398,7 +1398,7 @@ u64 msr_content = 0; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx", - (unsigned long)regs->ecx, (unsigned long)regs->eax, + (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); switch (regs->ecx) { case MSR_IA32_SYSENTER_CS: @@ -1431,7 +1431,7 @@ u64 msr_content; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx", - (unsigned long)regs->ecx, (unsigned long)regs->eax, + (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32); @@ -1518,7 +1518,7 @@ char print_buf[BUF_SIZ]; static int index; -static void vmx_print_line(const char c, struct vcpu *d) +static void vmx_print_line(const char c, struct vcpu *v) { if (index == MAX_LINE || c == '\n') { @@ -1526,7 +1526,7 @@ print_buf[index++] = c; } print_buf[index] = '\0'; - printk("(GUEST: %u) %s\n", d->domain->domain_id, (char *) &print_buf); + printk("(GUEST: %u) %s\n", v->domain->domain_id, (char *) &print_buf); index = 0; } else @@ -1586,7 +1586,7 @@ if ((error = __vmread(VM_EXIT_REASON, &exit_reason))) __vmx_bug(&regs); - + perfc_incra(vmexits, exit_reason); __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field); @@ -1594,14 +1594,14 @@ __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); - if (inst_len >= 1 && inst_len <= 15) + if (inst_len >= 1 && inst_len <= 15) __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len); if (idtv_info_field & 0x800) { /* valid error code */ unsigned long error_code; __vmread(IDT_VECTORING_ERROR_CODE, &error_code); __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); - } + } VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field); } @@ -1614,7 +1614,7 @@ if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { printk("Failed vm entry\n"); - domain_crash_synchronous(); + domain_crash_synchronous(); return; } @@ -1630,7 +1630,7 @@ case EXIT_REASON_EXCEPTION_NMI: { /* - * We don't set the software-interrupt exiting (INT n). + * We don't set the software-interrupt exiting (INT n). * (1) We can get an exception (e.g.
#PG) in the guest, or * (2) NMI */ @@ -1682,22 +1682,22 @@ case TRAP_no_device: { vmx_do_no_device_fault(); - break; + break; } case TRAP_page_fault: { __vmread(EXIT_QUALIFICATION, &va); __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code); - + TRACE_VMEXIT(3,regs.error_code); TRACE_VMEXIT(4,va); - VMX_DBG_LOG(DBG_LEVEL_VMMU, + VMX_DBG_LOG(DBG_LEVEL_VMMU, "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", (unsigned long)regs.eax, (unsigned long)regs.ebx, (unsigned long)regs.ecx, (unsigned long)regs.edx, (unsigned long)regs.esi, (unsigned long)regs.edi); - v->domain->arch.vmx_platform.mpci.inst_decoder_regs = &regs; + v->arch.arch_vmx.mmio_op.inst_decoder_regs = &regs; if (!(error = vmx_do_page_fault(va, &regs))) { /* @@ -1718,11 +1718,11 @@ } break; } - case EXIT_REASON_EXTERNAL_INTERRUPT: + case EXIT_REASON_EXTERNAL_INTERRUPT: vmx_vmexit_do_extint(&regs); break; case EXIT_REASON_PENDING_INTERRUPT: - __vmwrite(CPU_BASED_VM_EXEC_CONTROL, + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, MONITOR_CPU_BASED_EXEC_CONTROLS); break; case EXIT_REASON_TASK_SWITCH: @@ -1762,7 +1762,7 @@ __get_instruction_length(inst_len); __vmread(EXIT_QUALIFICATION, &exit_qualification); - VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx", + VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx", eip, inst_len, exit_qualification); if (vmx_cr_access(exit_qualification, &regs)) __update_guest_eip(inst_len); @@ -1771,7 +1771,7 @@ break; } case EXIT_REASON_DR_ACCESS: - __vmread(EXIT_QUALIFICATION, &exit_qualification); + __vmread(EXIT_QUALIFICATION, &exit_qualification); vmx_dr_access(exit_qualification, &regs); __get_instruction_length(inst_len); __update_guest_eip(inst_len); @@ -1803,13 +1803,13 @@ asmlinkage void load_cr2(void) { - struct vcpu *d = current; - - local_irq_disable(); + struct vcpu *v = current; + + local_irq_disable(); #ifdef __i386__ - asm volatile("movl %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2)); + asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2)); #else - asm volatile("movq %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2)); + asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2)); #endif } @@ -1831,7 +1831,7 @@ TRACE_3D(TRC_VMEXIT,0,0,0); return; } -#endif +#endif #endif /* CONFIG_VMX */ /* diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/vmx_io.c Sat Oct 8 20:28:24 2005 @@ -1,5 +1,5 @@ /* - * vmx_io.c: handling I/O, interrupts related VMX entry/exit + * vmx_io.c: handling I/O, interrupts related VMX entry/exit * Copyright (c) 2004, Intel Corporation.
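/*
 * The vmx_do_msr_read/vmx_do_msr_write hunks above shuttle the 64-bit MSR
 * payload through EAX (low half) and EDX (high half), mirroring the
 * RDMSR/WRMSR register convention. The packing in isolation; the struct
 * here is an illustrative stand-in, not Xen's cpu_user_regs:
 */
#include <stdint.h>

struct msr_regs { uint32_t eax, edx; };

static uint64_t msr_pack(const struct msr_regs *r)
{
    return (uint64_t)r->eax | ((uint64_t)r->edx << 32);
}

static void msr_unpack(struct msr_regs *r, uint64_t content)
{
    r->eax = (uint32_t)content;           /* low 32 bits  */
    r->edx = (uint32_t)(content >> 32);   /* high 32 bits */
}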
* * This program is free software; you can redistribute it and/or modify it @@ -42,7 +42,7 @@ #ifdef CONFIG_VMX #if defined (__i386__) void load_cpu_user_regs(struct cpu_user_regs *regs) -{ +{ /* * Write the guest register value into VMCS */ @@ -52,7 +52,7 @@ __vmwrite(GUEST_RFLAGS, regs->eflags); if (regs->eflags & EF_TF) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - else + else __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); __vmwrite(GUEST_CS_SELECTOR, regs->cs); @@ -189,7 +189,7 @@ __vmwrite(GUEST_RFLAGS, regs->rflags); if (regs->rflags & EF_TF) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - else + else __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); __vmwrite(GUEST_CS_SELECTOR, regs->cs); @@ -265,52 +265,52 @@ } switch (index) { - case 0: + case 0: __set_reg_value(&regs->rax, size, value); break; - case 1: + case 1: __set_reg_value(&regs->rcx, size, value); break; - case 2: + case 2: __set_reg_value(&regs->rdx, size, value); break; - case 3: + case 3: __set_reg_value(&regs->rbx, size, value); break; - case 4: + case 4: __set_reg_value(&regs->rsp, size, value); break; - case 5: + case 5: __set_reg_value(&regs->rbp, size, value); break; - case 6: + case 6: __set_reg_value(&regs->rsi, size, value); break; - case 7: + case 7: __set_reg_value(&regs->rdi, size, value); break; - case 8: + case 8: __set_reg_value(&regs->r8, size, value); break; - case 9: + case 9: __set_reg_value(&regs->r9, size, value); break; - case 10: + case 10: __set_reg_value(&regs->r10, size, value); break; - case 11: + case 11: __set_reg_value(&regs->r11, size, value); break; - case 12: + case 12: __set_reg_value(&regs->r12, size, value); break; - case 13: + case 13: __set_reg_value(&regs->r13, size, value); break; - case 14: + case 14: __set_reg_value(&regs->r14, size, value); break; - case 15: + case 15: __set_reg_value(&regs->r15, size, value); break; default: @@ -391,7 +391,7 @@ } static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p, - struct mi_per_cpu_info *mpcip) + struct mmio_op *mmio_opp) { unsigned long old_eax; int sign = p->df ? -1 : 1; @@ -399,15 +399,15 @@ if (p->dir == IOREQ_WRITE) { if (p->pdata_valid) { regs->esi += sign * p->count * p->size; - if (mpcip->flags & REPZ) + if (mmio_opp->flags & REPZ) regs->ecx -= p->count; } } else { - if (mpcip->flags & OVERLAP) { + if (mmio_opp->flags & OVERLAP) { unsigned long addr; regs->edi += sign * p->count * p->size; - if (mpcip->flags & REPZ) + if (mmio_opp->flags & REPZ) regs->ecx -= p->count; addr = regs->edi; @@ -416,7 +416,7 @@ vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT); } else if (p->pdata_valid) { regs->edi += sign * p->count * p->size; - if (mpcip->flags & REPZ) + if (mmio_opp->flags & REPZ) regs->ecx -= p->count; } else { old_eax = regs->eax; @@ -439,18 +439,18 @@ } static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p, - struct mi_per_cpu_info *mpcip) + struct mmio_op *mmio_opp) { int sign = p->df ?
-1 : 1; int size = -1, index = -1; unsigned long value = 0, diff = 0; unsigned long src, dst; - src = mpcip->operand[0]; - dst = mpcip->operand[1]; + src = mmio_opp->operand[0]; + dst = mmio_opp->operand[1]; size = operand_size(src); - switch (mpcip->instr) { + switch (mmio_opp->instr) { case INSTR_MOV: if (dst & REGISTER) { index = operand_index(dst); @@ -475,7 +475,7 @@ regs->esi += sign * p->count * p->size; regs->edi += sign * p->count * p->size; - if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) { + if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) { unsigned long addr = regs->edi; if (sign > 0) @@ -483,14 +483,14 @@ vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT); } - if (mpcip->flags & REPZ) + if (mmio_opp->flags & REPZ) regs->ecx -= p->count; break; case INSTR_STOS: sign = p->df ? -1 : 1; regs->edi += sign * p->count * p->size; - if (mpcip->flags & REPZ) + if (mmio_opp->flags & REPZ) regs->ecx -= p->count; break; @@ -500,7 +500,7 @@ value = get_reg_value(size, index, 0, regs); diff = (unsigned long) p->u.data & value; } else if (src & IMMEDIATE) { - value = mpcip->immediate; + value = mmio_opp->immediate; diff = (unsigned long) p->u.data & value; } else if (src & MEMORY) { index = operand_index(dst); @@ -527,7 +527,7 @@ value = get_reg_value(size, index, 0, regs); diff = (unsigned long) p->u.data | value; } else if (src & IMMEDIATE) { - value = mpcip->immediate; + value = mmio_opp->immediate; diff = (unsigned long) p->u.data | value; } else if (src & MEMORY) { index = operand_index(dst); @@ -554,7 +554,7 @@ value = get_reg_value(size, index, 0, regs); diff = (unsigned long) p->u.data ^ value; } else if (src & IMMEDIATE) { - value = mpcip->immediate; + value = mmio_opp->immediate; diff = (unsigned long) p->u.data ^ value; } else if (src & MEMORY) { index = operand_index(dst); @@ -581,7 +581,7 @@ value = get_reg_value(size, index, 0, regs); diff = (unsigned long) p->u.data - value; } else if (src & IMMEDIATE) { - value = mpcip->immediate; + value = mmio_opp->immediate; diff = (unsigned long) p->u.data - value; } else if (src & MEMORY) { index = operand_index(dst); @@ -608,7 +608,7 @@ index = operand_index(src); value = get_reg_value(size, index, 0, regs); } else if (src & IMMEDIATE) { - value = mpcip->immediate; + value = mmio_opp->immediate; } else if (src & MEMORY) { index = operand_index(dst); value = get_reg_value(size, index, 0, regs); @@ -629,21 +629,21 @@ load_cpu_user_regs(regs); } -void vmx_io_assist(struct vcpu *v) +void vmx_io_assist(struct vcpu *v) { vcpu_iodata_t *vio; ioreq_t *p; struct cpu_user_regs *regs = guest_cpu_user_regs(); - struct mi_per_cpu_info *mpci_p; + struct mmio_op *mmio_opp; struct cpu_user_regs *inst_decoder_regs; - mpci_p = &v->domain->arch.vmx_platform.mpci; - inst_decoder_regs = mpci_p->inst_decoder_regs; + mmio_opp = &v->arch.arch_vmx.mmio_op; + inst_decoder_regs = mmio_opp->inst_decoder_regs; vio = get_vio(v->domain, v->vcpu_id); if (vio == 0) { - VMX_DBG_LOG(DBG_LEVEL_1, + VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx", (unsigned long) vio); printf("bad shared page: %lx\n", (unsigned long) vio); domain_crash_synchronous(); @@ -660,15 +660,15 @@ clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); if (p->type == IOREQ_TYPE_PIO) - vmx_pio_assist(regs, p, mpci_p); + vmx_pio_assist(regs, p, mmio_opp); else - vmx_mmio_assist(regs, p, mpci_p); + vmx_mmio_assist(regs, p, mmio_opp); } /* else an interrupt send event raced us */ } } -int vmx_clear_pending_io_event(struct vcpu *v) +int vmx_clear_pending_io_event(struct vcpu *v) { struct 
domain *d = v->domain; int port = iopacket_port(d); @@ -678,7 +678,7 @@ clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); /* Note: VMX domains may need upcalls as well */ - if (!v->vcpu_info->evtchn_pending_sel) + if (!v->vcpu_info->evtchn_pending_sel) clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); /* clear the pending bit for port */ @@ -688,18 +688,18 @@ /* Because we've cleared the pending events first, we need to guarantee that * all events to be handled by xen for VMX domains are taken care of here. * - * interrupts are guaranteed to be checked before resuming guest. - * VMX upcalls have been already arranged for if necessary. + * interrupts are guaranteed to be checked before resuming guest. + * VMX upcalls have been already arranged for if necessary. */ -void vmx_check_events(struct vcpu *d) -{ - /* clear the event *before* checking for work. This should avoid +void vmx_check_events(struct vcpu *v) +{ + /* clear the event *before* checking for work. This should avoid the set-and-check races */ if (vmx_clear_pending_io_event(current)) - vmx_io_assist(d); -} - -/* On exit from vmx_wait_io, we're guaranteed to have a I/O response from + vmx_io_assist(v); +} + +/* On exit from vmx_wait_io, we're guaranteed to have a I/O response from the device model */ void vmx_wait_io() { @@ -782,7 +782,7 @@ return __fls(pintr[0]); } -#define BSP_CPU(d) (!(d->vcpu_id)) +#define BSP_CPU(v) (!(v->vcpu_id)) static inline void clear_extint(struct vcpu *v) { global_iodata_t *spg; @@ -883,7 +883,7 @@ return ((eflags & X86_EFLAGS_IF) == 0); } -asmlinkage void vmx_intr_assist(void) +asmlinkage void vmx_intr_assist(void) { int intr_type = 0; int highest_vector; @@ -891,7 +891,7 @@ struct vcpu *v = current; highest_vector = find_highest_pending_irq(v, &intr_type); - __vmread_vcpu(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control); + __vmread_vcpu(v, CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control); if (highest_vector == -1) { disable_irq_window(cpu_exec_control); @@ -945,19 +945,19 @@ return; } -void vmx_do_resume(struct vcpu *d) +void vmx_do_resume(struct vcpu *v) { vmx_stts(); - if (event_pending(d)) { - vmx_check_events(d); - - if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) + if (event_pending(v)) { + vmx_check_events(v); + + if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) vmx_wait_io(); } /* We can't resume the guest if we're waiting on I/O */ - ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)); + ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)); } #endif /* CONFIG_VMX */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/vmx_platform.c --- a/xen/arch/x86/vmx_platform.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/vmx_platform.c Sat Oct 8 20:28:24 2005 @@ -22,8 +22,8 @@ #include <xen/mm.h> #include <asm/shadow.h> #include <xen/domain_page.h> -#include <asm/page.h> -#include <xen/event.h> +#include <asm/page.h> +#include <xen/event.h> #include <xen/trace.h> #include <asm/vmx.h> #include <asm/vmx_platform.h> @@ -69,16 +69,16 @@ } } -long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) +long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) { if (size == BYTE) { - switch (index) { + switch (index) { case 0: /* %al */ return (char)(regs->rax & 0xFF); case 1: /* %cl */ return (char)(regs->rcx & 0xFF); case 2: /* %dl */ - return (char)(regs->rdx & 0xFF); + return (char)(regs->rdx & 0xFF); case 3: /* %bl */ return (char)(regs->rbx & 0xFF); case 4: /* %ah */ @@ -90,7 +90,7 @@ case 7: /* %bh */ return (char)((regs->rbx & 0xFF00) 
>> 8); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } /* NOTREACHED */ @@ -114,7 +114,7 @@ case 14: return __get_reg_value(regs->r14, size); case 15: return __get_reg_value(regs->r15, size); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -131,7 +131,7 @@ } static inline long __get_reg_value(unsigned long reg, int size) -{ +{ switch(size) { case WORD: return (short)(reg & 0xFFFF); @@ -144,15 +144,15 @@ } long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) -{ +{ if (size == BYTE) { - switch (index) { + switch (index) { case 0: /* %al */ return (char)(regs->eax & 0xFF); case 1: /* %cl */ return (char)(regs->ecx & 0xFF); case 2: /* %dl */ - return (char)(regs->edx & 0xFF); + return (char)(regs->edx & 0xFF); case 3: /* %bl */ return (char)(regs->ebx & 0xFF); case 4: /* %ah */ @@ -164,7 +164,7 @@ case 7: /* %bh */ return (char)((regs->ebx & 0xFF00) >> 8); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -179,7 +179,7 @@ case 6: return __get_reg_value(regs->esi, size); case 7: return __get_reg_value(regs->edi, size); default: - printf("Error: (get_reg_value) Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -283,9 +283,9 @@ //Only one operand in the instruction is register if (mod == 3) { - return (rm + (rex_b << 3)); + return (rm + (rex_b << 3)); } else { - return (reg + (rex_r << 3)); + return (reg + (rex_r << 3)); } return 0; } @@ -299,7 +299,7 @@ mmio_inst->operand[0] = 0; mmio_inst->operand[1] = 0; - + mmio_inst->flags = 0; } @@ -498,12 +498,12 @@ instr->instr = INSTR_MOVS; instr->op_size = BYTE; return DECODE_success; - + case 0xA5: /* movsw/movsl */ instr->instr = INSTR_MOVS; GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); return DECODE_success; - + case 0xAA: /* stosb */ instr->instr = INSTR_STOS; instr->op_size = BYTE; @@ -513,7 +513,7 @@ instr->instr = INSTR_STOS; GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); return DECODE_success; - + case 0xC6: if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */ instr->instr = INSTR_MOV; @@ -522,11 +522,11 @@ instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); - + return DECODE_success; } else return DECODE_failure; - + case 0xC7: if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */ instr->instr = INSTR_MOV; @@ -535,7 +535,7 @@ instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); - + return DECODE_success; } else return DECODE_failure; @@ -598,34 +598,34 @@ return inst_len; } -void send_mmio_req(unsigned char type, unsigned long gpa, +void send_mmio_req(unsigned char type, unsigned long gpa, unsigned long count, int size, long value, int dir, int pvalid) { - struct vcpu *d = current; + struct vcpu *v = current; vcpu_iodata_t *vio; ioreq_t *p; int vm86; struct cpu_user_regs *regs; extern long evtchn_send(int lport); - regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs; - - vio = get_vio(d->domain, d->vcpu_id); + regs = 
current->arch.arch_vmx.mmio_op.inst_decoder_regs; + + vio = get_vio(v->domain, v->vcpu_id); if (vio == NULL) { printf("bad shared page\n"); - domain_crash_synchronous(); + domain_crash_synchronous(); } p = &vio->vp_ioreq; vm86 = regs->eflags & X86_EFLAGS_VM; - if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) { + if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { printf("VMX I/O has not yet completed\n"); domain_crash_synchronous(); } - set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags); + set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); p->dir = dir; p->pdata_valid = pvalid; @@ -647,27 +647,27 @@ if (vmx_mmio_intercept(p)){ p->state = STATE_IORESP_READY; - vmx_io_assist(d); + vmx_io_assist(v); return; } - evtchn_send(iopacket_port(d->domain)); + evtchn_send(iopacket_port(v->domain)); vmx_wait_io(); } static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, - struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs) + struct mmio_op *mmio_opp, struct cpu_user_regs *regs) { unsigned long value = 0; int index, size; - + size = operand_size(inst->operand[0]); - mpcip->flags = inst->flags; - mpcip->instr = inst->instr; - mpcip->operand[0] = inst->operand[0]; /* source */ - mpcip->operand[1] = inst->operand[1]; /* destination */ - mpcip->immediate = inst->immediate; + mmio_opp->flags = inst->flags; + mmio_opp->instr = inst->instr; + mmio_opp->operand[0] = inst->operand[0]; /* source */ + mmio_opp->operand[1] = inst->operand[1]; /* destination */ + mmio_opp->immediate = inst->immediate; if (inst->operand[0] & REGISTER) { /* dest is memory */ index = operand_index(inst->operand[0]); @@ -687,19 +687,19 @@ #define GET_REPEAT_COUNT() \ (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1) - + void handle_mmio(unsigned long va, unsigned long gpa) { unsigned long eip, eflags, cs; unsigned long inst_len, inst_addr; - struct mi_per_cpu_info *mpcip; + struct mmio_op *mmio_opp; struct cpu_user_regs *regs; struct instruction mmio_inst; unsigned char inst[MAX_INST_LEN]; int i, vm86, ret; - - mpcip = &current->domain->arch.vmx_platform.mpci; - regs = mpcip->inst_decoder_regs; + + mmio_opp = &current->arch.arch_vmx.mmio_op; + regs = mmio_opp->inst_decoder_regs; __vmread(GUEST_RIP, &eip); __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); @@ -720,7 +720,7 @@ } init_instruction(&mmio_inst); - + if (vmx_decode(inst, &mmio_inst) == DECODE_failure) { printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:", va, gpa, inst_len); @@ -735,7 +735,7 @@ switch (mmio_inst.instr) { case INSTR_MOV: - mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs); break; case INSTR_MOVS: @@ -769,8 +769,8 @@ } } - mpcip->flags = mmio_inst.flags; - mpcip->instr = mmio_inst.instr; + mmio_opp->flags = mmio_inst.flags; + mmio_opp->instr = mmio_inst.instr; /* * In case of a movs spanning multiple pages, we break the accesses @@ -785,7 +785,7 @@ if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { unsigned long value = 0; - mpcip->flags |= OVERLAP; + mmio_opp->flags |= OVERLAP; regs->eip -= inst_len; /* do not advance %eip */ @@ -808,7 +808,7 @@ } case INSTR_MOVZ: - mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs); break; case INSTR_STOS: @@ -816,31 +816,31 @@ * Since the destination is always in (contiguous) mmio space we don't * need to break it up into pages.
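/*
 * The handle_mmio() hunks above split a rep-MOVS batch whenever it would
 * cross a page boundary, using the test that the first and last byte of
 * the access must fall in the same page. The predicate on its own, with
 * the usual 4K page constants:
 */
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Non-zero if [addr, addr + bytes) straddles a page boundary. */
static int crosses_page(uintptr_t addr, uintptr_t bytes)
{
    return (addr & PAGE_MASK) != ((addr + bytes - 1) & PAGE_MASK);
}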
*/ - mpcip->flags = mmio_inst.flags; - mpcip->instr = mmio_inst.instr; + mmio_opp->flags = mmio_inst.flags; + mmio_opp->instr = mmio_inst.instr; send_mmio_req(IOREQ_TYPE_COPY, gpa, GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0); break; case INSTR_OR: - mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs); + mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mmio_opp, regs); break; case INSTR_AND: - mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs); + mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mmio_opp, regs); break; case INSTR_XOR: - mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs); + mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mmio_opp, regs); break; case INSTR_CMP: /* Pass through */ case INSTR_TEST: - mpcip->flags = mmio_inst.flags; - mpcip->instr = mmio_inst.instr; - mpcip->operand[0] = mmio_inst.operand[0]; /* source */ - mpcip->operand[1] = mmio_inst.operand[1]; /* destination */ - mpcip->immediate = mmio_inst.immediate; + mmio_opp->flags = mmio_inst.flags; + mmio_opp->instr = mmio_inst.instr; + mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */ + mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */ + mmio_opp->immediate = mmio_inst.immediate; /* send the request and wait for the value */ send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, mmio_inst.op_size, 0, IOREQ_READ, 0); diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/vmx_vmcs.c Sat Oct 8 20:28:24 2005 @@ -37,6 +37,8 @@ #endif #ifdef CONFIG_VMX +int vmcs_size; + struct vmcs_struct *alloc_vmcs(void) { struct vmcs_struct *vmcs; @@ -51,13 +53,35 @@ return vmcs; } -void free_vmcs(struct vmcs_struct *vmcs) +static void free_vmcs(struct vmcs_struct *vmcs) { int order; order = get_order_from_bytes(vmcs_size); free_xenheap_pages(vmcs, order); } + +static int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) +{ + int error; + + if ((error = __vmptrld(phys_ptr))) { + clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); + return error; + } + set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); + return 0; +} + +#if 0 +static int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) +{ + /* take the current VMCS */ + __vmptrst(phys_ptr); + clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); + return 0; +} +#endif static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx) { @@ -118,7 +142,7 @@ #endif }; -static void vmx_setup_platform(struct vcpu *v, struct cpu_user_regs *regs) +static void vmx_setup_platform(struct vcpu *v) { int i; unsigned char e820_map_nr; @@ -161,9 +185,6 @@ } unmap_domain_page(p); - if (v->vcpu_id) - return; - /* Initialise shared page */ mpfn = get_mfn_from_pfn(gpfn); if (mpfn == INVALID_MFN) { @@ -184,7 +205,7 @@ &v->domain->shared_info->evtchn_mask[0]); } -void vmx_set_host_env(struct vcpu *v) +static void vmx_set_host_env(struct vcpu *v) { unsigned int tr, cpu, error = 0; struct host_execution_env host_env; @@ -209,14 +230,13 @@ error |= __vmwrite(HOST_TR_BASE, host_env.tr_base); } -void vmx_do_launch(struct vcpu *v) +static void vmx_do_launch(struct vcpu *v) { /* Update CR3, GDT, LDT, TR */ unsigned int error = 0; unsigned long pfn = 0; unsigned long cr0, cr4; struct pfn_info *page; - struct cpu_user_regs *regs = guest_cpu_user_regs(); __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : ); @@ -246,7 +266,7 @@ page = (struct pfn_info *) alloc_domheap_page(NULL); pfn = (unsigned long) (page - frame_table); - vmx_setup_platform(v, regs); + 
vmx_setup_platform(v); vmx_set_host_env(v); @@ -267,8 +287,7 @@ /* * Initially set the same environement as host. */ -static inline int -construct_init_vmcs_guest(struct cpu_user_regs *regs) +static inline int construct_init_vmcs_guest(cpu_user_regs_t *regs) { int error = 0; union vmcs_arbytes arbytes; @@ -374,34 +393,33 @@ return error; } -static inline int construct_vmcs_host(struct host_execution_env *host_env) +static inline int construct_vmcs_host() { int error = 0; +#ifdef __x86_64__ + unsigned long fs_base; + unsigned long gs_base; +#endif unsigned long crn; /* Host Selectors */ - host_env->ds_selector = __HYPERVISOR_DS; - error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector); - error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector); - error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector); + error |= __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS); + error |= __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); + error |= __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); #if defined (__i386__) - error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector); - error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector); - error |= __vmwrite(HOST_FS_BASE, host_env->ds_base); - error |= __vmwrite(HOST_GS_BASE, host_env->ds_base); + error |= __vmwrite(HOST_FS_SELECTOR, __HYPERVISOR_DS); + error |= __vmwrite(HOST_GS_SELECTOR, __HYPERVISOR_DS); + error |= __vmwrite(HOST_FS_BASE, 0); + error |= __vmwrite(HOST_GS_BASE, 0); #else - rdmsrl(MSR_FS_BASE, host_env->fs_base); - rdmsrl(MSR_GS_BASE, host_env->gs_base); - error |= __vmwrite(HOST_FS_BASE, host_env->fs_base); - error |= __vmwrite(HOST_GS_BASE, host_env->gs_base); - -#endif - host_env->cs_selector = __HYPERVISOR_CS; - error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector); - - host_env->ds_base = 0; - host_env->cs_base = 0; + rdmsrl(MSR_FS_BASE, fs_base); + rdmsrl(MSR_GS_BASE, gs_base); + error |= __vmwrite(HOST_FS_BASE, fs_base); + error |= __vmwrite(HOST_GS_BASE, gs_base); + +#endif + error |= __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : ); error |= __vmwrite(HOST_CR0, crn); /* same CR0 */ @@ -423,55 +441,58 @@ /* * Need to extend to support full virtualization. - * The variable use_host_env indicates if the new VMCS needs to use - * the same setups as the host has (xenolinux). 
*/ - -int construct_vmcs(struct arch_vmx_struct *arch_vmx, - struct cpu_user_regs *regs, - struct vcpu_guest_context *ctxt, - int use_host_env) +static int construct_vmcs(struct arch_vmx_struct *arch_vmx, + cpu_user_regs_t *regs) { int error; + long rc; u64 vmcs_phys_ptr; - struct host_execution_env host_env; - - if (use_host_env != VMCS_USE_HOST_ENV) - return -EINVAL; - - memset(&host_env, 0, sizeof(struct host_execution_env)); - + memset(arch_vmx, 0, sizeof(struct arch_vmx_struct)); + /* + * Create a new VMCS + */ + if (!(arch_vmx->vmcs = alloc_vmcs())) { + printk("Failed to create a new VMCS\n"); + rc = -ENOMEM; + goto err_out; + } vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs); - if ((error = __vmpclear (vmcs_phys_ptr))) { + if ((error = __vmpclear(vmcs_phys_ptr))) { printk("construct_vmcs: VMCLEAR failed\n"); - return -EINVAL; + rc = -EINVAL; + goto err_out; } if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) { printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n", (unsigned long) vmcs_phys_ptr); - return -EINVAL; + rc = -EINVAL; + goto err_out; } if ((error = construct_vmcs_controls(arch_vmx))) { printk("construct_vmcs: construct_vmcs_controls failed\n"); - return -EINVAL; + rc = -EINVAL; + goto err_out; } /* host selectors */ - if ((error = construct_vmcs_host(&host_env))) { + if ((error = construct_vmcs_host())) { printk("construct_vmcs: construct_vmcs_host failed\n"); - return -EINVAL; + rc = -EINVAL; + goto err_out; } /* guest selectors */ if ((error = construct_init_vmcs_guest(regs))) { printk("construct_vmcs: construct_vmcs_guest failed\n"); - return -EINVAL; - } - + rc = -EINVAL; + goto err_out; + } if ((error |= __vmwrite(EXCEPTION_BITMAP, MONITOR_DEFAULT_EXCEPTION_BITMAP))) { printk("construct_vmcs: setting Exception bitmap failed\n"); - return -EINVAL; + rc = -EINVAL; + goto err_out; } if (regs->eflags & EF_TF) @@ -480,6 +501,27 @@ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); return 0; + +err_out: + destroy_vmcs(arch_vmx); + return rc; +} + +void destroy_vmcs(struct arch_vmx_struct *arch_vmx) +{ + if(arch_vmx->vmcs != NULL) + free_vmcs(arch_vmx->vmcs); + if(arch_vmx->io_bitmap_a != 0) { + free_xenheap_pages( + arch_vmx->io_bitmap_a, get_order_from_bytes(0x1000)); + arch_vmx->io_bitmap_a = 0; + } + if(arch_vmx->io_bitmap_b != 0) { + free_xenheap_pages( + arch_vmx->io_bitmap_b, get_order_from_bytes(0x1000)); + arch_vmx->io_bitmap_b = 0; + } + arch_vmx->vmcs = 0; } /* @@ -506,26 +548,6 @@ return 0; } -int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) -{ - int error; - - if ((error = __vmptrld(phys_ptr))) { - clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); - return error; - } - set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); - return 0; -} - -int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr) -{ - /* take the current VMCS */ - __vmptrst(phys_ptr); - clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags); - return 0; -} - void vm_launch_fail(unsigned long eflags) { unsigned long error; @@ -553,9 +575,19 @@ void arch_vmx_do_launch(struct vcpu *v) { - u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs); - - load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr); + int error; + cpu_user_regs_t *regs = &current->arch.guest_context.user_regs; + + error = construct_vmcs(&v->arch.arch_vmx, regs); + if ( error < 0 ) + { + if (v->vcpu_id == 0) { + printk("Failed to construct a new VMCS for BSP.\n"); + } else { + printk("Failed to construct a new VMCS for AP %d\n", v->vcpu_id); + } + domain_crash_synchronous(); + } vmx_do_launch(v);
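/*
 * construct_vmcs() above funnels every failure through a single err_out
 * label, and destroy_vmcs() checks and clears each pointer it frees, so one
 * cleanup routine serves both the error path and normal teardown without
 * double-freeing a half-built arch_vmx. The shape of that idiom, reduced to
 * a self-contained sketch:
 */
#include <stdlib.h>
#include <string.h>

struct res { void *a, *b; };

static void destroy_res(struct res *r)
{
    free(r->a); r->a = NULL;      /* free(NULL) is a no-op, like the   */
    free(r->b); r->b = NULL;      /* NULL checks in destroy_vmcs()     */
}

static int construct_res(struct res *r)
{
    memset(r, 0, sizeof(*r));     /* all pointers NULL from the start  */
    if ((r->a = malloc(64)) == NULL)
        goto err_out;
    if ((r->b = malloc(64)) == NULL)
        goto err_out;
    return 0;
err_out:
    destroy_res(r);               /* safe on a partially built r       */
    return -1;
}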
reset_stack_and_jump(vmx_asm_do_launch); } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/x86_32/entry.S Sat Oct 8 20:28:24 2005 @@ -808,7 +808,7 @@ .long do_vm_assist .long do_update_va_mapping_otherdomain .long do_switch_vm86 - .long do_boot_vcpu + .long do_vcpu_op .long do_ni_hypercall /* 25 */ .long do_mmuext_op .long do_acm_op /* 27 */ @@ -841,7 +841,7 @@ .byte 2 /* do_vm_assist */ .byte 5 /* do_update_va_mapping_otherdomain */ .byte 0 /* do_switch_vm86 */ - .byte 2 /* do_boot_vcpu */ + .byte 3 /* do_vcpu_op */ .byte 0 /* do_ni_hypercall */ /* 25 */ .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/x86_32/mm.c Sat Oct 8 20:28:24 2005 @@ -156,6 +156,7 @@ */ if ( (offsetof(struct pfn_info, u.inuse._domain) != (offsetof(struct pfn_info, count_info) + sizeof(u32))) || + ((offsetof(struct pfn_info, count_info) & 7) != 0) || (sizeof(struct pfn_info) != 24) ) { printk("Weird pfn_info layout (%ld,%ld,%d)\n", diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/x86_64/entry.S Sat Oct 8 20:28:24 2005 @@ -629,7 +629,7 @@ .quad do_vm_assist .quad do_update_va_mapping_otherdomain .quad do_switch_to_user - .quad do_boot_vcpu + .quad do_vcpu_op .quad do_set_segment_base /* 25 */ .quad do_mmuext_op .quad do_acm_op @@ -662,7 +662,7 @@ .byte 2 /* do_vm_assist */ .byte 4 /* do_update_va_mapping_otherdomain */ .byte 0 /* do_switch_to_user */ - .byte 2 /* do_boot_vcpu */ + .byte 3 /* do_vcpu_op */ .byte 2 /* do_set_segment_base */ /* 25 */ .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Sat Oct 8 17:37:45 2005 +++ b/xen/arch/x86/x86_64/mm.c Sat Oct 8 20:28:24 2005 @@ -137,8 +137,10 @@ * count_info and domain fields must be adjacent, as we perform atomic * 64-bit operations on them. */ - if ( (offsetof(struct pfn_info, u.inuse._domain) != - (offsetof(struct pfn_info, count_info) + sizeof(u32))) ) + if ( ((offsetof(struct pfn_info, u.inuse._domain) != + (offsetof(struct pfn_info, count_info) + sizeof(u32)))) || + ((offsetof(struct pfn_info, count_info) & 7) != 0) || + (sizeof(struct pfn_info) != 40) ) { printk("Weird pfn_info layout (%ld,%ld,%ld)\n", offsetof(struct pfn_info, count_info), diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/common/domain.c --- a/xen/common/domain.c Sat Oct 8 17:37:45 2005 +++ b/xen/common/domain.c Sat Oct 8 20:28:24 2005 @@ -18,6 +18,8 @@ #include <xen/domain_page.h> #include <asm/debugger.h> #include <public/dom0_ops.h> +#include <public/sched.h> +#include <public/vcpu.h> /* Both these structures are protected by the domlist_lock. */ rwlock_t domlist_lock = RW_LOCK_UNLOCKED; @@ -366,37 +368,17 @@ return rc; } -/* - * final_setup_guest is used for final setup and launching of domains other - * than domain 0. ie. the domains that are being built by the userspace dom0 - * domain builder. 
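/*
 * The mm.c hunks above tighten the boot-time sanity checks on struct
 * pfn_info: count_info and the pickled owner must stay adjacent (they are
 * updated together with a single atomic 64-bit operation) and count_info
 * must sit on an 8-byte boundary. The same invariants as compile-time
 * checks over an illustrative flattened stand-in (the real struct keeps
 * _domain inside a union):
 */
#include <stddef.h>
#include <stdint.h>

struct pfn_info_like {
    void    *next, *prev;          /* stand-in for struct list_head   */
    uint32_t count_info;
    uint32_t _domain;              /* pickled owner (u.inuse._domain) */
    unsigned long type_info;
    uint32_t tlbflush_timestamp;
};

_Static_assert(offsetof(struct pfn_info_like, _domain) ==
               offsetof(struct pfn_info_like, count_info) + sizeof(uint32_t),
               "count_info and _domain must be adjacent");
_Static_assert((offsetof(struct pfn_info_like, count_info) & 7) == 0,
               "count_info must be 8-byte aligned for the atomic update");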
- */ -long do_boot_vcpu(unsigned long vcpu, struct vcpu_guest_context *ctxt) -{ - struct domain *d = current->domain; - struct vcpu *v; - int rc = 0; - struct vcpu_guest_context *c; - - if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] != NULL) ) - return -EINVAL; - - if ( alloc_vcpu_struct(d, vcpu) == NULL ) +int boot_vcpu(struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt) +{ + struct vcpu *v; + int rc; + + ASSERT(d->vcpu[vcpuid] == NULL); + + if ( alloc_vcpu_struct(d, vcpuid) == NULL ) return -ENOMEM; - if ( (c = xmalloc(struct vcpu_guest_context)) == NULL ) - { - rc = -ENOMEM; - goto out; - } - - if ( copy_from_user(c, ctxt, sizeof(*c)) ) - { - rc = -EFAULT; - goto out; - } - - v = d->vcpu[vcpu]; + v = d->vcpu[vcpuid]; atomic_set(&v->pausecnt, 0); v->cpumap = CPUMAP_RUNANYWHERE; @@ -405,22 +387,73 @@ arch_do_boot_vcpu(v); - if ( (rc = arch_set_info_guest(v, c)) != 0 ) + if ( (rc = arch_set_info_guest(v, ctxt)) != 0 ) goto out; sched_add_domain(v); - /* domain_unpause_by_systemcontroller */ - if ( test_and_clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) ) - vcpu_wake(v); - - xfree(c); + set_bit(_VCPUF_down, &v->vcpu_flags); + clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags); + return 0; out: - xfree(c); - arch_free_vcpu_struct(d->vcpu[vcpu]); - d->vcpu[vcpu] = NULL; + arch_free_vcpu_struct(d->vcpu[vcpuid]); + d->vcpu[vcpuid] = NULL; + return rc; +} + +long do_vcpu_op(int cmd, int vcpuid, void *arg) +{ + struct domain *d = current->domain; + struct vcpu *v; + struct vcpu_guest_context *ctxt; + long rc = 0; + + if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ) + return -EINVAL; + + if ( ((v = d->vcpu[vcpuid]) == NULL) && (cmd != VCPUOP_initialise) ) + return -ENOENT; + + switch ( cmd ) + { + case VCPUOP_initialise: + if ( (ctxt = xmalloc(struct vcpu_guest_context)) == NULL ) + { + rc = -ENOMEM; + break; + } + + if ( copy_from_user(ctxt, arg, sizeof(*ctxt)) ) + { + xfree(ctxt); + rc = -EFAULT; + break; + } + + LOCK_BIGLOCK(d); + rc = (d->vcpu[vcpuid] == NULL) ? 
boot_vcpu(d, vcpuid, ctxt) : -EEXIST; + UNLOCK_BIGLOCK(d); + + xfree(ctxt); + break; + + case VCPUOP_up: + if ( test_and_clear_bit(_VCPUF_down, &v->vcpu_flags) ) + vcpu_wake(v); + break; + + case VCPUOP_down: + if ( !test_and_set_bit(_VCPUF_down, &v->vcpu_flags) ) + vcpu_sleep_nosync(v); + break; + + case VCPUOP_is_up: + rc = !test_bit(_VCPUF_down, &v->vcpu_flags); + break; + } + return rc; } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/common/event_channel.c --- a/xen/common/event_channel.c Sat Oct 8 17:37:45 2005 +++ b/xen/common/event_channel.c Sat Oct 8 20:28:24 2005 @@ -36,7 +36,13 @@ #define evtchn_from_port(d,p) \ (&(bucket_from_port(d,p))[(p)&(EVTCHNS_PER_BUCKET-1)]) -#define ERROR_EXIT(_errno) do { rc = (_errno); goto out; } while ( 0 ) +#define ERROR_EXIT(_errno) \ + do { \ + DPRINTK("EVTCHNOP failure: domain %d, error %d, line %d\n", \ + current->domain->domain_id, (_errno), __LINE__); \ + rc = (_errno); \ + goto out; \ + } while ( 0 ) static int get_free_port(struct domain *d) { @@ -63,222 +69,133 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc) { struct evtchn *chn; + struct domain *d; + int port; + domid_t dom = alloc->dom; + long rc = 0; + + if ( dom == DOMID_SELF ) + dom = current->domain->domain_id; + else if ( !IS_PRIV(current->domain) ) + return -EPERM; + + if ( (d = find_domain_by_id(dom)) == NULL ) + return -ESRCH; + + spin_lock(&d->evtchn_lock); + + if ( (port = get_free_port(d)) < 0 ) + ERROR_EXIT(port); + chn = evtchn_from_port(d, port); + + chn->state = ECS_UNBOUND; + chn->u.unbound.remote_domid = alloc->remote_dom; + + alloc->port = port; + + out: + spin_unlock(&d->evtchn_lock); + + put_domain(d); + + return rc; +} + + +static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) +{ + struct evtchn *lchn, *rchn; + struct domain *ld = current->domain, *rd; + int lport, rport = bind->remote_port; + long rc = 0; + + if ( (rd = find_domain_by_id(bind->remote_dom)) == NULL ) + return -ESRCH; + + /* Avoid deadlock by first acquiring lock of domain with smaller id. */ + if ( ld < rd ) + { + spin_lock(&ld->evtchn_lock); + spin_lock(&rd->evtchn_lock); + } + else + { + if ( ld != rd ) + spin_lock(&rd->evtchn_lock); + spin_lock(&ld->evtchn_lock); + } + + if ( (lport = get_free_port(ld)) < 0 ) + ERROR_EXIT(lport); + lchn = evtchn_from_port(ld, lport); + + if ( !port_is_valid(rd, rport) ) + ERROR_EXIT(-EINVAL); + rchn = evtchn_from_port(rd, rport); + if ( (rchn->state != ECS_UNBOUND) || + (rchn->u.unbound.remote_domid != ld->domain_id) ) + ERROR_EXIT(-EINVAL); + + lchn->u.interdomain.remote_dom = rd; + lchn->u.interdomain.remote_port = (u16)rport; + lchn->state = ECS_INTERDOMAIN; + + rchn->u.interdomain.remote_dom = ld; + rchn->u.interdomain.remote_port = (u16)lport; + rchn->state = ECS_INTERDOMAIN; + + /* + * We may have lost notifications on the remote unbound port. Fix that up + * here by conservatively always setting a notification on the local port. + */ + evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport); + + bind->local_port = lport; + + out: + spin_unlock(&ld->evtchn_lock); + if ( ld != rd ) + spin_unlock(&rd->evtchn_lock); + + put_domain(rd); + + return rc; +} + + +static long evtchn_bind_virq(evtchn_bind_virq_t *bind) +{ + struct evtchn *chn; + struct vcpu *v; struct domain *d = current->domain; - int port = alloc->port; + int port, virq = bind->virq, vcpu = bind->vcpu; long rc = 0; - - spin_lock(&d->evtchn_lock); - - /* Obtain, or ensure that we already have, a valid <port>. 
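/*
 * With do_vcpu_op() in place (above), a guest initialises a secondary
 * VCPU's register context and then marks it runnable as two separate
 * steps, since VCPUOP_initialise deliberately leaves the VCPU down. A
 * sketch of the guest-side calling sequence; the HYPERVISOR_vcpu_op
 * wrapper, vcpu_guest_context_t, and the VCPUOP_* constants are assumed
 * to come from the guest's copy of the public headers:
 */
extern long HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra);

static long bring_up_vcpu(int vcpuid, vcpu_guest_context_t *ctxt)
{
    long rc = HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpuid, ctxt);
    if (rc == 0)                                    /* context accepted */
        rc = HYPERVISOR_vcpu_op(VCPUOP_up, vcpuid, NULL);
    return rc;
}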
*/ - if ( port == 0 ) - { - if ( (port = get_free_port(d)) < 0 ) - ERROR_EXIT(port); - } - else if ( !port_is_valid(d, port) ) - ERROR_EXIT(-EINVAL); - chn = evtchn_from_port(d, port); - - /* Validate channel's current state. */ - switch ( chn->state ) - { - case ECS_FREE: - chn->state = ECS_UNBOUND; - chn->u.unbound.remote_domid = alloc->dom; - break; - - case ECS_UNBOUND: - if ( chn->u.unbound.remote_domid != alloc->dom ) - ERROR_EXIT(-EINVAL); - break; - - default: - ERROR_EXIT(-EINVAL); - } - - out: - spin_unlock(&d->evtchn_lock); - - alloc->port = port; - return rc; -} - - -static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) -{ - struct evtchn *chn1, *chn2; - struct domain *d1, *d2; - int port1 = bind->port1, port2 = bind->port2; - domid_t dom1 = bind->dom1, dom2 = bind->dom2; - long rc = 0; - - if ( !IS_PRIV(current->domain) && (dom1 != DOMID_SELF) ) - return -EPERM; - - if ( dom1 == DOMID_SELF ) - dom1 = current->domain->domain_id; - if ( dom2 == DOMID_SELF ) - dom2 = current->domain->domain_id; - - if ( ((d1 = find_domain_by_id(dom1)) == NULL) || - ((d2 = find_domain_by_id(dom2)) == NULL) ) - { - if ( d1 != NULL ) - put_domain(d1); - return -ESRCH; - } - - /* Avoid deadlock by first acquiring lock of domain with smaller id. */ - if ( d1 < d2 ) - { - spin_lock(&d1->evtchn_lock); - spin_lock(&d2->evtchn_lock); - } - else - { - if ( d1 != d2 ) - spin_lock(&d2->evtchn_lock); - spin_lock(&d1->evtchn_lock); - } - - /* Obtain, or ensure that we already have, a valid <port1>. */ - if ( port1 == 0 ) - { - if ( (port1 = get_free_port(d1)) < 0 ) - ERROR_EXIT(port1); - } - else if ( !port_is_valid(d1, port1) ) - ERROR_EXIT(-EINVAL); - chn1 = evtchn_from_port(d1, port1); - - /* Obtain, or ensure that we already have, a valid <port2>. */ - if ( port2 == 0 ) - { - /* Make port1 non-free while we allocate port2 (in case dom1==dom2). */ - u16 state = chn1->state; - chn1->state = ECS_INTERDOMAIN; - port2 = get_free_port(d2); - chn1->state = state; - if ( port2 < 0 ) - ERROR_EXIT(port2); - } - else if ( !port_is_valid(d2, port2) ) - ERROR_EXIT(-EINVAL); - chn2 = evtchn_from_port(d2, port2); - - /* Validate <dom1,port1>'s current state. */ - switch ( chn1->state ) - { - case ECS_FREE: - break; - - case ECS_UNBOUND: - if ( chn1->u.unbound.remote_domid != dom2 ) - ERROR_EXIT(-EINVAL); - break; - - case ECS_INTERDOMAIN: - if ( chn1->u.interdomain.remote_dom != d2 ) - ERROR_EXIT(-EINVAL); - if ( (chn1->u.interdomain.remote_port != port2) && (bind->port2 != 0) ) - ERROR_EXIT(-EINVAL); - port2 = chn1->u.interdomain.remote_port; - goto out; - - default: - ERROR_EXIT(-EINVAL); - } - - /* Validate <dom2,port2>'s current state. */ - switch ( chn2->state ) - { - case ECS_FREE: - if ( !IS_PRIV(current->domain) && (dom2 != DOMID_SELF) ) - ERROR_EXIT(-EPERM); - break; - - case ECS_UNBOUND: - if ( chn2->u.unbound.remote_domid != dom1 ) - ERROR_EXIT(-EINVAL); - break; - - case ECS_INTERDOMAIN: - if ( chn2->u.interdomain.remote_dom != d1 ) - ERROR_EXIT(-EINVAL); - if ( (chn2->u.interdomain.remote_port != port1) && (bind->port1 != 0) ) - ERROR_EXIT(-EINVAL); - port1 = chn2->u.interdomain.remote_port; - goto out; - - default: - ERROR_EXIT(-EINVAL); - } - - /* - * Everything checked out okay -- bind <dom1,port1> to <dom2,port2>. 
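/*
 * Both the old and the rewritten evtchn_bind_interdomain take two domains'
 * event-channel locks, and both avoid an ABBA deadlock the same way:
 * always acquire the lock of the domain with the smaller address first,
 * taking only one lock when the two domains coincide. The idiom in
 * isolation, with pthread mutexes standing in for Xen spinlocks:
 */
#include <pthread.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a == b) {                     /* same domain: one lock only    */
        pthread_mutex_lock(a);
    } else if (a < b) {               /* lower address locked first... */
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
    } else {                          /* ...regardless of caller side  */
        pthread_mutex_lock(b);
        pthread_mutex_lock(a);
    }
}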
- */ - - chn1->u.interdomain.remote_dom = d2; - chn1->u.interdomain.remote_port = (u16)port2; - chn1->state = ECS_INTERDOMAIN; - - chn2->u.interdomain.remote_dom = d1; - chn2->u.interdomain.remote_port = (u16)port1; - chn2->state = ECS_INTERDOMAIN; - - out: - spin_unlock(&d1->evtchn_lock); - if ( d1 != d2 ) - spin_unlock(&d2->evtchn_lock); - - put_domain(d1); - put_domain(d2); - - bind->port1 = port1; - bind->port2 = port2; - - return rc; -} - - -static long evtchn_bind_virq(evtchn_bind_virq_t *bind) -{ - struct evtchn *chn; - struct vcpu *v = current; - struct domain *d = v->domain; - int port, virq = bind->virq; if ( virq >= ARRAY_SIZE(v->virq_to_evtchn) ) return -EINVAL; - if ( d->domain_id == 0 && virq >= VIRQ_CONSOLE ) - v = d->vcpu[0]; + if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || ((v = d->vcpu[vcpu]) == NULL) ) + return -ENOENT; spin_lock(&d->evtchn_lock); - /* - * Port 0 is the fallback port for VIRQs that haven't been explicitly - * bound yet. - */ - if ( ((port = v->virq_to_evtchn[virq]) != 0) || - ((port = get_free_port(d)) < 0) ) - goto out; + if ( v->virq_to_evtchn[virq] != 0 ) + ERROR_EXIT(-EEXIST); + + if ( (port = get_free_port(d)) < 0 ) + ERROR_EXIT(port); chn = evtchn_from_port(d, port); chn->state = ECS_VIRQ; - chn->notify_vcpu_id = v->vcpu_id; + chn->notify_vcpu_id = vcpu; chn->u.virq = virq; - v->virq_to_evtchn[virq] = port; + v->virq_to_evtchn[virq] = bind->port = port; out: spin_unlock(&d->evtchn_lock); - if ( port < 0 ) - return port; - - bind->port = port; - return 0; + return rc; } @@ -286,24 +203,27 @@ { struct evtchn *chn; struct domain *d = current->domain; - int port; + int port, vcpu = bind->vcpu; + long rc = 0; + + if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu] == NULL) ) + return -ENOENT; spin_lock(&d->evtchn_lock); - if ( (port = get_free_port(d)) >= 0 ) - { - chn = evtchn_from_port(d, port); - chn->state = ECS_IPI; - chn->notify_vcpu_id = current->vcpu_id; - } - + if ( (port = get_free_port(d)) < 0 ) + ERROR_EXIT(port); + + chn = evtchn_from_port(d, port); + chn->state = ECS_IPI; + chn->notify_vcpu_id = vcpu; + + bind->port = port; + + out: spin_unlock(&d->evtchn_lock); - if ( port < 0 ) - return port; - - bind->port = port; - return 0; + return rc; } @@ -311,16 +231,19 @@ { struct evtchn *chn; struct domain *d = current->domain; - int port, rc, pirq = bind->pirq; + int port, pirq = bind->pirq; + long rc; if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) ) return -EINVAL; spin_lock(&d->evtchn_lock); - if ( ((rc = port = d->pirq_to_evtchn[pirq]) != 0) || - ((rc = port = get_free_port(d)) < 0) ) - goto out; + if ( d->pirq_to_evtchn[pirq] != 0 ) + ERROR_EXIT(-EEXIST); + + if ( (port = get_free_port(d)) < 0 ) + ERROR_EXIT(port); chn = evtchn_from_port(d, port); @@ -336,14 +259,12 @@ chn->state = ECS_PIRQ; chn->u.pirq = pirq; + bind->port = port; + out: spin_unlock(&d->evtchn_lock); - if ( rc < 0 ) - return rc; - - bind->port = port; - return 0; + return rc; } @@ -457,22 +378,7 @@ static long evtchn_close(evtchn_close_t *close) { - struct domain *d; - long rc; - domid_t dom = close->dom; - - if ( dom == DOMID_SELF ) - dom = current->domain->domain_id; - else if ( !IS_PRIV(current->domain) ) - return -EPERM; - - if ( (d = find_domain_by_id(dom)) == NULL ) - return -ESRCH; - - rc = __evtchn_close(d, close->port); - - put_domain(d); - return rc; + return __evtchn_close(current->domain, close->port); } @@ -502,6 +408,9 @@ case ECS_IPI: evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport); break; + case ECS_UNBOUND: + /* silently drop the notification */ + break; 
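/*
 * Taken together, the reworked calls above make the interdomain handshake
 * asymmetric: a "server" domain allocates an unbound port that names its
 * peer, publishes the port number out of band (e.g. via xenstore), and the
 * peer then binds a fresh local port to it. A sketch of the two sides; the
 * evtchn_op_t wrapper type and HYPERVISOR_event_channel_op are assumed
 * from the guest headers:
 */
extern long HYPERVISOR_event_channel_op(evtchn_op_t *op);

/* Server side: returns the port to advertise to domain <peer>. */
static int server_alloc_port(domid_t peer)
{
    evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
    op.u.alloc_unbound.dom        = DOMID_SELF;
    op.u.alloc_unbound.remote_dom = peer;
    if (HYPERVISOR_event_channel_op(&op) != 0)
        return -1;
    return op.u.alloc_unbound.port;
}

/* Client side: binds to the advertised port, returns the local port. */
static int client_bind_port(domid_t server, int server_port)
{
    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
    op.u.bind_interdomain.remote_dom  = server;
    op.u.bind_interdomain.remote_port = server_port;
    if (HYPERVISOR_event_channel_op(&op) != 0)
        return -1;
    return op.u.bind_interdomain.local_port;
}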
default: ret = -EINVAL; } @@ -590,9 +499,8 @@ struct evtchn *chn; long rc = 0; - if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] == NULL) ) { - return -EINVAL; - } + if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu] == NULL) ) + return -ENOENT; spin_lock(&d->evtchn_lock); @@ -668,7 +576,7 @@ break; case EVTCHNOP_send: - rc = evtchn_send(op.u.send.local_port); + rc = evtchn_send(op.u.send.port); break; case EVTCHNOP_status: diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Sat Oct 8 17:37:45 2005 +++ b/xen/common/sched_sedf.c Sat Oct 8 20:28:24 2005 @@ -500,9 +500,15 @@ curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id); __del_from_queue(curinf->vcpu); - + /*move them to their next period*/ curinf->deadl_abs += curinf->period; + /*ensure that the start of the next period is in the future*/ + if (unlikely(PERIOD_BEGIN(curinf) < now)) { + curinf->deadl_abs += + (DIV_UP(now - PERIOD_BEGIN(curinf), + curinf->period)) * curinf->period; + } /*and put them back into the queue*/ __add_to_waitqueue_sort(curinf->vcpu); continue; @@ -645,7 +651,7 @@ s_time_t end_xt, struct list_head *extraq[], int cpu) { struct task_slice ret; struct sedf_vcpu_info *runinf; - + ASSERT(end_xt > now); /* Enough time left to use for extratime? */ if (end_xt - now < EXTRA_QUANTUM) goto return_idle; diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/common/schedule.c --- a/xen/common/schedule.c Sat Oct 8 17:37:45 2005 +++ b/xen/common/schedule.c Sat Oct 8 20:28:24 2005 @@ -36,6 +36,7 @@ #include <xen/softirq.h> #include <xen/trace.h> #include <xen/mm.h> +#include <public/sched.h> #include <public/sched_ctl.h> extern void arch_getdomaininfo_ctxt(struct vcpu *, @@ -270,74 +271,11 @@ return 0; } -/* Mark target vcpu as non-runnable so it is not scheduled */ -static long do_vcpu_down(int vcpu) -{ - struct vcpu *target; - - if ( vcpu > MAX_VIRT_CPUS ) - return -EINVAL; - - target = current->domain->vcpu[vcpu]; - if ( target == NULL ) - return -ESRCH; - set_bit(_VCPUF_down, &target->vcpu_flags); - - return 0; -} - -/* Mark target vcpu as runnable and wake it */ -static long do_vcpu_up(int vcpu) -{ - struct vcpu *target; - - if (vcpu > MAX_VIRT_CPUS) - return -EINVAL; - - target = current->domain->vcpu[vcpu]; - if ( target == NULL ) - return -ESRCH; - clear_bit(_VCPUF_down, &target->vcpu_flags); - /* wake vcpu */ - vcpu_wake(target); - - return 0; -} - -static long do_vcpu_pickle(int vcpu, unsigned long arg) -{ - struct vcpu *v; - vcpu_guest_context_t *c; - int ret = 0; - - if (vcpu >= MAX_VIRT_CPUS) - return -EINVAL; - v = current->domain->vcpu[vcpu]; - if (!v) - return -ESRCH; - /* Don't pickle vcpus which are currently running */ - if (!test_bit(_VCPUF_down, &v->vcpu_flags)) { - return -EBUSY; - } - c = xmalloc(vcpu_guest_context_t); - if (!c) - return -ENOMEM; - arch_getdomaininfo_ctxt(v, c); - if (copy_to_user((vcpu_guest_context_t *)arg, - (const vcpu_guest_context_t *)c, sizeof(*c))) - ret = -EFAULT; - xfree(c); - return ret; -} - -/* - * Demultiplex scheduler-related hypercalls. 
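/*
 * The sched_sedf.c hunk above fixes deadline drift for a VCPU that has
 * been off the queue for a long time: after the usual one-period advance,
 * any remaining gap is closed in whole periods, so the start of the next
 * period always lands at or beyond "now". DIV_UP is ceiling division; the
 * arithmetic in isolation:
 */
#include <stdint.h>

#define DIV_UP(x, y) (((x) + (y) - 1) / (y))

/* deadline/period/now in the same time unit; returns the new deadline. */
static uint64_t catch_up_deadline(uint64_t deadline, uint64_t period,
                                  uint64_t now)
{
    uint64_t begin = deadline - period;        /* PERIOD_BEGIN()       */
    if (begin < now)                           /* period already over? */
        deadline += DIV_UP(now - begin, period) * period;
    return deadline;
}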
- */ -long do_sched_op(unsigned long op, unsigned long arg) +long do_sched_op(int cmd, unsigned long arg) { long ret = 0; - switch ( op & SCHEDOP_cmdmask ) + switch ( cmd ) { case SCHEDOP_yield: { @@ -354,24 +292,8 @@ case SCHEDOP_shutdown: { TRACE_3D(TRC_SCHED_SHUTDOWN, - current->domain->domain_id, current->vcpu_id, - (op >> SCHEDOP_reasonshift)); - domain_shutdown((u8)(op >> SCHEDOP_reasonshift)); - break; - } - case SCHEDOP_vcpu_down: - { - ret = do_vcpu_down((int)(op >> SCHEDOP_vcpushift)); - break; - } - case SCHEDOP_vcpu_up: - { - ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift)); - break; - } - case SCHEDOP_vcpu_pickle: - { - ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg); + current->domain->domain_id, current->vcpu_id, arg); + domain_shutdown((u8)arg); break; } @@ -395,8 +317,8 @@ return 0; } -/** sched_id - fetch ID of current scheduler */ -int sched_id() +/* sched_id - fetch ID of current scheduler */ +int sched_id(void) { return ops.sched_id; } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/asm-x86/mm.h Sat Oct 8 20:28:24 2005 @@ -22,9 +22,6 @@ /* Each frame can be threaded onto a doubly-linked list. */ struct list_head list; - /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ - u32 tlbflush_timestamp; - /* Reference count and various PGC_xxx flags and fields. */ u32 count_info; @@ -37,17 +34,20 @@ u32 _domain; /* pickled format */ /* Type reference count and various PGT_xxx flags and fields. */ unsigned long type_info; - } inuse; + } __attribute__ ((packed)) inuse; /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ struct { + /* Order-size of the free chunk this page is the head of. */ + u32 order; /* Mask of possibly-tainted TLBs. */ cpumask_t cpumask; - /* Order-size of the free chunk this page is the head of. */ - u8 order; - } free; + } __attribute__ ((packed)) free; } u; + + /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ + u32 tlbflush_timestamp; }; /* The following page types are MUTUALLY EXCLUSIVE. */ @@ -156,6 +156,7 @@ extern struct pfn_info *frame_table; extern unsigned long max_page; +extern unsigned long total_pages; void init_frametable(void); int alloc_page_type(struct pfn_info *page, unsigned long type); diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/asm-x86/vmx.h --- a/xen/include/asm-x86/vmx.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/asm-x86/vmx.h Sat Oct 8 20:28:24 2005 @@ -39,7 +39,6 @@ extern void arch_vmx_do_resume(struct vcpu *); extern void arch_vmx_do_relaunch(struct vcpu *); -extern int vmcs_size; extern unsigned int cpu_rev; /* @@ -315,10 +314,8 @@ } -static always_inline void __vmwrite_vcpu(unsigned long field, unsigned long value) -{ - struct vcpu *v = current; - +static always_inline void __vmwrite_vcpu(struct vcpu *v, unsigned long field, unsigned long value) +{ switch(field) { case CR0_READ_SHADOW: v->arch.arch_vmx.cpu_shadow_cr0 = value; @@ -335,10 +332,8 @@ } } -static always_inline void __vmread_vcpu(unsigned long field, unsigned long *value) -{ - struct vcpu *v = current; - +static always_inline void __vmread_vcpu(struct vcpu *v, unsigned long field, unsigned long *value) +{ switch(field) { case CR0_READ_SHADOW: *value = v->arch.arch_vmx.cpu_shadow_cr0; @@ -353,24 +348,15 @@ printk("__vmread_cpu: invalid field %lx\n", field); break; } - - /* - * __vmwrite() can be used for non-current vcpu, and it's possible that - * the vcpu field is not initialized at that case. 
- * - */ - if (!*value) { - __vmread(field, value); - __vmwrite_vcpu(field, *value); - } } static inline int __vmwrite (unsigned long field, unsigned long value) { unsigned long eflags; + struct vcpu *v = current; __asm__ __volatile__ ( VMWRITE_OPCODE - MODRM_EAX_ECX + MODRM_EAX_ECX : : "a" (field) , "c" (value) : "memory"); @@ -382,7 +368,7 @@ case CR0_READ_SHADOW: case GUEST_CR0: case CPU_BASED_VM_EXEC_CONTROL: - __vmwrite_vcpu(field, value); + __vmwrite_vcpu(v, field, value); break; } @@ -438,23 +424,24 @@ static inline void vmx_stts(void) { unsigned long cr0; - - __vmread_vcpu(GUEST_CR0, &cr0); + struct vcpu *v = current; + + __vmread_vcpu(v, GUEST_CR0, &cr0); if (!(cr0 & X86_CR0_TS)) { __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS); } - __vmread_vcpu(CR0_READ_SHADOW, &cr0); + __vmread_vcpu(v, CR0_READ_SHADOW, &cr0); if (!(cr0 & X86_CR0_TS)) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM); } -/* Works only for ed == current */ +/* Works only for vcpu == current */ static inline int vmx_paging_enabled(struct vcpu *v) { unsigned long cr0; - __vmread_vcpu(CR0_READ_SHADOW, &cr0); + __vmread_vcpu(v, CR0_READ_SHADOW, &cr0); return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG); } diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/asm-x86/vmx_platform.h --- a/xen/include/asm-x86/vmx_platform.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/asm-x86/vmx_platform.h Sat Oct 8 20:28:24 2005 @@ -75,20 +75,11 @@ #define MAX_INST_LEN 32 -struct mi_per_cpu_info { - int flags; - int instr; /* instruction */ - unsigned long operand[2]; /* operands */ - unsigned long immediate; /* immediate portion */ - struct cpu_user_regs *inst_decoder_regs; /* current context */ -}; - struct virtual_platform_def { unsigned long *real_mode_data; /* E820, etc. */ unsigned long shared_page_va; struct vmx_virpit_t vmx_pit; struct vmx_handler_t vmx_handler; - struct mi_per_cpu_info mpci; /* MMIO */ }; extern void handle_mmio(unsigned long, unsigned long); diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Sat Oct 8 20:28:24 2005 @@ -53,6 +53,8 @@ unsigned char data [0]; /* vmcs size is read from MSR */ }; +extern int vmcs_size; + enum { VMX_INDEX_MSR_LSTAR = 0, VMX_INDEX_MSR_STAR, @@ -67,6 +69,14 @@ unsigned long flags; unsigned long msr_items[VMX_MSR_COUNT]; unsigned long shadow_gs; +}; + +struct mmio_op { + int flags; + int instr; /* instruction */ + unsigned long operand[2]; /* operands */ + unsigned long immediate; /* immediate portion */ + struct cpu_user_regs *inst_decoder_regs; /* current context */ }; #define PC_DEBUG_PORT 0x80 @@ -81,7 +91,8 @@ unsigned long cpu_state; unsigned long cpu_based_exec_control; struct msr_state msr_content; - void *io_bitmap_a, *io_bitmap_b; + struct mmio_op mmio_op; /* MMIO */ + void *io_bitmap_a, *io_bitmap_b; }; #define vmx_schedule_tail(next) \ @@ -94,18 +105,11 @@ #define ARCH_VMX_VMCS_RESUME 2 /* Needs VMCS resume */ #define ARCH_VMX_IO_WAIT 3 /* Waiting for I/O completion */ -void vmx_do_launch(struct vcpu *); -void vmx_do_resume(struct vcpu *); -void vmx_set_host_env(struct vcpu *); - +void vmx_do_resume(struct vcpu *); struct vmcs_struct *alloc_vmcs(void); -void free_vmcs(struct vmcs_struct *); -int load_vmcs(struct arch_vmx_struct *, u64); -int store_vmcs(struct arch_vmx_struct *, u64); -int construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *, - struct vcpu_guest_context *, int); int modify_vmcs(struct arch_vmx_struct *arch_vmx, struct cpu_user_regs *regs); +void 
destroy_vmcs(struct arch_vmx_struct *arch_vmx); #define VMCS_USE_HOST_ENV 1 #define VMCS_USE_SEPARATE_ENV 0 diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/dom0_ops.h Sat Oct 8 20:28:24 2005 @@ -19,7 +19,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define DOM0_INTERFACE_VERSION 0xAAAA1010 +#define DOM0_INTERFACE_VERSION 0xAAAA1011 /************************************************************************/ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/event_channel.h --- a/xen/include/public/event_channel.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/event_channel.h Sat Oct 8 20:28:24 2005 @@ -10,63 +10,55 @@ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ /* - * EVTCHNOP_alloc_unbound: Prepare a local port for binding to <dom>. - * <port> may be wildcarded by setting to zero, in which case a fresh port - * will be allocated, and the field filled in on return. + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. */ #define EVTCHNOP_alloc_unbound 6 typedef struct evtchn_alloc_unbound { /* IN parameters */ - domid_t dom; - /* IN/OUT parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ u32 port; } evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between - * <dom1> and <dom2>. Either <port1> or <port2> may be wildcarded by setting to - * zero. On successful return both <port1> and <port2> are filled in and - * <dom1,port1> is fully bound to <dom2,port2>. - * - * NOTES: - * 1. A wildcarded port is allocated from the relevant domain's free list - * (i.e., some port that was previously EVTCHNSTAT_closed). However, if the - * remote port pair is already fully bound then a port is not allocated, - * and instead the existing local port is returned to the caller. - * 2. If the caller is unprivileged then <dom1> must be DOMID_SELF. - * 3. If the caller is unprivileged and <dom2,port2> is EVTCHNSTAT_closed - * then <dom2> must be DOMID_SELF. - * 4. If either port is already bound then it must be bound to the other - * specified domain and port (if not wildcarded). - * 5. If either port is awaiting binding (EVTCHNSTAT_unbound) then it must - * be awaiting binding to the other domain, and the other port pair must - * be closed or unbound. + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. */ #define EVTCHNOP_bind_interdomain 0 typedef struct evtchn_bind_interdomain { /* IN parameters. */ - domid_t dom1, dom2; - /* IN/OUT parameters. */ - u32 port1, port2; + domid_t remote_dom; + u32 remote_port; + /* OUT parameters. */ + u32 local_port; } evtchn_bind_interdomain_t; /* - * EVTCHNOP_bind_virq: Bind a local event channel to IRQ <irq> on calling vcpu. + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified + * vcpu. * NOTES: * 1. A virtual IRQ may be bound to at most one event channel per vcpu. - * 2. The allocated event channel is bound to the calling vcpu. The binding + * 2. 
The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ #define EVTCHNOP_bind_virq 1 typedef struct evtchn_bind_virq { /* IN parameters. */ u32 virq; + u32 vcpu; /* OUT parameters. */ u32 port; } evtchn_bind_virq_t; /* - * EVTCHNOP_bind_pirq: Bind a local event channel to IRQ <irq>. + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. @@ -84,41 +76,35 @@ /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. * NOTES: - * 1. The allocated event channel is bound to the calling vcpu. The binding + * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ #define EVTCHNOP_bind_ipi 7 typedef struct evtchn_bind_ipi { + u32 vcpu; /* OUT parameters. */ u32 port; } evtchn_bind_ipi_t; /* - * EVTCHNOP_close: Close the communication channel which has an endpoint at - * <dom, port>. If the channel is interdomain then the remote end is placed in - * the unbound state (EVTCHNSTAT_unbound), awaiting a new connection. - * NOTES: - * 1. <dom> may be specified as DOMID_SELF. - * 2. Only a sufficiently-privileged domain may close an event channel - * for which <dom> is not DOMID_SELF. + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. */ #define EVTCHNOP_close 3 typedef struct evtchn_close { /* IN parameters. */ - domid_t dom; - u32 port; - /* No OUT parameters. */ + u32 port; } evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local - * endpoint is <DOMID_SELF, local_port>. + * endpoint is <port>. */ #define EVTCHNOP_send 4 typedef struct evtchn_send { /* IN parameters. */ - u32 local_port; - /* No OUT parameters. */ + u32 port; } evtchn_send_t; /* diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/xen.h --- a/xen/include/public/xen.h Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/xen.h Sat Oct 8 20:28:24 2005 @@ -55,7 +55,7 @@ #define __HYPERVISOR_update_va_mapping_otherdomain 22 #define __HYPERVISOR_switch_vm86 23 /* x86/32 only */ #define __HYPERVISOR_switch_to_user 23 /* x86/64 only */ -#define __HYPERVISOR_boot_vcpu 24 +#define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 @@ -194,29 +194,6 @@ #define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ #define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ #define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ - -/* - * Commands to HYPERVISOR_sched_op(). - */ -#define SCHEDOP_yield 0 /* Give up the CPU voluntarily. */ -#define SCHEDOP_block 1 /* Block until an event is received. */ -#define SCHEDOP_shutdown 2 /* Stop executing this domain. */ -#define SCHEDOP_vcpu_down 3 /* make target VCPU not-runnable. */ -#define SCHEDOP_vcpu_up 4 /* make target VCPU runnable. */ -#define SCHEDOP_vcpu_pickle 5 /* save a vcpu's context to memory. */ -#define SCHEDOP_cmdmask 255 /* 8-bit command. */ -#define SCHEDOP_reasonshift 8 /* 8-bit reason code. (SCHEDOP_shutdown) */ -#define SCHEDOP_vcpushift 8 /* 8-bit VCPU target. (SCHEDOP_up|down) */ - -/* - * Reason codes for SCHEDOP_shutdown. These may be interpreted by control - * software to determine the appropriate action. 
For the most part, Xen does - * not care about the shutdown code (SHUTDOWN_crash excepted). - */ -#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ -#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ -#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ -#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ /* * Commands to HYPERVISOR_console_io(). diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/arch/xen/i386/kernel/vsyscall-note.S --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/vsyscall-note.S Sat Oct 8 20:28:24 2005 @@ -0,0 +1,32 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + * First we get the vanilla i386 note that supplies the kernel version info. + */ + +#include "../../../i386/kernel/vsyscall-note.S" + +/* + * Now we add a special note telling glibc's dynamic linker a fake hardware + * flavor that it will use to choose the search path for libraries in the + * same way it uses real hardware capabilities like "mmx". + * We supply "nosegneg" as the fake capability, to indicate that we + * do not like negative offsets in instructions using segment overrides, + * since we implement those inefficiently. This makes it possible to + * install libraries optimized to avoid those access patterns in someplace + * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file + * corresponding to the bits here is needed to make ldconfig work right. + * It should contain: + * hwcap 0 nosegneg + * to match the mapping of bit to name that we give here. + */ +#define NOTE_KERNELCAP_BEGIN(ncaps, mask) \ + ASM_ELF_NOTE_BEGIN(".note.kernelcap", "a", "GNU", 2) \ + .long ncaps, mask +#define NOTE_KERNELCAP(bit, name) \ + .byte bit; .asciz name +#define NOTE_KERNELCAP_END ASM_ELF_NOTE_END + +NOTE_KERNELCAP_BEGIN(1, 1) +NOTE_KERNELCAP(1, "nosegneg") /* Change 1 back to 0 when glibc is fixed! */ +NOTE_KERNELCAP_END diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/Kconfig --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig Sat Oct 8 20:28:24 2005 @@ -0,0 +1,58 @@ +# +# TPM device configuration +# + +menu "TPM devices" + +config TCG_TPM + tristate "TPM Hardware Support" + depends on EXPERIMENTAL && PCI + ---help--- + If you have a TPM security chip in your system, which + implements the Trusted Computing Group's specification, + say Yes and it will be accessible from within Linux. For + more information see <http://www.trustedcomputinggroup.org>. + An implementation of the Trusted Software Stack (TSS), the + userspace enablement piece of the specification, can be + obtained at: <http://sourceforge.net/projects/trousers>. To + compile this driver as a module, choose M here; the module + will be called tpm. If unsure, say N. + +config TCG_TIS + tristate "TPM Interface Specification 1.2 Interface" + depends on TCG_TPM + ---help--- + If you have a TPM security chip that is compliant with the + TCG TIS 1.2 TPM specification say Yes and it will be accessible + from within Linux. To compile this driver as a module, choose + M here; the module will be called tpm_tis. + +config TCG_NSC + tristate "National Semiconductor TPM Interface" + depends on TCG_TPM + ---help--- + If you have a TPM security chip from National Semicondutor + say Yes and it will be accessible from within Linux. 
To + compile this driver as a module, choose M here; the module + will be called tpm_nsc. + +config TCG_ATMEL + tristate "Atmel TPM Interface" + depends on TCG_TPM + ---help--- + If you have a TPM security chip from Atmel say Yes and it + will be accessible from within Linux. To compile this driver + as a module, choose M here; the module will be called tpm_atmel. + +config TCG_XEN + tristate "XEN TPM Interface" + depends on TCG_TPM && ARCH_XEN && XEN_TPMDEV_FRONTEND + ---help--- + If you want to make TPM support available to a Xen + user domain, say Yes and it will + be accessible from within Linux. To compile this driver + as a module, choose M here; the module will be called + tpm_xen. + +endmenu + diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Sat Oct 8 20:28:24 2005 @@ -0,0 +1,657 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * Note, the TPM chip is not interrupt driven (only polling) + * and can have very long timeouts (minutes!). Hence the unusual + * calls to msleep. + * + */ + +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/spinlock.h> +#include "tpm.h" + +#define TPM_CHIP_NUM_MASK 0x0000ffff +#define TPM_CHIP_TYPE_SHIFT 16 + +enum tpm_const { + TPM_MINOR = 224, /* officially assigned */ + TPM_MIN_BUFSIZE = 2048, + TPM_MAX_BUFSIZE = 64 * 1024, + TPM_NUM_DEVICES = 256, + TPM_NUM_MASK_ENTRIES = TPM_NUM_DEVICES / (8 * sizeof(int)) +}; + +static LIST_HEAD(tpm_chip_list); +static DEFINE_SPINLOCK(driver_lock); +static int dev_mask[TPM_NUM_MASK_ENTRIES]; + +static void user_reader_timeout(unsigned long ptr) +{ + struct tpm_chip *chip = (struct tpm_chip *) ptr; + + down(&chip->buffer_mutex); + atomic_set(&chip->data_pending, 0); + memset(chip->data_buffer, 0, chip->vendor->buffersize); + up(&chip->buffer_mutex); +} + +/* + * Internal kernel interface to transmit TPM commands + */ +static ssize_t tpm_transmit(struct tpm_chip * chip, const char *buf, + size_t bufsiz) +{ + ssize_t rc; + u32 count; + unsigned long stop; + + if (!chip) + return -ENODEV; + + count = be32_to_cpu(*((__be32 *) (buf + 2))); + + if (count == 0) + return -ENODATA; + if (count > bufsiz) { + dev_err(chip->dev, + "invalid count value %x %zx\n", count, bufsiz); + return -E2BIG; + } + + down(&chip->tpm_mutex); + + if ((rc = chip->vendor->send(chip, (u8 *) buf, count)) < 0) { + dev_err(chip->dev, + "tpm_transmit: tpm_send: error %zd\n", rc); + goto out; + } + + stop = jiffies + 2 * 60 * HZ; + do { + u8 status = chip->vendor->status(chip); + if ((status & chip->vendor->req_complete_mask) == + chip->vendor->req_complete_val) { + goto out_recv; + } + + if (status == chip->vendor->req_canceled) { + dev_err(chip->dev, "Operation Canceled\n"); + rc = -ECANCELED; + goto out; + } + + msleep(TPM_TIMEOUT); /* CHECK */ + rmb(); + } while (time_before(jiffies, stop)); + + + 
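/* Gave up waiting for the request-complete status: cancel the + outstanding command and report the timeout to the caller. */ +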
chip->vendor->cancel(chip); + dev_err(chip->dev, "Operation Timed out\n"); + rc = -ETIME; + goto out; + +out_recv: + rc = chip->vendor->recv(chip, (u8 *) buf, bufsiz); + if (rc < 0) + dev_err(chip->dev, + "tpm_transmit: tpm_recv: error %zd\n", rc); +out: + up(&chip->tpm_mutex); + return rc; +} + +#define TPM_DIGEST_SIZE 20 +#define CAP_PCR_RESULT_SIZE 18 +static const u8 cap_pcr[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 1 +}; + +#define READ_PCR_RESULT_SIZE 30 +static const u8 pcrread[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 14, /* length */ + 0, 0, 0, 21, /* TPM_ORD_PcrRead */ + 0, 0, 0, 0 /* PCR index */ +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) +ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr, + char *buf) +#else +ssize_t tpm_show_pcrs(struct device *dev, + char *buf) +#endif +{ + u8 data[READ_PCR_RESULT_SIZE]; + ssize_t len; + int i, j, num_pcrs; + __be32 index; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_pcr, sizeof(cap_pcr)); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < CAP_PCR_RESULT_SIZE) { + dev_dbg(chip->dev, "A TPM error (%d) occurred " + "attempting to determine the number of PCRS\n", + be32_to_cpu(*((__be32 *) (data + 6)))); + return 0; + } + + num_pcrs = be32_to_cpu(*((__be32 *) (data + 14))); + + for (i = 0; i < num_pcrs; i++) { + memcpy(data, pcrread, sizeof(pcrread)); + index = cpu_to_be32(i); + memcpy(data + 10, &index, 4); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < READ_PCR_RESULT_SIZE){ + dev_dbg(chip->dev, "A TPM error (%d) occurred" + " attempting to read PCR %d of %d\n", + be32_to_cpu(*((__be32 *) (data + 6))), i, num_pcrs); + goto out; + } + str += sprintf(str, "PCR-%02d: ", i); + for (j = 0; j < TPM_DIGEST_SIZE; j++) + str += sprintf(str, "%02X ", *(data + 10 + j)); + str += sprintf(str, "\n"); + } +out: + return str - buf; +} +EXPORT_SYMBOL_GPL(tpm_show_pcrs); + +#define READ_PUBEK_RESULT_SIZE 314 +static const u8 readpubek[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 30, /* length */ + 0, 0, 0, 124, /* TPM_ORD_ReadPubek */ +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) +ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, + char *buf) +#else +ssize_t tpm_show_pubek(struct device *dev, + char *buf) +#endif +{ + u8 *data; + ssize_t len; + int i, rc; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; + + memcpy(data, readpubek, sizeof(readpubek)); + memset(data + sizeof(readpubek), 0, 20); /* zero nonce */ + + if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) < + READ_PUBEK_RESULT_SIZE) { + dev_dbg(chip->dev, "A TPM error (%d) occurred " + "attempting to read the PUBEK\n", + be32_to_cpu(*((__be32 *) (data + 6)))); + rc = 0; + goto out; + } + + /* + ignore header 10 bytes + algorithm 32 bits (1 == RSA ) + encscheme 16 bits + sigscheme 16 bits + parameters (RSA 12->bytes: keybit, #primes, expbit) + keylenbytes 32 bits + 256 byte modulus + ignore checksum 20 bytes + */ + + str += + sprintf(str, + "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n" + "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X" + " %02X %02X %02X %02X %02X %02X %02X %02X\n" + "Modulus length: %d\nModulus: \n", + data[10], data[11], 
data[12], data[13], data[14], + data[15], data[16], data[17], data[22], data[23], + data[24], data[25], data[26], data[27], data[28], + data[29], data[30], data[31], data[32], data[33], + be32_to_cpu(*((__be32 *) (data + 34)))); + + for (i = 0; i < 256; i++) { + str += sprintf(str, "%02X ", data[i + 38]); + if ((i + 1) % 16 == 0) + str += sprintf(str, "\n"); + } + rc = str - buf; +out: + kfree(data); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_show_pubek); + +#define CAP_VER_RESULT_SIZE 18 +static const u8 cap_version[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 18, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 6, + 0, 0, 0, 0 +}; + +#define CAP_MANUFACTURER_RESULT_SIZE 18 +static const u8 cap_manufacturer[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 3 +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) +ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr, + char *buf) +#else +ssize_t tpm_show_caps(struct device *dev, + char *buf) +#endif +{ + u8 data[sizeof(cap_manufacturer)]; + ssize_t len; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_manufacturer, sizeof(cap_manufacturer)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_MANUFACTURER_RESULT_SIZE) + return len; + + str += sprintf(str, "Manufacturer: 0x%x\n", + be32_to_cpu(*((__be32 *) (data + 14)))); + + memcpy(data, cap_version, sizeof(cap_version)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_VER_RESULT_SIZE) + return len; + + str += + sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n", + (int) data[14], (int) data[15], (int) data[16], + (int) data[17]); + + return str - buf; +} +EXPORT_SYMBOL_GPL(tpm_show_caps); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) +ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +#else +ssize_t tpm_store_cancel(struct device *dev, + const char *buf, size_t count) +#endif +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return 0; + + chip->vendor->cancel(chip); + return count; +} +EXPORT_SYMBOL_GPL(tpm_store_cancel); + + +/* + * Device file system interface to the TPM + */ +int tpm_open(struct inode *inode, struct file *file) +{ + int rc = 0, minor = iminor(inode); + struct tpm_chip *chip = NULL, *pos; + + spin_lock(&driver_lock); + + list_for_each_entry(pos, &tpm_chip_list, list) { + if (pos->vendor->miscdev.minor == minor) { + chip = pos; + break; + } + } + + if (chip == NULL) { + rc = -ENODEV; + goto err_out; + } + + if (chip->num_opens) { + dev_dbg(chip->dev, + "Another process owns this TPM\n"); + rc = -EBUSY; + goto err_out; + } + + chip->num_opens++; + get_device(chip->dev); + + spin_unlock(&driver_lock); + + chip->data_buffer = kmalloc(chip->vendor->buffersize * sizeof(u8), GFP_KERNEL); + if (chip->data_buffer == NULL) { + chip->num_opens--; + put_device(chip->dev); + return -ENOMEM; + } + + atomic_set(&chip->data_pending, 0); + + file->private_data = chip; + return 0; + +err_out: + spin_unlock(&driver_lock); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_open); + +int tpm_release(struct inode *inode, struct file *file) +{ + struct tpm_chip *chip = file->private_data; + + spin_lock(&driver_lock); + file->private_data = NULL; + chip->num_opens--; + del_singleshot_timer_sync(&chip->user_read_timer); + atomic_set(&chip->data_pending, 0); + 
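/* Drop the reference taken in tpm_open and free the per-open data buffer. */ +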
put_device(chip->dev); + kfree(chip->data_buffer); + spin_unlock(&driver_lock); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_release); + +ssize_t tpm_write(struct file * file, const char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int in_size = size, out_size; + + /* cannot perform a write until the read has cleared + either via tpm_read or a user_read_timer timeout */ + while (atomic_read(&chip->data_pending) != 0) + msleep(TPM_TIMEOUT); + + down(&chip->buffer_mutex); + + if (in_size > chip->vendor->buffersize) + in_size = chip->vendor->buffersize; + + if (copy_from_user + (chip->data_buffer, (void __user *) buf, in_size)) { + up(&chip->buffer_mutex); + return -EFAULT; + } + + /* atomic tpm command send and result receive */ + out_size = tpm_transmit(chip, chip->data_buffer, + chip->vendor->buffersize); + + atomic_set(&chip->data_pending, out_size); + atomic_set(&chip->data_position, 0); + up(&chip->buffer_mutex); + + /* Set a timeout by which the reader must come claim the result */ + mod_timer(&chip->user_read_timer, jiffies + (60 * HZ)); + + return in_size; +} + +EXPORT_SYMBOL_GPL(tpm_write); + +ssize_t tpm_read(struct file * file, char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int ret_size; + int pos, pending = 0; + + ret_size = atomic_read(&chip->data_pending); + if (ret_size > 0) { /* relay data */ + if (size < ret_size) + ret_size = size; + + pos = atomic_read(&chip->data_position); + + down(&chip->buffer_mutex); + if (copy_to_user + ((void __user *) buf, &chip->data_buffer[pos], ret_size)) { + ret_size = -EFAULT; + } else { + pending = atomic_read(&chip->data_pending) - ret_size; + if ( pending ) { + atomic_set( &chip->data_pending, pending ); + atomic_set( &chip->data_position, pos+ret_size ); + } + } + up(&chip->buffer_mutex); + } + + if ( ret_size <= 0 || pending == 0 ) { + atomic_set( &chip->data_pending, 0 ); + del_singleshot_timer_sync(&chip->user_read_timer); + } + + return ret_size; +} + +EXPORT_SYMBOL_GPL(tpm_read); + +void tpm_remove_hardware(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + + if (chip == NULL) { + dev_err(dev, "No device data found\n"); + return; + } + + spin_lock(&driver_lock); + + list_del(&chip->list); + + spin_unlock(&driver_lock); + + dev_set_drvdata(dev, NULL); + misc_deregister(&chip->vendor->miscdev); + kfree(chip->vendor->miscdev.name); + + sysfs_remove_group(&dev->kobj, chip->vendor->attr_group); + + dev_mask[chip->dev_num / (8 * sizeof(int))] &= ~(1 << (chip->dev_num % (8 * sizeof(int)))); + + kfree(chip); + + put_device(dev); +} + +EXPORT_SYMBOL_GPL(tpm_remove_hardware); + +static u8 savestate[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* blob length (in bytes) */ + 0, 0, 0, 152 /* TPM_ORD_SaveState */ +}; + +/* + * We are about to suspend. Save the TPM state + * so that it can be restored. + */ +int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + if (chip == NULL) + return -ENODEV; + + tpm_transmit(chip, savestate, sizeof(savestate)); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_suspend); + +/* + * Resume from a power save. The BIOS already restored + * the TPM state. 
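+ * Nothing needs to be done here beyond checking that the chip is + * still registered.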
+ */ +int tpm_pm_resume(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if (chip == NULL) + return -ENODEV; + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_resume); + +/* + * Called from tpm_<specific>.c probe function only for devices + * the driver has determined it should claim. Prior to calling + * this function the specific probe function has called pci_enable_device; + * upon errant exit from this function the specific probe function should + * call pci_disable_device. + */ +int tpm_register_hardware(struct device *dev, + struct tpm_vendor_specific *entry) +{ +#define DEVNAME_SIZE 7 + + char *devname; + struct tpm_chip *chip; + int i, j; + + /* Driver specific per-device data */ + chip = kmalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + memset(chip, 0, sizeof(struct tpm_chip)); + + init_MUTEX(&chip->buffer_mutex); + init_MUTEX(&chip->tpm_mutex); + INIT_LIST_HEAD(&chip->list); + + init_timer(&chip->user_read_timer); + chip->user_read_timer.function = user_reader_timeout; + chip->user_read_timer.data = (unsigned long) chip; + + chip->vendor = entry; + + if (entry->buffersize < TPM_MIN_BUFSIZE) { + entry->buffersize = TPM_MIN_BUFSIZE; + } else if (entry->buffersize > TPM_MAX_BUFSIZE) { + entry->buffersize = TPM_MAX_BUFSIZE; + } + + chip->dev_num = -1; + + for (i = 0; i < TPM_NUM_MASK_ENTRIES; i++) + for (j = 0; j < 8 * sizeof(int); j++) + if ((dev_mask[i] & (1 << j)) == 0) { + chip->dev_num = + i * (8 * sizeof(int)) + j; + dev_mask[i] |= 1 << j; + goto dev_num_search_complete; + } + +dev_num_search_complete: + if (chip->dev_num < 0) { + dev_err(dev, + "No available tpm device numbers\n"); + kfree(chip); + return -ENODEV; + } else if (chip->dev_num == 0) + chip->vendor->miscdev.minor = TPM_MINOR; + else + chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR; + + devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL); + scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num); + chip->vendor->miscdev.name = devname; + + chip->vendor->miscdev.dev = dev; + chip->dev = get_device(dev); + + if (misc_register(&chip->vendor->miscdev)) { + dev_err(chip->dev, + "unable to misc_register %s, minor %d\n", + chip->vendor->miscdev.name, + chip->vendor->miscdev.minor); + put_device(dev); + kfree(chip); + dev_mask[i] &= ~(1 << j); + return -ENODEV; + } + + spin_lock(&driver_lock); + + dev_set_drvdata(dev, chip); + + list_add(&chip->list, &tpm_chip_list); + + spin_unlock(&driver_lock); + + sysfs_create_group(&dev->kobj, chip->vendor->attr_group); + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_register_hardware); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Sat Oct 8 20:28:24 2005 @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). 
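+ * Shared declarations for the TPM core in tpm.c and the bus-specific + * interface drivers such as tpm_nsc and tpm_atmel.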
+ * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ +#include <linux/module.h> +#include <linux/version.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> + +enum tpm_timeout { + TPM_TIMEOUT = 5, /* msecs */ +}; + +/* TPM addresses */ +enum tpm_addr { + TPM_SUPERIO_ADDR = 0x2E, + TPM_ADDR = 0x4E, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) +extern ssize_t tpm_show_pubek(struct device *, struct device_attribute *attr, + char *); +extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr, + char *); +extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr, + char *); +extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr, + const char *, size_t); +#else +extern ssize_t tpm_show_pubek(struct device *, + char *); +extern ssize_t tpm_show_pcrs(struct device *, + char *); +extern ssize_t tpm_show_caps(struct device *, + char *); +extern ssize_t tpm_store_cancel(struct device *, + const char *, size_t); +#endif + +struct tpm_chip; + +struct tpm_vendor_specific { + u8 req_complete_mask; + u8 req_complete_val; + u8 req_canceled; + u16 base; /* TPM base address */ + int drv_type; + u32 buffersize; + + int (*recv) (struct tpm_chip *, u8 *, size_t); + int (*send) (struct tpm_chip *, u8 *, size_t); + void (*cancel) (struct tpm_chip *); + u8 (*status) (struct tpm_chip *); + struct miscdevice miscdev; + struct attribute_group *attr_group; +}; + +struct tpm_chip { + struct device *dev; /* Device stuff */ + + int dev_num; /* /dev/tpm# */ + int num_opens; /* only one allowed */ + int time_expired; + + /* Data passed to and from the tpm via the read/write calls */ + u8 *data_buffer; + atomic_t data_pending; + atomic_t data_position; + struct semaphore buffer_mutex; + + struct timer_list user_read_timer; /* user needs to claim result */ + struct semaphore tpm_mutex; /* tpm is processing */ + + struct tpm_vendor_specific *vendor; + + struct list_head list; +}; + +static inline int tpm_read_index(int base, int index) +{ + outb(index, base); + return inb(base+1) & 0xFF; +} + +static inline void tpm_write_index(int base, int index, int value) +{ + outb(index, base); + outb(value & 0xFF, base+1); +} + +extern int tpm_register_hardware(struct device *, + struct tpm_vendor_specific *); +extern int tpm_open(struct inode *, struct file *); +extern int tpm_release(struct inode *, struct file *); +extern ssize_t tpm_write(struct file *, const char __user *, size_t, + loff_t *); +extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *); +extern void tpm_remove_hardware(struct device *); +extern int tpm_pm_suspend(struct pci_dev *, pm_message_t); +extern int tpm_pm_resume(struct pci_dev *); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Sat Oct 8 20:28:24 2005 @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). 
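+ * Atmel-specific send/receive/cancel/status callbacks, layered on the + * common TPM core in tpm.c.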
+ * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ + +#include "tpm.h" + +/* Atmel definitions */ +enum tpm_atmel_addr { + TPM_ATMEL_BASE_ADDR_LO = 0x08, + TPM_ATMEL_BASE_ADDR_HI = 0x09 +}; + +/* write status bits */ +enum tpm_atmel_write_status { + ATML_STATUS_ABORT = 0x01, + ATML_STATUS_LASTBYTE = 0x04 +}; +/* read status bits */ +enum tpm_atmel_read_status { + ATML_STATUS_BUSY = 0x01, + ATML_STATUS_DATA_AVAIL = 0x02, + ATML_STATUS_REWRITE = 0x04, + ATML_STATUS_READY = 0x08 +}; + +static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 status, *hdr = buf; + u32 size; + int i; + __be32 *native_size; + + /* start reading header */ + if (count < 6) + return -EIO; + + for (i = 0; i < 6; i++) { + status = inb(chip->vendor->base + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, + "error reading header\n"); + return -EIO; + } + *buf++ = inb(chip->vendor->base); + } + + /* size of the data received */ + native_size = (__force __be32 *) (hdr + 2); + size = be32_to_cpu(*native_size); + + if (count < size) { + dev_err(chip->dev, + "Recv size(%d) less than available space\n", size); + for (; i < size; i++) { /* clear the waiting data anyway */ + status = inb(chip->vendor->base + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, + "error reading data\n"); + return -EIO; + } + } + return -EIO; + } + + /* read all the data available */ + for (; i < size; i++) { + status = inb(chip->vendor->base + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, + "error reading data\n"); + return -EIO; + } + *buf++ = inb(chip->vendor->base); + } + + /* make sure data available is gone */ + status = inb(chip->vendor->base + 1); + if (status & ATML_STATUS_DATA_AVAIL) { + dev_err(chip->dev, "data available is stuck\n"); + return -EIO; + } + + return size; +} + +static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count) +{ + int i; + + dev_dbg(chip->dev, "tpm_atml_send:\n"); + for (i = 0; i < count; i++) { + dev_dbg(chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); + outb(buf[i], chip->vendor->base); + } + + return count; +} + +static void tpm_atml_cancel(struct tpm_chip *chip) +{ + outb(ATML_STATUS_ABORT, chip->vendor->base + 1); +} + +static u8 tpm_atml_status(struct tpm_chip *chip) +{ + return inb(chip->vendor->base + 1); +} + +static struct file_operations atmel_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel); + +static struct attribute* atmel_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + 0, +}; + +static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs }; + +static struct tpm_vendor_specific tpm_atmel = { + .recv = tpm_atml_recv, + .send = tpm_atml_send, + .cancel = tpm_atml_cancel, + .status = tpm_atml_status, + .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL, + .req_complete_val = ATML_STATUS_DATA_AVAIL, + .req_canceled = ATML_STATUS_READY, + .attr_group = 
&atmel_attr_grp, + .miscdev = { .fops = &atmel_ops, }, +}; + +static int __devinit tpm_atml_init(struct pci_dev *pci_dev, + const struct pci_device_id *pci_id) +{ + u8 version[4]; + int rc = 0; + int lo, hi; + + if (pci_enable_device(pci_dev)) + return -EIO; + + lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO); + hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI); + + tpm_atmel.base = (hi<<8)|lo; + dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base); + + /* verify that it is an Atmel part */ + if (tpm_read_index(TPM_ADDR, 4) != 'A' || tpm_read_index(TPM_ADDR, 5) != 'T' + || tpm_read_index(TPM_ADDR, 6) != 'M' || tpm_read_index(TPM_ADDR, 7) != 'L') { + rc = -ENODEV; + goto out_err; + } + + /* query chip for its version number */ + if ((version[0] = tpm_read_index(TPM_ADDR, 0x00)) != 0xFF) { + version[1] = tpm_read_index(TPM_ADDR, 0x01); + version[2] = tpm_read_index(TPM_ADDR, 0x02); + version[3] = tpm_read_index(TPM_ADDR, 0x03); + } else { + dev_info(&pci_dev->dev, "version query failed\n"); + rc = -ENODEV; + goto out_err; + } + + if ((rc = tpm_register_hardware(&pci_dev->dev, &tpm_atmel)) < 0) + goto out_err; + + dev_info(&pci_dev->dev, + "Atmel TPM version %d.%d.%d.%d\n", version[0], version[1], + version[2], version[3]); + + return 0; +out_err: + pci_disable_device(pci_dev); + return rc; +} + +static void __devexit tpm_atml_remove(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if ( chip ) + tpm_remove_hardware(chip->dev); +} + +static struct pci_device_id tpm_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)}, +#ifndef PCI_DEVICE_ID_SERVERWORKS_CSB6LPC +#define PCI_DEVICE_ID_SERVERWORKS_CSB6LPC 0x0227 +#else +#warning Remove the define of PCI_DEVICE_ID_SERVERWORKS_CSB6LPC +#endif + {PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6LPC)}, + {0,} +}; + +MODULE_DEVICE_TABLE(pci, tpm_pci_tbl); + +static struct pci_driver atmel_pci_driver = { + .name = "tpm_atmel", + .id_table = tpm_pci_tbl, + .probe = tpm_atml_init, + .remove = __devexit_p(tpm_atml_remove), + .suspend = tpm_pm_suspend, + .resume = tpm_pm_resume, +}; + +static int __init init_atmel(void) +{ + return pci_register_driver(&atmel_pci_driver); +} + +static void __exit cleanup_atmel(void) +{ + pci_unregister_driver(&atmel_pci_driver); +} + +fs_initcall(init_atmel); +module_exit(cleanup_atmel); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c Sat Oct 8 20:28:24 2005 @@ -0,0 +1,386 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * 
Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ + +#include "tpm.h" + +/* National definitions */ +enum tpm_nsc_addr{ + TPM_NSC_IRQ = 0x07, + TPM_NSC_BASE0_HI = 0x60, + TPM_NSC_BASE0_LO = 0x61, + TPM_NSC_BASE1_HI = 0x62, + TPM_NSC_BASE1_LO = 0x63 +}; + +enum tpm_nsc_index { + NSC_LDN_INDEX = 0x07, + NSC_SID_INDEX = 0x20, + NSC_LDC_INDEX = 0x30, + NSC_DIO_INDEX = 0x60, + NSC_CIO_INDEX = 0x62, + NSC_IRQ_INDEX = 0x70, + NSC_ITS_INDEX = 0x71 +}; + +enum tpm_nsc_status_loc { + NSC_STATUS = 0x01, + NSC_COMMAND = 0x01, + NSC_DATA = 0x00 +}; + +/* status bits */ +enum tpm_nsc_status { + NSC_STATUS_OBF = 0x01, /* output buffer full */ + NSC_STATUS_IBF = 0x02, /* input buffer full */ + NSC_STATUS_F0 = 0x04, /* F0 */ + NSC_STATUS_A2 = 0x08, /* A2 */ + NSC_STATUS_RDY = 0x10, /* ready to receive command */ + NSC_STATUS_IBR = 0x20 /* ready to receive data */ +}; + +/* command bits */ +enum tpm_nsc_cmd_mode { + NSC_COMMAND_NORMAL = 0x01, /* normal mode */ + NSC_COMMAND_EOC = 0x03, + NSC_COMMAND_CANCEL = 0x22 +}; +/* + * Wait for a certain status to appear + */ +static int wait_for_stat(struct tpm_chip *chip, u8 mask, u8 val, u8 * data) +{ + unsigned long stop; + + /* status immediately available check */ + *data = inb(chip->vendor->base + NSC_STATUS); + if ((*data & mask) == val) + return 0; + + /* wait for status */ + stop = jiffies + 10 * HZ; + do { + msleep(TPM_TIMEOUT); + *data = inb(chip->vendor->base + 1); + if ((*data & mask) == val) + return 0; + } + while (time_before(jiffies, stop)); + + return -EBUSY; +} + +static int nsc_wait_for_ready(struct tpm_chip *chip) +{ + int status; + unsigned long stop; + + /* status immediately available check */ + status = inb(chip->vendor->base + NSC_STATUS); + if (status & NSC_STATUS_OBF) + status = inb(chip->vendor->base + NSC_DATA); + if (status & NSC_STATUS_RDY) + return 0; + + /* wait for status */ + stop = jiffies + 100; + do { + msleep(TPM_TIMEOUT); + status = inb(chip->vendor->base + NSC_STATUS); + if (status & NSC_STATUS_OBF) + status = inb(chip->vendor->base + NSC_DATA); + if (status & NSC_STATUS_RDY) + return 0; + } + while (time_before(jiffies, stop)); + + dev_info(chip->dev, "wait for ready failed\n"); + return -EBUSY; +} + + +static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 *buffer = buf; + u8 data, *p; + u32 size; + __be32 *native_size; + + if (count < 6) + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { + dev_err(chip->dev, "F0 timeout\n"); + return -EIO; + } + if ((data = + inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) { + dev_err(chip->dev, "not in normal mode (0x%x)\n", + data); + return -EIO; + } + + /* read the whole packet */ + for (p = buffer; p < &buffer[count]; p++) { + if (wait_for_stat + (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { + dev_err(chip->dev, + "OBF timeout (while reading data)\n"); + return -EIO; + } + if (data & NSC_STATUS_F0) + break; + *p = inb(chip->vendor->base + NSC_DATA); + } + + if ((data & NSC_STATUS_F0) == 0 && + (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { + dev_err(chip->dev, "F0 not set\n"); + return -EIO; + } + if ((data = inb(chip->vendor->base + NSC_DATA)) != 
NSC_COMMAND_EOC) { + dev_err(chip->dev, + "expected end of command(0x%x)\n", data); + return -EIO; + } + + native_size = (__force __be32 *) (buf + 2); + size = be32_to_cpu(*native_size); + + if (count < size) + return -EIO; + + return size; +} + +static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 data; + int i; + + /* + * If we hit the chip with back to back commands it locks up + * and never set IBF. Hitting it with this "hammer" seems to + * fix it. Not sure why this is needed, we followed the flow + * chart in the manual to the letter. + */ + outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); + + if (nsc_wait_for_ready(chip) != 0) + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(chip->dev, "IBF timeout\n"); + return -EIO; + } + + outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND); + if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { + dev_err(chip->dev, "IBR timeout\n"); + return -EIO; + } + + for (i = 0; i < count; i++) { + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(chip->dev, + "IBF timeout (while writing data)\n"); + return -EIO; + } + outb(buf[i], chip->vendor->base + NSC_DATA); + } + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(chip->dev, "IBF timeout\n"); + return -EIO; + } + outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND); + + return count; +} + +static void tpm_nsc_cancel(struct tpm_chip *chip) +{ + outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); +} + +static u8 tpm_nsc_status(struct tpm_chip *chip) +{ + return inb(chip->vendor->base + NSC_STATUS); +} + +static struct file_operations nsc_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR|S_IWGRP, NULL, tpm_store_cancel); + +static struct attribute * nsc_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + 0, +}; + +static struct attribute_group nsc_attr_grp = { .attrs = nsc_attrs }; + +static struct tpm_vendor_specific tpm_nsc = { + .recv = tpm_nsc_recv, + .send = tpm_nsc_send, + .cancel = tpm_nsc_cancel, + .status = tpm_nsc_status, + .req_complete_mask = NSC_STATUS_OBF, + .req_complete_val = NSC_STATUS_OBF, + .req_canceled = NSC_STATUS_RDY, + .attr_group = &nsc_attr_grp, + .miscdev = { .fops = &nsc_ops, }, +}; + +static int __devinit tpm_nsc_init(struct pci_dev *pci_dev, + const struct pci_device_id *pci_id) +{ + int rc = 0; + int lo, hi; + int nscAddrBase = TPM_ADDR; + + + if (pci_enable_device(pci_dev)) + return -EIO; + + /* select PM channel 1 */ + tpm_write_index(nscAddrBase,NSC_LDN_INDEX, 0x12); + + /* verify that it is a National part (SID) */ + if (tpm_read_index(TPM_ADDR, NSC_SID_INDEX) != 0xEF) { + nscAddrBase = (tpm_read_index(TPM_SUPERIO_ADDR, 0x2C)<<8)| + (tpm_read_index(TPM_SUPERIO_ADDR, 0x2B)&0xFE); + if (tpm_read_index(nscAddrBase, NSC_SID_INDEX) != 0xF6) { + rc = -ENODEV; + goto out_err; + } + } + + hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI); + lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO); + tpm_nsc.base = (hi<<8) | lo; + + dev_dbg(&pci_dev->dev, "NSC TPM detected\n"); + dev_dbg(&pci_dev->dev, + "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n", + tpm_read_index(nscAddrBase,0x07), 
tpm_read_index(nscAddrBase,0x20), + tpm_read_index(nscAddrBase,0x27)); + dev_dbg(&pci_dev->dev, + "NSC SIOCF1 0x%x SIOCF5 0x%x SIOCF6 0x%x SIOCF8 0x%x\n", + tpm_read_index(nscAddrBase,0x21), tpm_read_index(nscAddrBase,0x25), + tpm_read_index(nscAddrBase,0x26), tpm_read_index(nscAddrBase,0x28)); + dev_dbg(&pci_dev->dev, "NSC IO Base0 0x%x\n", + (tpm_read_index(nscAddrBase,0x60) << 8) | tpm_read_index(nscAddrBase,0x61)); + dev_dbg(&pci_dev->dev, "NSC IO Base1 0x%x\n", + (tpm_read_index(nscAddrBase,0x62) << 8) | tpm_read_index(nscAddrBase,0x63)); + dev_dbg(&pci_dev->dev, "NSC Interrupt number and wakeup 0x%x\n", + tpm_read_index(nscAddrBase,0x70)); + dev_dbg(&pci_dev->dev, "NSC IRQ type select 0x%x\n", + tpm_read_index(nscAddrBase,0x71)); + dev_dbg(&pci_dev->dev, + "NSC DMA channel select0 0x%x, select1 0x%x\n", + tpm_read_index(nscAddrBase,0x74), tpm_read_index(nscAddrBase,0x75)); + dev_dbg(&pci_dev->dev, + "NSC Config " + "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + tpm_read_index(nscAddrBase,0xF0), tpm_read_index(nscAddrBase,0xF1), + tpm_read_index(nscAddrBase,0xF2), tpm_read_index(nscAddrBase,0xF3), + tpm_read_index(nscAddrBase,0xF4), tpm_read_index(nscAddrBase,0xF5), + tpm_read_index(nscAddrBase,0xF6), tpm_read_index(nscAddrBase,0xF7), + tpm_read_index(nscAddrBase,0xF8), tpm_read_index(nscAddrBase,0xF9)); + + dev_info(&pci_dev->dev, + "NSC TPM revision %d\n", + tpm_read_index(nscAddrBase, 0x27) & 0x1F); + + /* enable the DPM module */ + tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01); + + if ((rc = tpm_register_hardware(&pci_dev->dev, &tpm_nsc)) < 0) + goto out_err; + + return 0; + +out_err: + pci_disable_device(pci_dev); + return rc; +} + +static void __devexit tpm_nsc_remove(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if ( chip ) + tpm_remove_hardware(chip->dev); +} + +static struct pci_device_id tpm_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)}, + {0,} +}; + +MODULE_DEVICE_TABLE(pci, tpm_pci_tbl); + +static struct pci_driver nsc_pci_driver = { + .name = "tpm_nsc", + .id_table = tpm_pci_tbl, + .probe = tpm_nsc_init, + .remove = __devexit_p(tpm_nsc_remove), + .suspend = tpm_pm_suspend, + .resume = tpm_pm_resume, +}; + +static int __init init_nsc(void) +{ + return pci_register_driver(&nsc_pci_driver); +} + +static void __exit cleanup_nsc(void) +{ + pci_unregister_driver(&nsc_pci_driver); +} + +fs_initcall(init_nsc); +module_exit(cleanup_nsc); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-xen/linux-public/evtchn.h --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/evtchn.h Sat Oct 8 20:28:24 2005 @@ -0,0 +1,98 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. 
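+ * Ports are bound with the IOCTL_EVTCHN_BIND_* calls defined below and + * released again with IOCTL_EVTCHN_UNBIND.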
+ * + * Copyright (c) 2003-2005, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* /dev/xen/evtchn resides at device number major=10, minor=201 */ +#define EVTCHN_MINOR 201 + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Send an event notification to previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. 
*/ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/block --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/examples/block Sat Oct 8 20:28:24 2005 @@ -0,0 +1,82 @@ +#!/bin/sh + +set -e + +export PATH=/sbin:/bin:/usr/bin:/usr/sbin:$PATH + +expand_dev() { + local dev + case $1 in + /*) + dev=$1 + ;; + *) + dev=/dev/$1 + ;; + esac + echo -n $dev +} + +write_dev() { + local major + local minor + local pdev + + major=$(stat -L -c %t "$1") + minor=$(stat -L -c %T "$1") + pdev=$(printf "0x%02x%02x" 0x$major 0x$minor) + xenstore-write "$XENBUS_PATH"/physical-device $pdev \ + "$XENBUS_PATH"/node $1 +} + +t=$(xenstore-read "$XENBUS_PATH"/type) + +case $1 in + bind) + p=$(xenstore-read "$XENBUS_PATH"/params) + case $t in + phy) + dev=$(expand_dev $p) + write_dev "$dev" + exit 0 + ;; + + file) + for dev in /dev/loop* ; do + echo "dev is $dev, p is $p" + if losetup $dev $p; then + write_dev "$dev" + exit 0 + fi + done + exit 1 + ;; + + *) + [ -x /etc/xen/scripts/block-"$t" ] && \ + /etc/xen/scripts/block-"$t" bind $p + ;; + esac + ;; + + unbind) + node=$(xenstore-read "$XENBUS_PATH"/node) + case $t in + phy) + exit 0 + ;; + + file) + losetup -d $node + exit 0 + ;; + + *) + [ -x /etc/xen/scripts/block-"$t" ] && \ + /etc/xen/scripts/block-"$t" unbind $node + ;; + + esac + ;; + +esac diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/xen-backend.rules --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/examples/xen-backend.rules Sat Oct 8 20:28:24 2005 @@ -0,0 +1,4 @@ +SUBSYSTEM=="xen-backend", KERNEL=="vbd*", ACTION=="add", RUN+="/etc/xen/scripts/block bind" +SUBSYSTEM=="xen-backend", KERNEL=="vbd*", ACTION=="remove", RUN+="/etc/xen/scripts/block unbind" +SUBSYSTEM=="xen-backend", KERNEL=="vif*", ENV{PHYSDEVDRIVER}=="vif", ACTION=="online", RUN+="$env{script} up" +SUBSYSTEM=="xen-backend", ACTION=="remove", RUN+="/usr/bin/xenstore-rm $env{XENBUS_PATH}" diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/README --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/python/README Sat Oct 8 20:28:24 2005 @@ -0,0 +1,3 @@ +The file test.py here is from the Zope project, and is Copyright (c) 2001, +2002 Zope Corporation and Contributors. This file is released under the Zope +Public License, version 2.0, a copy of which is in the file ZPL-2.0. diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/ZPL-2.0 --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/python/ZPL-2.0 Sat Oct 8 20:28:24 2005 @@ -0,0 +1,59 @@ +Zope Public License (ZPL) Version 2.0 +----------------------------------------------- + +This software is Copyright (c) Zope Corporation (tm) and +Contributors. All rights reserved. + +This license has been certified as open source. It has also +been designated as GPL compatible by the Free Software +Foundation (FSF). + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions in source code must retain the above + copyright notice, this list of conditions, and the following + disclaimer. + +2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + +3. 
The name Zope Corporation (tm) must not be used to + endorse or promote products derived from this software + without prior written permission from Zope Corporation. + +4. The right to distribute this software or to use it for + any purpose does not give you the right to use Servicemarks + (sm) or Trademarks (tm) of Zope Corporation. Use of them is + covered in a separate agreement (see + http://www.zope.com/Marks). + +5. If any files are modified, you must cause the modified + files to carry prominent notices stating that you changed + the files and the date of any change. + +Disclaimer + + THIS SOFTWARE IS PROVIDED BY ZOPE CORPORATION ``AS IS'' + AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT + NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + NO EVENT SHALL ZOPE CORPORATION OR ITS CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + +This software consists of contributions made by Zope +Corporation and many individuals on behalf of Zope +Corporation. Specific attributions are listed in the +accompanying credits file. \ No newline at end of file diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/test.py --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/python/test.py Sat Oct 8 20:28:24 2005 @@ -0,0 +1,1094 @@ +#! /usr/bin/env python2.3 +############################################################################## +# +# Copyright (c) 2001, 2002 Zope Corporation and Contributors. +# All Rights Reserved. +# +# This software is subject to the provisions of the Zope Public License, +# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. +# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED +# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS +# FOR A PARTICULAR PURPOSE. +# +############################################################################## +""" +test.py [-abBcdDfFgGhklLmMPprstTuUv] [modfilter [testfilter]] + +Find and run tests written using the unittest module. + +The test runner searches for Python modules that contain test suites. +It collects those suites, and runs the tests. There are many options +for controlling how the tests are run. There are options for using +the debugger, reporting code coverage, and checking for refcount problems. + +The test runner uses the following rules for finding tests to run. It +searches for packages and modules that contain "tests" as a component +of the name, e.g. "frob.tests.nitz" matches this rule because tests is +a sub-package of frob. Within each "tests" package, it looks for +modules that begin with the name "test." For each test module, it +imports the module and calls the module's test_suite() function, which must +return a unittest TestSuite object. + +Options can be specified as command line arguments (see below). However, +options may also be specified in a file named 'test.config', a Python +script which, if found, will be executed before the command line +arguments are processed. 
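+ +A test.config file is ordinary Python. For example, a minimal one that always builds first and raises the test level to 2 (illustrative values) might contain just: + + BUILD = 1 + LEVEL = 2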
+ +The test.config script should specify options by setting zero or more of the +global variables: LEVEL, BUILD, and other capitalized variable names found in +the test runner script (see the list of global variables in process_args().). + + +-a level +--at-level level +--all + Run the tests at the given level. Any test at a level at or below + this is run, any test at a level above this is not run. Level 0 + runs all tests. The default is to run tests at level 1. --all is + a shortcut for -a 0. + +-b +--build + Run "python setup.py build" before running tests, where "python" + is the version of python used to run test.py. Highly recommended. + Tests will be run from the build directory. + +-B +--build-inplace + Run "python setup.py build_ext -i" before running tests. Tests will be + run from the source directory. + +-c +--pychecker + use pychecker + +-d +--debug + Instead of the normal test harness, run a debug version which + doesn't catch any exceptions. This is occasionally handy when the + unittest code catching the exception doesn't work right. + Unfortunately, the debug harness doesn't print the name of the + test, so Use With Care. + +-D +--debug-inplace + Works like -d, except that it loads pdb when an exception occurs. + +--dir directory +-s directory + Option to limit where tests are searched for. This is important + when you *really* want to limit the code that gets run. This can + be specified more than once to run tests in two different parts of + the source tree. + For example, if refactoring interfaces, you don't want to see the way + you have broken setups for tests in other packages. You *just* want to + run the interface tests. + +-f +--skip-unit + Run functional tests but not unit tests. + Note that functional tests will be skipped if the module + zope.app.tests.functional cannot be imported. + Functional tests also expect to find the file ftesting.zcml, + which is used to configure the functional-test run. + +-F + DEPRECATED. Run both unit and functional tests. + This option is deprecated, because this is the new default mode. + Note that functional tests will be skipped if the module + zope.app.tests.functional cannot be imported. + +-g threshold +--gc-threshold threshold + Set the garbage collector generation0 threshold. This can be used + to stress memory and gc correctness. Some crashes are only + reproducible when the threshold is set to 1 (aggressive garbage + collection). Do "-g 0" to disable garbage collection altogether. + +-G gc_option +--gc-option gc_option + Set the garbage collection debugging flags. The argument must be one + of the DEBUG_ flags defined by the Python gc module. Multiple options + can be specified by using "-G OPTION1 -G OPTION2." + +-k +--keepbytecode + Do not delete all stale bytecode before running tests. + +-l test_root +--libdir test_root + Search for tests starting in the specified start directory + (useful for testing components being developed outside the main + "src" or "build" trees). + +-L +--loop + Keep running the selected tests in a loop. You may experience + memory leakage. + +-m +-M minimal GUI. See -U. + +-P +--profile + Run the tests under hotshot and display the top 50 stats, sorted by + cumulative time and number of calls. + +-p +--progress + Show running progress. It can be combined with -v or -vv. + +-r +--refcount + Look for refcount problems. + This requires that Python was built --with-pydebug. + +-t +--top-fifty + Time the individual tests and print a list of the top 50, sorted from + longest to shortest. 
+ +--times n +--times outfile + With an integer argument, time the tests and print a list of the top <n> + tests, sorted from longest to shortest. + With a non-integer argument, specifies a file to which timing information + is to be printed. + +-T +--trace + Use the trace module from Python for code coverage. The current + utility writes coverage files to a directory named `coverage' that + is parallel to `build'. It also prints a summary to stdout. + +-u +--skip-functional + CHANGED. Run unit tests but not functional tests. + Note that the meaning of -u is changed from its former meaning, + which is now specified by -U or --gui. + +-U +--gui + Use the PyUnit GUI instead of output to the command line. The GUI + imports tests on its own, taking care to reload all dependencies + on each run. The debug (-d), verbose (-v), progress (-p), and + Loop (-L) options will be ignored. The testfilter filter is also + not applied. + +-m +-M +--minimal-gui + Note: -m is DEPRECATED in favour of -M or --minimal-gui. + -m starts the gui minimized. Double-clicking the progress bar + will start the import and run all tests. + + +-v +--verbose + Verbose output. With one -v, unittest prints a dot (".") for each + test run. With -vv, unittest prints the name of each test (for + some definition of "name" ...). With no -v, unittest is silent + until the end of the run, except when errors occur. + + When -p is also specified, the meaning of -v is slightly + different. With -p and no -v only the percent indicator is + displayed. With -p and -v the test name of the current test is + shown to the right of the percent indicator. With -p and -vv the + test name is not truncated to fit into 80 columns and it is not + cleared after the test finishes. + + +modfilter +testfilter + Case-sensitive regexps to limit which tests are run, used in search + (not match) mode. + In an extension of Python regexp notation, a leading "!" is stripped + and causes the sense of the remaining regexp to be negated (so "!bc" + matches any string that does not match "bc", and vice versa). + By default these act like ".", i.e. nothing is excluded. + + modfilter is applied to a test file's path, starting at "build" and + including (OS-dependent) path separators. + + testfilter is applied to the (method) name of the unittest methods + contained in the test files whose paths modfilter matched. + +Extreme (yet useful) examples: + + test.py -vvb . "^testWriteClient$" + + Builds the project silently, then runs unittest in verbose mode on all + tests whose names are precisely "testWriteClient". Useful when + debugging a specific test. + + test.py -vvb . "!^testWriteClient$" + + As before, but runs all tests whose names aren't precisely + "testWriteClient". Useful to avoid a specific failing test you don't + want to deal with just yet. + + test.py -M . "!^testWriteClient$" + + As before, but now opens up a minimized PyUnit GUI window (only showing + the progress bar). Useful for refactoring runs where you continually want + to make sure all tests still pass. 
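+
+    test.py -pv
+
+    Runs the whole suite with the progress indicator, showing the name of
+    the current test beside the percent-done figure (see the notes on
+    combining -p and -v above).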
+""" + +import gc +import hotshot, hotshot.stats +import os +import re +import pdb +import sys +import threading # just to get at Thread objects created by tests +import time +import traceback +import unittest +import warnings + +def set_trace_doctest(stdin=sys.stdin, stdout=sys.stdout, trace=pdb.set_trace): + sys.stdin = stdin + sys.stdout = stdout + trace() + +pdb.set_trace_doctest = set_trace_doctest + +from distutils.util import get_platform + +PLAT_SPEC = "%s-%s" % (get_platform(), sys.version[0:3]) + +class ImmediateTestResult(unittest._TextTestResult): + + __super_init = unittest._TextTestResult.__init__ + __super_startTest = unittest._TextTestResult.startTest + __super_printErrors = unittest._TextTestResult.printErrors + + def __init__(self, stream, descriptions, verbosity, debug=False, + count=None, progress=False): + self.__super_init(stream, descriptions, verbosity) + self._debug = debug + self._progress = progress + self._progressWithNames = False + self.count = count + self._testtimes = {} + if progress and verbosity == 1: + self.dots = False + self._progressWithNames = True + self._lastWidth = 0 + self._maxWidth = 80 + try: + import curses + except ImportError: + pass + else: + curses.setupterm() + self._maxWidth = curses.tigetnum('cols') + self._maxWidth -= len("xxxx/xxxx (xxx.x%): ") + 1 + + def stopTest(self, test): + self._testtimes[test] = time.time() - self._testtimes[test] + if gc.garbage: + print "The following test left garbage:" + print test + print gc.garbage + # XXX Perhaps eat the garbage here, so that the garbage isn't + # printed for every subsequent test. + + # Did the test leave any new threads behind? + new_threads = [t for t in threading.enumerate() + if (t.isAlive() + and + t not in self._threads)] + if new_threads: + print "The following test left new threads behind:" + print test + print "New thread(s):", new_threads + + def print_times(self, stream, count=None): + results = self._testtimes.items() + results.sort(lambda x, y: cmp(y[1], x[1])) + if count: + n = min(count, len(results)) + if n: + print >>stream, "Top %d longest tests:" % n + else: + n = len(results) + if not n: + return + for i in range(n): + print >>stream, "%6dms" % int(results[i][1] * 1000), results[i][0] + + def _print_traceback(self, msg, err, test, errlist): + if self.showAll or self.dots or self._progress: + self.stream.writeln("\n") + self._lastWidth = 0 + + tb = "".join(traceback.format_exception(*err)) + self.stream.writeln(msg) + self.stream.writeln(tb) + errlist.append((test, tb)) + + def startTest(self, test): + if self._progress: + self.stream.write("\r%4d" % (self.testsRun + 1)) + if self.count: + self.stream.write("/%d (%5.1f%%)" % (self.count, + (self.testsRun + 1) * 100.0 / self.count)) + if self.showAll: + self.stream.write(": ") + elif self._progressWithNames: + # XXX will break with multibyte strings + name = self.getShortDescription(test) + width = len(name) + if width < self._lastWidth: + name += " " * (self._lastWidth - width) + self.stream.write(": %s" % name) + self._lastWidth = width + self.stream.flush() + self._threads = threading.enumerate() + self.__super_startTest(test) + self._testtimes[test] = time.time() + + def getShortDescription(self, test): + s = self.getDescription(test) + if len(s) > self._maxWidth: + pos = s.find(" (") + if pos >= 0: + w = self._maxWidth - (pos + 5) + if w < 1: + # first portion (test method name) is too long + s = s[:self._maxWidth-3] + "..." 
+ else: + pre = s[:pos+2] + post = s[-w:] + s = "%s...%s" % (pre, post) + return s[:self._maxWidth] + + def addError(self, test, err): + if self._progress: + self.stream.write("\r") + if self._debug: + raise err[0], err[1], err[2] + self._print_traceback("Error in test %s" % test, err, + test, self.errors) + + def addFailure(self, test, err): + if self._progress: + self.stream.write("\r") + if self._debug: + raise err[0], err[1], err[2] + self._print_traceback("Failure in test %s" % test, err, + test, self.failures) + + def printErrors(self): + if self._progress and not (self.dots or self.showAll): + self.stream.writeln() + self.__super_printErrors() + + def printErrorList(self, flavor, errors): + for test, err in errors: + self.stream.writeln(self.separator1) + self.stream.writeln("%s: %s" % (flavor, self.getDescription(test))) + self.stream.writeln(self.separator2) + self.stream.writeln(err) + + +class ImmediateTestRunner(unittest.TextTestRunner): + + __super_init = unittest.TextTestRunner.__init__ + + def __init__(self, **kwarg): + debug = kwarg.get("debug") + if debug is not None: + del kwarg["debug"] + progress = kwarg.get("progress") + if progress is not None: + del kwarg["progress"] + profile = kwarg.get("profile") + if profile is not None: + del kwarg["profile"] + self.__super_init(**kwarg) + self._debug = debug + self._progress = progress + self._profile = profile + # Create the test result here, so that we can add errors if + # the test suite search process has problems. The count + # attribute must be set in run(), because we won't know the + # count until all test suites have been found. + self.result = ImmediateTestResult( + self.stream, self.descriptions, self.verbosity, debug=self._debug, + progress=self._progress) + + def _makeResult(self): + # Needed base class run method. + return self.result + + def run(self, test): + self.result.count = test.countTestCases() + if self._debug: + club_debug(test) + if self._profile: + prof = hotshot.Profile("tests_profile.prof") + args = (self, test) + r = prof.runcall(unittest.TextTestRunner.run, *args) + prof.close() + stats = hotshot.stats.load("tests_profile.prof") + stats.sort_stats('cumulative', 'calls') + stats.print_stats(50) + return r + return unittest.TextTestRunner.run(self, test) + +def club_debug(test): + # Beat a debug flag into debug-aware test cases + setDebugModeOn = getattr(test, 'setDebugModeOn', None) + if setDebugModeOn is not None: + setDebugModeOn() + + for subtest in getattr(test, '_tests', ()): + club_debug(subtest) + +# setup list of directories to put on the path +class PathInit: + def __init__(self, build, build_inplace, libdir=None): + self.inplace = None + # Figure out if we should test in-place or test in-build. If the -b + # or -B option was given, test in the place we were told to build in. + # Otherwise, we'll look for a build directory and if we find one, + # we'll test there, otherwise we'll test in-place. + if build: + self.inplace = build_inplace + if self.inplace is None: + # Need to figure it out + if os.path.isdir(os.path.join("build", "lib.%s" % PLAT_SPEC)): + self.inplace = False + else: + self.inplace = True + # Calculate which directories we're going to add to sys.path, and cd + # to the appropriate working directory + self.org_cwd = os.getcwd() + if self.inplace: + self.libdir = "src" + else: + self.libdir = "lib.%s" % PLAT_SPEC + os.chdir("build") + # Hack sys.path + self.cwd = os.getcwd() + sys.path.insert(0, os.path.join(self.cwd, self.libdir)) + # Hack again for external products. 
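+        # The remainder of __init__ reports where tests will be loaded
+        # from, prepends an external --libdir to sys.path when one was
+        # given, and points functional runs at their ftesting.zcml file.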
+ global functional + kind = functional and "FUNCTIONAL" or "UNIT" + if libdir: + extra = os.path.join(self.org_cwd, libdir) + print "Running %s tests from %s" % (kind, extra) + self.libdir = extra + sys.path.insert(0, extra) + else: + print "Running %s tests from %s" % (kind, self.cwd) + # Make sure functional tests find ftesting.zcml + if functional: + config_file = 'ftesting.zcml' + if not self.inplace: + # We chdired into build, so ftesting.zcml is in the + # parent directory + config_file = os.path.join('..', 'ftesting.zcml') + print "Parsing %s" % config_file + from zope.app.tests.functional import FunctionalTestSetup + FunctionalTestSetup(config_file) + +def match(rx, s): + if not rx: + return True + if rx[0] == "!": + return re.search(rx[1:], s) is None + else: + return re.search(rx, s) is not None + +class TestFileFinder: + def __init__(self, prefix): + self.files = [] + self._plen = len(prefix) + if not prefix.endswith(os.sep): + self._plen += 1 + global functional + if functional: + self.dirname = "ftests" + else: + self.dirname = "tests" + + def visit(self, rx, dir, files): + if os.path.split(dir)[1] != self.dirname: + # Allow tests/ftests module rather than package. + modfname = self.dirname + '.py' + if modfname in files: + path = os.path.join(dir, modfname) + if match(rx, path): + self.files.append(path) + return + return + # ignore tests that aren't in packages + if not "__init__.py" in files: + if not files or files == ["CVS"]: + return + print "not a package", dir + return + + # Put matching files in matches. If matches is non-empty, + # then make sure that the package is importable. + matches = [] + for file in files: + if file.startswith('test') and os.path.splitext(file)[-1] == '.py': + path = os.path.join(dir, file) + if match(rx, path): + matches.append(path) + + # ignore tests when the package can't be imported, possibly due to + # dependency failures. + pkg = dir[self._plen:].replace(os.sep, '.') + try: + __import__(pkg) + # We specifically do not want to catch ImportError since that's useful + # information to know when running the tests. + except RuntimeError, e: + if VERBOSE: + print "skipping %s because: %s" % (pkg, e) + return + else: + self.files.extend(matches) + + def module_from_path(self, path): + """Return the Python package name indicated by the filesystem path.""" + assert path.endswith(".py") + path = path[self._plen:-3] + mod = path.replace(os.sep, ".") + return mod + +def walk_with_symlinks(top, func, arg): + """Like os.path.walk, but follows symlinks on POSIX systems. + + This could theoretically result in an infinite loop, if you create symlink + cycles in your Zope sandbox, so don't do that. + """ + try: + names = os.listdir(top) + except os.error: + return + func(arg, top, names) + exceptions = ('.', '..') + for name in names: + if name not in exceptions: + name = os.path.join(top, name) + if os.path.isdir(name): + walk_with_symlinks(name, func, arg) + +def find_test_dir(dir): + if os.path.exists(dir): + return dir + d = os.path.join(pathinit.libdir, dir) + if os.path.exists(d): + if os.path.isdir(d): + return d + raise ValueError("%s does not exist and %s is not a directory" + % (dir, d)) + raise ValueError("%s does not exist!"
% dir) + +def find_tests(rx): + global finder + finder = TestFileFinder(pathinit.libdir) + + if TEST_DIRS: + for d in TEST_DIRS: + d = find_test_dir(d) + walk_with_symlinks(d, finder.visit, rx) + else: + walk_with_symlinks(pathinit.libdir, finder.visit, rx) + return finder.files + +def package_import(modname): + mod = __import__(modname) + for part in modname.split(".")[1:]: + mod = getattr(mod, part) + return mod + +class PseudoTestCase: + """Minimal test case objects to create error reports. + + If test.py finds something that looks like it should be a test but + can't load it or find its test suite, it will report an error + using a PseudoTestCase. + """ + + def __init__(self, name, descr=None): + self.name = name + self.descr = descr + + def shortDescription(self): + return self.descr + + def __str__(self): + return "Invalid Test (%s)" % self.name + +def get_suite(file, result): + modname = finder.module_from_path(file) + try: + mod = package_import(modname) + return mod.test_suite() + except: + result.addError(PseudoTestCase(modname), sys.exc_info()) + return None + +def filter_testcases(s, rx): + new = unittest.TestSuite() + for test in s._tests: + # See if the levels match + dolevel = (LEVEL == 0) or LEVEL >= getattr(test, "level", 0) + if not dolevel: + continue + if isinstance(test, unittest.TestCase): + name = test.id() # Full test name: package.module.class.method + name = name[1 + name.rfind("."):] # extract method name + if not rx or match(rx, name): + new.addTest(test) + else: + filtered = filter_testcases(test, rx) + if filtered: + new.addTest(filtered) + return new + +def gui_runner(files, test_filter): + if BUILD_INPLACE: + utildir = os.path.join(os.getcwd(), "utilities") + else: + utildir = os.path.join(os.getcwd(), "..", "utilities") + sys.path.append(utildir) + import unittestgui + suites = [] + for file in files: + suites.append(finder.module_from_path(file) + ".test_suite") + + suites = ", ".join(suites) + minimal = (GUI == "minimal") + unittestgui.main(suites, minimal) + +class TrackRefs: + """Object to track reference counts across test runs.""" + + def __init__(self): + self.type2count = {} + self.type2all = {} + + def update(self): + obs = sys.getobjects(0) + type2count = {} + type2all = {} + for o in obs: + all = sys.getrefcount(o) + + if type(o) is str and o == '<dummy key>': + # avoid dictionary madness + continue + t = type(o) + if t in type2count: + type2count[t] += 1 + type2all[t] += all + else: + type2count[t] = 1 + type2all[t] = all + + ct = [(type2count[t] - self.type2count.get(t, 0), + type2all[t] - self.type2all.get(t, 0), + t) + for t in type2count.iterkeys()] + ct.sort() + ct.reverse() + printed = False + for delta1, delta2, t in ct: + if delta1 or delta2: + if not printed: + print "%-55s %8s %8s" % ('', 'insts', 'refs') + printed = True + print "%-55s %8d %8d" % (t, delta1, delta2) + + self.type2count = type2count + self.type2all = type2all + +def runner(files, test_filter, debug): + runner = ImmediateTestRunner(verbosity=VERBOSE, debug=DEBUG, + progress=PROGRESS, profile=PROFILE, + descriptions=False) + suite = unittest.TestSuite() + for file in files: + s = get_suite(file, runner.result) + # See if the levels match + dolevel = (LEVEL == 0) or LEVEL >= getattr(s, "level", 0) + if s is not None and dolevel: + s = filter_testcases(s, test_filter) + suite.addTest(s) + try: + r = runner.run(suite) + if TIMESFN: + r.print_times(open(TIMESFN, "w")) + if VERBOSE: + print "Wrote timing data to", TIMESFN + if TIMETESTS: + r.print_times(sys.stdout, TIMETESTS) 
+ except: + if DEBUGGER: + print "%s:" % (sys.exc_info()[0], ) + print sys.exc_info()[1] + pdb.post_mortem(sys.exc_info()[2]) + else: + raise + +def remove_stale_bytecode(arg, dirname, names): + names = map(os.path.normcase, names) + for name in names: + if name.endswith(".pyc") or name.endswith(".pyo"): + srcname = name[:-1] + if srcname not in names: + fullname = os.path.join(dirname, name) + print "Removing stale bytecode file", fullname + os.unlink(fullname) + +def main(module_filter, test_filter, libdir): + if not KEEP_STALE_BYTECODE: + os.path.walk(os.curdir, remove_stale_bytecode, None) + + configure_logging() + + # Initialize the path and cwd + global pathinit + pathinit = PathInit(BUILD, BUILD_INPLACE, libdir) + + files = find_tests(module_filter) + files.sort() + + if GUI: + gui_runner(files, test_filter) + elif LOOP: + if REFCOUNT: + rc = sys.gettotalrefcount() + track = TrackRefs() + while True: + runner(files, test_filter, DEBUG) + gc.collect() + if gc.garbage: + print "GARBAGE:", len(gc.garbage), gc.garbage + return + if REFCOUNT: + prev = rc + rc = sys.gettotalrefcount() + print "totalrefcount=%-8d change=%-6d" % (rc, rc - prev) + track.update() + else: + runner(files, test_filter, DEBUG) + + os.chdir(pathinit.org_cwd) + + +def configure_logging(): + """Initialize the logging module.""" + import logging.config + + # Get the log.ini file from the current directory instead of possibly + # buried in the build directory. XXX This isn't perfect because if + # log.ini specifies a log file, it'll be relative to the build directory. + # Hmm... + logini = os.path.abspath("log.ini") + + if os.path.exists(logini): + logging.config.fileConfig(logini) + else: + logging.basicConfig() + + if os.environ.has_key("LOGGING"): + level = int(os.environ["LOGGING"]) + logging.getLogger().setLevel(level) + + +def process_args(argv=None): + import getopt + global MODULE_FILTER + global TEST_FILTER + global VERBOSE + global LOOP + global GUI + global TRACE + global REFCOUNT + global DEBUG + global DEBUGGER + global BUILD + global LEVEL + global LIBDIR + global TIMESFN + global TIMETESTS + global PROGRESS + global BUILD_INPLACE + global KEEP_STALE_BYTECODE + global TEST_DIRS + global PROFILE + global GC_THRESHOLD + global GC_FLAGS + global RUN_UNIT + global RUN_FUNCTIONAL + global PYCHECKER + + if argv is None: + argv = sys.argv + + MODULE_FILTER = None + TEST_FILTER = None + VERBOSE = 0 + LOOP = False + GUI = False + TRACE = False + REFCOUNT = False + DEBUG = False # Don't collect test results; simply let tests crash + DEBUGGER = False + BUILD = False + BUILD_INPLACE = False + GC_THRESHOLD = None + gcdebug = 0 + GC_FLAGS = [] + LEVEL = 1 + LIBDIR = None + PROGRESS = False + TIMESFN = None + TIMETESTS = 0 + KEEP_STALE_BYTECODE = 0 + RUN_UNIT = True + RUN_FUNCTIONAL = True + TEST_DIRS = [] + PROFILE = False + PYCHECKER = False + config_filename = 'test.config' + + # import the config file + if os.path.isfile(config_filename): + print 'Configuration file found.' + execfile(config_filename, globals()) + + + try: + opts, args = getopt.getopt(argv[1:], "a:bBcdDfFg:G:hkl:LmMPprs:tTuUv", + ["all", "help", "libdir=", "times=", + "keepbytecode", "dir=", "build", + "build-inplace", + "at-level=", + "pychecker", "debug", "pdebug", + "gc-threshold=", "gc-option=", + "loop", "gui", "minimal-gui", + "profile", "progress", "refcount", "trace", + "top-fifty", "verbose", + ]) + # fixme: add the long names + # fixme: add the extra documentation + # fixme: test for functional first! 
+ except getopt.error, msg: + print msg + print "Try `python %s -h' for more information." % argv[0] + sys.exit(2) + + for k, v in opts: + if k in ("-a", "--at-level"): + LEVEL = int(v) + elif k == "--all": + LEVEL = 0 + os.environ["COMPLAIN_IF_TESTS_MISSED"]='1' + elif k in ("-b", "--build"): + BUILD = True + elif k in ("-B", "--build-inplace"): + BUILD = BUILD_INPLACE = True + elif k in ("-c", "--pychecker"): + PYCHECKER = True + elif k in ("-d", "--debug"): + DEBUG = True + elif k in ("-D", "--pdebug"): + DEBUG = True + DEBUGGER = True + elif k in ("-f", "--skip-unit"): + RUN_UNIT = False + elif k in ("-u", "--skip-functional"): + RUN_FUNCTIONAL = False + elif k == "-F": + message = 'Unit plus functional is the default behaviour.' + warnings.warn(message, DeprecationWarning) + RUN_UNIT = True + RUN_FUNCTIONAL = True + elif k in ("-h", "--help"): + print __doc__ + sys.exit(0) + elif k in ("-g", "--gc-threshold"): + GC_THRESHOLD = int(v) + elif k in ("-G", "--gc-option"): + if not v.startswith("DEBUG_"): + print "-G argument must be DEBUG_ flag, not", repr(v) + sys.exit(1) + GC_FLAGS.append(v) + elif k in ('-k', '--keepbytecode'): + KEEP_STALE_BYTECODE = 1 + elif k in ('-l', '--libdir'): + LIBDIR = v + elif k in ("-L", "--loop"): + LOOP = 1 + elif k == "-m": + GUI = "minimal" + msg = "Use -M or --minimal-gui instead of -m." + warnings.warn(msg, DeprecationWarning) + elif k in ("-M", "--minimal-gui"): + GUI = "minimal" + elif k in ("-P", "--profile"): + PROFILE = True + elif k in ("-p", "--progress"): + PROGRESS = True + elif k in ("-r", "--refcount"): + REFCOUNT = True + elif k in ("-T", "--trace"): + TRACE = True + elif k in ("-t", "--top-fifty"): + if not TIMETESTS: + TIMETESTS = 50 + elif k in ("-U", "--gui"): + GUI = 1 + elif k in ("-v", "--verbose"): + VERBOSE += 1 + elif k == "--times": + try: + TIMETESTS = int(v) + except ValueError: + # must be a filename to write + TIMESFN = v + elif k in ('-s', '--dir'): + TEST_DIRS.append(v) + + if PYCHECKER: + # make sure you have a recent version of pychecker + if not os.environ.get("PYCHECKER"): + os.environ["PYCHECKER"] = "-q" + import pychecker.checker + + if REFCOUNT and not hasattr(sys, "gettotalrefcount"): + print "-r ignored, because it needs a debug build of Python" + REFCOUNT = False + + if sys.version_info < ( 2,3,2 ): + print """\ + ERROR: Your python version is not supported by Zope3. + Zope3 needs Python 2.3.2 or greater.
You are running:""" + sys.version + sys.exit(1) + + if GC_THRESHOLD is not None: + if GC_THRESHOLD == 0: + gc.disable() + print "gc disabled" + else: + gc.set_threshold(GC_THRESHOLD) + print "gc threshold:", gc.get_threshold() + + if GC_FLAGS: + val = 0 + for flag in GC_FLAGS: + v = getattr(gc, flag, None) + if v is None: + print "Unknown gc flag", repr(flag) + print gc.set_debug.__doc__ + sys.exit(1) + val |= v + gcdebug |= v + + if gcdebug: + gc.set_debug(gcdebug) + + if BUILD: + # Python 2.3 is more sane in its non -q output + if sys.hexversion >= 0x02030000: + qflag = "" + else: + qflag = "-q" + cmd = sys.executable + " setup.py " + qflag + " build" + if BUILD_INPLACE: + cmd += "_ext -i" + if VERBOSE: + print cmd + sts = os.system(cmd) + if sts: + print "Build failed", hex(sts) + sys.exit(1) + + k = [] + if RUN_UNIT: + k.append(False) + if RUN_FUNCTIONAL: + k.append(True) + + global functional + for functional in k: + + if VERBOSE: + kind = functional and "FUNCTIONAL" or "UNIT" + if LEVEL == 0: + print "Running %s tests at all levels" % kind + else: + print "Running %s tests at level %d" % (kind, LEVEL) + +# This was to avoid functional tests outside of z3, but this doesn't really +# work right. +## if functional: +## try: +## from zope.app.tests.functional import FunctionalTestSetup +## except ImportError: +## raise +## print ('Skipping functional tests: could not import ' +## 'zope.app.tests.functional') +## continue + + # XXX We want to change *visible* warnings into errors. The next + # line changes all warnings into errors, including warnings we + # normally never see. In particular, test_datetime does some + # short-integer arithmetic that overflows to long ints, and, by + # default, Python doesn't display the overflow warning that can + # be enabled when this happens. The next line turns that into an + # error instead. Guido suggests that a better way to get what we're + # after is to replace warnings.showwarning() with our own thing + # that raises an error. + ## warnings.filterwarnings("error") + warnings.filterwarnings("ignore", module="logging") + + if args: + if len(args) > 1: + TEST_FILTER = args[1] + MODULE_FILTER = args[0] + try: + if TRACE: + # if the trace module is used, then we don't exit with a + # failure status on a false return value from main.
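+            # The trace branch below runs main() under trace.Trace,
+            # pickles the raw results to /tmp/trace.<pid>, and writes
+            # annotated coverage listings plus a summary into ./coverage.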
+ coverdir = os.path.join(os.getcwd(), "coverage") + import trace + ignoremods = ["os", "posixpath", "stat"] + tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix], + ignoremods=ignoremods, + trace=False, count=True) + + tracer.runctx("main(MODULE_FILTER, TEST_FILTER, LIBDIR)", + globals=globals(), locals=vars()) + r = tracer.results() + path = "/tmp/trace.%s" % os.getpid() + import cPickle + f = open(path, "wb") + cPickle.dump(r, f) + f.close() + print path + r.write_results(show_missing=True, + summary=True, coverdir=coverdir) + else: + bad = main(MODULE_FILTER, TEST_FILTER, LIBDIR) + if bad: + sys.exit(1) + except ImportError, err: + print err + print sys.path + raise + + +if __name__ == "__main__": + process_args() diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/tests/test_sxp.py --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/tools/python/xen/xend/tests/test_sxp.py Sat Oct 8 20:28:24 2005 @@ -0,0 +1,18 @@ +import unittest + +import xen.xend.sxp + + +class test_sxp(unittest.TestCase): + + def testAllFromString(self): + def t(input, expected): + self.assertEqual(xen.xend.sxp.all_from_string(input), expected) + + t('String', ['String']) + t('(String Thing)', [['String', 'Thing']]) + t('(String) (Thing)', [['String'], ['Thing']]) + + +def test_suite(): + return unittest.makeSuite(test_sxp) diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/io/xs_wire.h --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/io/xs_wire.h Sat Oct 8 20:28:24 2005 @@ -0,0 +1,95 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_WATCH_ACK, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_OP_READ_ONLY = XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. 
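+ *
+ * For example, a request that fails with ENOENT is answered with a
+ * message of type XS_ERROR whose payload is the nul-terminated string
+ * "ENOENT" rather than a binary errno value; the table below lists the
+ * errno values a client may need to map such strings back to.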
*/ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#define XSD_ERROR(x) { x, #x } +static struct xsd_errors xsd_errors[] __attribute__((unused)) = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), +}; +struct xsd_sockmsg +{ + u32 type; + u32 len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN, +}; + +#endif /* _XS_WIRE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/sched.h --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/sched.h Sat Oct 8 20:28:24 2005 @@ -0,0 +1,50 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx> + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +/* + * Prototype for this hypercall is: + * int sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s). + */ + +/* + * Voluntarily yield the CPU. + * @arg == 0. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == 0. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == SHUTDOWN_??? (reason for shutdown). + */ +#define SCHEDOP_shutdown 2 + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by controller + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 xen/include/public/vcpu.h --- /dev/null Sat Oct 8 17:37:45 2005 +++ b/xen/include/public/vcpu.h Sat Oct 8 20:28:24 2005 @@ -0,0 +1,54 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx> + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +/* + * Prototype for this hypercall is: + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). 
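+ *
+ * A typical bring-up sequence is therefore (a sketch; ctxt names a
+ * vcpu_guest_context structure that the caller has filled in, as
+ * described for VCPUOP_initialise above):
+ *
+ *     vcpu_op(VCPUOP_initialise, vcpuid, &ctxt);
+ *     vcpu_op(VCPUOP_up, vcpuid, NULL);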
+ */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practise to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +#endif /* __XEN_PUBLIC_VCPU_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 buildconfigs/mk.linux-2.4-xenU --- a/buildconfigs/mk.linux-2.4-xenU Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,51 +0,0 @@ - -OS = linux - -LINUX_SERIES = 2.4 -LINUX_VER = 2.4.30 - -EXTRAVERSION = xenU - -LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) - -include buildconfigs/Rules.mk - -.PHONY: build clean delete - -# The real action starts here! -build: $(LINUX_DIR)/include/linux/autoconf.h - if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) modules ; \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_MOD_PATH=$(DESTDIR) modules_install ; \ - fi - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) install - -$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) - # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) - # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ - [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ - cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ - || cp $(LINUX_DIR)/arch/xen/defconfig-$(EXTRAVERSION) \ - $(LINUX_DIR)/.config - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - make -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig - make -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) dep - -config: CONFIGMODE = menuconfig -config: $(LINUX_DIR)/include/linux/autoconf.h - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) $(CONFIGMODE) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) dep - -clean:: - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean - -delete: - rm -rf tmp-$(OS)-$(LINUX_VER) $(LINUX_DIR) diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/Makefile --- a/linux-2.4-xen-sparse/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,592 +0,0 @@ -VERSION = 2 -PATCHLEVEL = 4 -SUBLEVEL = 30 -EXTRAVERSION = - -KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - -# SUBARCH always tells us the underlying machine architecture. -# Unless overridden, by default ARCH is equivalent to SUBARCH. -# This will be overriden for Xen and UML builds. 
-SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -ARCH ?= $(SUBARCH) - -## XXX The following hack can be discarded after users have adjusted to the -## architectural name change 'xeno' -> 'xen'. -ifeq ($(ARCH),xeno) - ARCH := xen -endif - -KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") - -CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ - else if [ -x /bin/bash ]; then echo /bin/bash; \ - else echo sh; fi ; fi) -TOPDIR := $(shell /bin/pwd) - -HPATH = $(TOPDIR)/include -FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net $(HPATH)/math-emu - -HOSTCC = gcc -HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer - -CROSS_COMPILE = - -# -# Include the make variables (CC, etc...) -# - -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc -CPP = $(CC) -E -AR = $(CROSS_COMPILE)ar -NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip -OBJCOPY = $(CROSS_COMPILE)objcopy -OBJDUMP = $(CROSS_COMPILE)objdump -MAKEFILES = $(TOPDIR)/.config -GENKSYMS = /sbin/genksyms -DEPMOD = /sbin/depmod -MODFLAGS = -DMODULE -CFLAGS_KERNEL = -PERL = perl -AWK = awk -RPM := $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \ - else echo rpm; fi) - -export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ - CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ - CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK - -all: do-it-all - -# -# Make "config" the default target if there is no configuration file or -# "depend" the target if there is no top-level dependency information. -# - -ifeq (.config,$(wildcard .config)) -include .config -ifeq (.depend,$(wildcard .depend)) -include .depend -do-it-all: Version vmlinux -else -CONFIGURATION = depend -do-it-all: depend -endif -else -CONFIGURATION = config -do-it-all: config -endif - -# -# INSTALL_PATH specifies where to place the updated kernel and system map -# images. Uncomment if you want to place them anywhere other than root. -# - -#export INSTALL_PATH=/boot - -# -# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory -# relocations required by build roots. This is not defined in the -# makefile but the arguement can be passed to make if needed. -# - -MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -export MODLIB - -# -# standard CFLAGS -# - -CPPFLAGS := -D__KERNEL__ -I$(HPATH) - -CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ - -fno-strict-aliasing -fno-common -ifndef CONFIG_FRAME_POINTER -CFLAGS += -fomit-frame-pointer -endif -AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) - -# -# ROOT_DEV specifies the default root-device when making the image. -# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case -# the default of FLOPPY is used by 'build'. -# This is i386 specific. -# - -export ROOT_DEV = CURRENT - -# -# If you want to preset the SVGA mode, uncomment the next line and -# set SVGA_MODE to whatever number you want. -# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. -# The number is the same as you would ordinarily press at bootup. -# This is i386 specific. -# - -export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA - -# -# If you want the RAM disk device, define this to be the size in blocks. -# This is i386 specific. 
-# - -#export RAMDISK = -DRAMDISK=512 - -CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o -NETWORKS =net/network.o - -LIBS =$(TOPDIR)/lib/lib.a -SUBDIRS =kernel drivers mm fs net ipc lib crypto - -DRIVERS-n := -DRIVERS-y := -DRIVERS-m := -DRIVERS- := - -DRIVERS-$(CONFIG_ACPI_BOOT) += drivers/acpi/acpi.o -DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o -DRIVERS-y += drivers/char/char.o \ - drivers/block/block.o \ - drivers/misc/misc.o \ - drivers/net/net.o -DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o -DRIVERS-$(CONFIG_DRM_NEW) += drivers/char/drm/drm.o -DRIVERS-$(CONFIG_DRM_OLD) += drivers/char/drm-4.0/drm.o -DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a -DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o -DRIVERS-$(CONFIG_DEV_APPLETALK) += drivers/net/appletalk/appletalk.o -DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o -DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o -DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o -DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o -DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o -DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a -DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o -DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o -DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o - -ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) -DRIVERS-y += drivers/cdrom/driver.o -endif - -DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o -DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o -DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o -DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o -DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o -DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o -DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o -DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a -DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o -DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o -DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a -DRIVERS-$(CONFIG_PPC32) += drivers/macintosh/macintosh.o -DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o -DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o -DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o -DRIVERS-$(CONFIG_VT) += drivers/video/video.o -DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a -DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o -DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a -DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o -DRIVERS-$(CONFIG_USB_GADGET) += drivers/usb/gadget/built-in.o -DRIVERS-y +=drivers/media/media.o -DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o -DRIVERS-$(CONFIG_HIL) += drivers/hil/hil.o -DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o -DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o -DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o -DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o -DRIVERS-$(CONFIG_GSC) += drivers/gsc/gscbus.o -DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o -DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o -DRIVERS-$(CONFIG_ISDN_BOOL) += drivers/isdn/vmlinux-obj.o -DRIVERS-$(CONFIG_CRYPTO) += crypto/crypto.o - -DRIVERS := $(DRIVERS-y) - - -# files removed with 'make clean' -CLEAN_FILES = \ - kernel/ksyms.lst include/linux/compile.h \ - vmlinux System.map \ - .tmp* \ - drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ - drivers/char/conmakehash \ - drivers/char/drm/*-mod.c \ - drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ - drivers/zorro/devlist.h 
drivers/zorro/gen-devlist \ - drivers/sound/bin2hex drivers/sound/hex2hex \ - drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ - drivers/scsi/aic7xxx/aicasm/aicasm \ - drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_gram.h \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.h \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ - drivers/scsi/aic7xxx/aicasm/aicdb.h \ - drivers/scsi/aic7xxx/aicasm/y.tab.h \ - drivers/scsi/53c700_d.h \ - drivers/tc/lk201-map.c \ - net/khttpd/make_times_h \ - net/khttpd/times.h \ - submenu* \ - drivers/ieee1394/oui.c -# directories removed with 'make clean' -CLEAN_DIRS = \ - modules - -# files removed with 'make mrproper' -MRPROPER_FILES = \ - include/linux/autoconf.h include/linux/version.h \ - lib/crc32table.h lib/gen_crc32table \ - drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ - drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ - drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ - drivers/net/hamradio/soundmodem/gentbl \ - drivers/sound/*_boot.h drivers/sound/.*.boot \ - drivers/sound/msndinit.c \ - drivers/sound/msndperm.c \ - drivers/sound/pndsperm.c \ - drivers/sound/pndspini.c \ - drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ - .version .config* config.in config.old \ - scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ - scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ - .menuconfig.log \ - include/asm \ - .hdepend scripts/mkdep scripts/split-include scripts/docproc \ - $(TOPDIR)/include/linux/modversions.h \ - kernel.spec - -# directories removed with 'make mrproper' -MRPROPER_DIRS = \ - include/config \ - $(TOPDIR)/include/linux/modules - - -include arch/$(ARCH)/Makefile - -# Extra cflags for kbuild 2.4. The default is to forbid includes by kernel code -# from user space headers. Some UML code requires user space headers, in the -# UML Makefiles add 'kbuild_2_4_nostdinc :=' before include Rules.make. No -# other kernel code should include user space headers, if you need -# 'kbuild_2_4_nostdinc :=' or -I/usr/include for kernel code and you are not UML -# then your code is broken! KAO. - -kbuild_2_4_nostdinc := -nostdinc -iwithprefix include -export kbuild_2_4_nostdinc - -export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL - -export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS - -.S.s: - $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< -.S.o: - $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $< - -Version: dummy - @rm -f include/linux/compile.h - -boot: vmlinux - @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot - -vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs - $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \ - --start-group \ - $(CORE_FILES) \ - $(DRIVERS) \ - $(NETWORKS) \ - $(LIBS) \ - --end-group \ - -o vmlinux - $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map - -symlinks: - rm -f include/asm - ( cd include ; ln -sf asm-$(ARCH) asm) - @if [ ! 
-d include/linux/modules ]; then \ - mkdir include/linux/modules; \ - fi - -oldconfig: symlinks - $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in - -xconfig: symlinks - $(MAKE) -C scripts kconfig.tk - wish -f scripts/kconfig.tk - -menuconfig: include/linux/version.h symlinks - $(MAKE) -C scripts/lxdialog all - $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in - -config: symlinks - $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in - -include/config/MARKER: scripts/split-include include/linux/autoconf.h - scripts/split-include include/linux/autoconf.h include/config - @ touch include/config/MARKER - -linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) - -$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER - $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@) - -$(TOPDIR)/include/linux/version.h: include/linux/version.h -$(TOPDIR)/include/linux/compile.h: include/linux/compile.h - -newversion: - . scripts/mkversion > .tmpversion - @mv -f .tmpversion .version - -uts_len := 64 -uts_truncate := sed -e 's/\(.\{1,$(uts_len)\}\).*/\1/' - -include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion - @echo -n \#`cat .version` > .ver1 - @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver1; fi - @if [ -f .name ]; then echo -n \-`cat .name` >> .ver1; fi - @LANG=C echo ' '`date` >> .ver1 - @echo \#define UTS_VERSION \"`cat .ver1 | $(uts_truncate)`\" > .ver - @LANG=C echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver - @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver - @echo \#define LINUX_COMPILE_HOST \"`hostname | $(uts_truncate)`\" >> .ver - @([ -x /bin/dnsdomainname ] && /bin/dnsdomainname > .ver1) || \ - ([ -x /bin/domainname ] && /bin/domainname > .ver1) || \ - echo > .ver1 - @echo \#define LINUX_COMPILE_DOMAIN \"`cat .ver1 | $(uts_truncate)`\" >> .ver - @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -n 1`\" >> .ver - @mv -f .ver $@ - @rm -f .ver1 - -include/linux/version.h: ./Makefile - @expr length "$(KERNELRELEASE)" \<= $(uts_len) > /dev/null || \ - (echo KERNELRELEASE \"$(KERNELRELEASE)\" exceeds $(uts_len) characters >&2; false) - @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver - @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver - @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver - @mv -f .ver $@ - -comma := , - -init/version.o: init/version.c include/linux/compile.h include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(SUBARCH)"' -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o init/version.o init/version.c - -init/main.o: init/main.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $< - -init/do_mounts.o: init/do_mounts.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $< - -fs lib mm ipc kernel drivers net: dummy - $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) - -TAGS: dummy - { find include/asm-${ARCH} -name '*.h' -print ; \ - find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print ; \ - find $(SUBDIRS) init arch/${ARCH} -name '*.[chS]' ; } | grep -v SCCS | grep -v '\.svn' | etags - - -# Exuberant ctags works better with -I -tags: dummy - CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I 
__initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \ - ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \ - find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ - find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a - -ifdef CONFIG_MODULES -ifdef CONFIG_MODVERSIONS -MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h -endif - -.PHONY: modules -modules: $(patsubst %, _mod_%, $(SUBDIRS)) - -.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) -$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER - $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules - -.PHONY: modules_install -modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post - -.PHONY: _modinst_ -_modinst_: - @rm -rf $(MODLIB)/kernel - @rm -f $(MODLIB)/build - @mkdir -p $(MODLIB)/kernel - @ln -s $(TOPDIR) $(MODLIB)/build - -# If System.map exists, run depmod. This deliberately does not have a -# dependency on System.map since that would run the dependency tree on -# vmlinux. This depmod is only for convenience to give the initial -# boot a modules.dep even before / is mounted read-write. However the -# boot script depmod is the master version. -ifeq "$(strip $(INSTALL_MOD_PATH))" "" -depmod_opts := -else -depmod_opts := -b $(INSTALL_MOD_PATH) -r -endif -.PHONY: _modinst_post -_modinst_post: _modinst_post_pcmcia - if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi - -# Backwards compatibilty symlinks for people still using old versions -# of pcmcia-cs with hard coded pathnames on insmod. Remove -# _modinst_post_pcmcia for kernel 2.4.1. -.PHONY: _modinst_post_pcmcia -_modinst_post_pcmcia: - cd $(MODLIB); \ - mkdir -p pcmcia; \ - find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia - -.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS)) -$(patsubst %, _modinst_%, $(SUBDIRS)) : - $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install - -# modules disabled.... - -else -modules modules_install: dummy - @echo - @echo "The present kernel configuration has modules disabled." - @echo "Type 'make config' and enable loadable module support." - @echo "Then build a kernel with module support enabled." - @echo - @exit 1 -endif - -clean: archclean - find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \ - | grep -v lxdialog/ | xargs rm -f - rm -f $(CLEAN_FILES) - rm -rf $(CLEAN_DIRS) - $(MAKE) -C Documentation/DocBook clean - -mrproper: clean archmrproper - find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f - rm -f $(MRPROPER_FILES) - rm -rf $(MRPROPER_DIRS) - $(MAKE) -C Documentation/DocBook mrproper - -distclean: mrproper - rm -f core `find . \( -not -type d \) -and \ - \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ - -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags - -backup: mrproper - cd .. 
&& tar cf - linux/ | gzip -9 > backup.gz - sync - -sgmldocs: - chmod 755 $(TOPDIR)/scripts/docgen - chmod 755 $(TOPDIR)/scripts/gen-all-syms - chmod 755 $(TOPDIR)/scripts/kernel-doc - $(MAKE) -C $(TOPDIR)/Documentation/DocBook books - -psdocs: sgmldocs - $(MAKE) -C Documentation/DocBook ps - -pdfdocs: sgmldocs - $(MAKE) -C Documentation/DocBook pdf - -htmldocs: sgmldocs - $(MAKE) -C Documentation/DocBook html - -mandocs: - chmod 755 $(TOPDIR)/scripts/kernel-doc - chmod 755 $(TOPDIR)/scripts/split-man - $(MAKE) -C Documentation/DocBook man - -sums: - find . -type f -print | sort | xargs sum > .SUMS - -dep-files: scripts/mkdep archdep include/linux/version.h - rm -f .depend .hdepend - $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" -ifdef CONFIG_MODVERSIONS - $(MAKE) update-modverfile -endif - scripts/mkdep -- `find $(FINDHPATH) \( -name SCCS -o -name .svn \) -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend - scripts/mkdep -- init/*.c > .depend - -ifdef CONFIG_MODVERSIONS -MODVERFILE := $(TOPDIR)/include/linux/modversions.h -else -MODVERFILE := -endif -export MODVERFILE - -depend dep: dep-files - -checkconfig: - find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl - -checkhelp: - find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl - -checkincludes: - find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl - -ifdef CONFIGURATION -..$(CONFIGURATION): - @echo - @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'" - @echo - $(MAKE) $(CONFIGURATION) - @echo - @echo "Successful. Try re-making (ignore the error that follows)" - @echo - exit 1 - -#dummy: ..$(CONFIGURATION) -dummy: - -else - -dummy: - -endif - -include Rules.make - -# -# This generates dependencies for the .h files. -# - -scripts/mkdep: scripts/mkdep.c - $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c - -scripts/split-include: scripts/split-include.c - $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c - -# -# RPM target -# -# If you do a make spec before packing the tarball you can rpm -ta it -# -spec: - . scripts/mkspec >kernel.spec - -# -# Build a tar ball, generate an rpm from it and pack the result -# There arw two bits of magic here -# 1) The use of /. to avoid tar packing just the symlink -# 2) Removing the .dep files as they have source paths in them that -# will become invalid -# -rpm: clean spec - find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f - set -e; \ - cd $(TOPDIR)/.. ; \ - ln -sf $(TOPDIR) $(KERNELPATH) ; \ - tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \ - rm $(KERNELPATH) ; \ - cd $(TOPDIR) ; \ - . scripts/mkversion > .version ; \ - $(RPM) -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ - rm $(TOPDIR)/../$(KERNELPATH).tar.gz diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/Makefile --- a/linux-2.4-xen-sparse/arch/xen/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,137 +0,0 @@ -# -# xen/Makefile -# -# This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture -# -# This file is subject to the terms and conditions of the GNU General Public -# License. 
See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1994 by Linus Torvalds -# -# 19990713 Artur Skawina <skawina@xxxxxxxxxxxxx> -# Added '-march' and '-mpreferred-stack-boundary' support -# - -# If no .config file exists then use the appropriate defconfig-* file -ifneq (.config,$(wildcard .config)) -DUMMYX:=$(shell cp $(TOPDIR)/arch/xen/defconfig$(EXTRAVERSION) $(TOPDIR)/.config) --include $(TOPDIR)/.config -endif - -LD=$(CROSS_COMPILE)ld -m elf_i386 -OBJCOPY=$(CROSS_COMPILE)objcopy -R .note -R .comment -S -LDFLAGS=-e stext -LINKFLAGS =-T $(TOPDIR)/arch/xen/vmlinux.lds $(LDFLAGS) - -CFLAGS += -pipe - -check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) - -# prevent gcc from keeping the stack 16 byte aligned -CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) - -ifdef CONFIG_M686 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUMIII -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUM4 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MK7 -CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4) -endif - -# Disable unit-at-a-time mode, it makes gcc use a lot more stack -# due to the lack of sharing of stacklots. -CFLAGS += $(call check_gcc,-fno-unit-at-a-time,) - -HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o - -SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib -SUBDIRS += arch/xen/drivers/console -SUBDIRS += arch/xen/drivers/evtchn -SUBDIRS += arch/xen/drivers/blkif -SUBDIRS += arch/xen/drivers/netif -SUBDIRS += arch/xen/drivers/balloon -ifdef CONFIG_XEN_PRIVILEGED_GUEST -SUBDIRS += arch/xen/drivers/dom0 -endif - -CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o -CORE_FILES += arch/xen/drivers/evtchn/drv.o -CORE_FILES += arch/xen/drivers/console/drv.o -DRIVERS += arch/xen/drivers/blkif/drv.o -DRIVERS += arch/xen/drivers/netif/drv.o -ifdef CONFIG_XEN_PRIVILEGED_GUEST -CORE_FILES += arch/xen/drivers/dom0/drv.o -endif -CORE_FILES += arch/xen/drivers/balloon/drv.o -LIBS := $(TOPDIR)/arch/xen/lib/lib.a $(LIBS) $(TOPDIR)/arch/xen/lib/lib.a - -arch/xen/kernel: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/kernel - -arch/xen/mm: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/mm - -arch/xen/drivers/console: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/console - -arch/xen/drivers/network: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/network - -arch/xen/drivers/block: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/block - -arch/xen/drivers/dom0: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/dom0 - -arch/xen/drivers/balloon: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/balloon - -MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -vmlinux: arch/xen/vmlinux.lds - -FORCE: ; - -.PHONY: bzImage compressed clean archclean archmrproper archdep - - -bzImage: vmlinux - @$(MAKEBOOT) bzImage - -INSTALL_NAME ?= $(KERNELRELEASE) -install: bzImage - mkdir -p $(INSTALL_PATH)/boot - ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) - rm -f $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0644 arch/$(ARCH)/boot/bzImage $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0644 vmlinux $(INSTALL_PATH)/boot/vmlinux-syms-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0664 .config $(INSTALL_PATH)/boot/config-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0664 System.map 
$(INSTALL_PATH)/boot/System.map-$(INSTALL_NAME)$(INSTALL_SUFFIX) - ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) - -%_config: arch/xen/defconfig-% - rm -f .config arch/xen/defconfig - cp -f arch/xen/defconfig-$(@:_config=) arch/xen/defconfig - cp -f arch/xen/defconfig-$(@:_config=) .config - - -archclean: - @$(MAKEBOOT) clean - -archmrproper: - rm -f include/asm-xen/xen-public/arch - -archdep: - @$(MAKEBOOT) dep diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/boot/Makefile --- a/linux-2.4-xen-sparse/arch/xen/boot/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,13 +0,0 @@ -# -# arch/xen/boot/Makefile -# - -bzImage: $(TOPDIR)/vmlinux - $(OBJCOPY) $< Image - gzip -f -9 < Image > $@ - rm -f Image - -dep: - -clean: - rm -f bzImage Image diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/config.in --- a/linux-2.4-xen-sparse/arch/xen/config.in Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,327 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/config-language.txt. -# -mainmenu_name "Linux Kernel Configuration" - -define_bool CONFIG_XEN y - -define_bool CONFIG_X86 y -define_bool CONFIG_ISA y -define_bool CONFIG_SBUS n - -define_bool CONFIG_UID16 y - -mainmenu_option next_comment -comment 'Xen' -bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST -bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS -bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES -bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND -bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND -bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT -endmenu -# The IBM S/390 patch needs this. 
-define_bool CONFIG_NO_IDLE_HZ y - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - define_bool CONFIG_FOREIGN_PAGES y -else - define_bool CONFIG_FOREIGN_PAGES n - define_bool CONFIG_NETDEVICES y - define_bool CONFIG_VT n -fi - -mainmenu_option next_comment -comment 'Code maturity level options' -bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -endmenu - -mainmenu_option next_comment -comment 'Loadable module support' -bool 'Enable loadable module support' CONFIG_MODULES -if [ "$CONFIG_MODULES" = "y" ]; then - bool ' Set version information on all module symbols' CONFIG_MODVERSIONS - bool ' Kernel module loader' CONFIG_KMOD -fi -endmenu - -mainmenu_option next_comment -comment 'Processor type and features' -choice 'Processor family' \ - "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ - Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ - Pentium-4 CONFIG_MPENTIUM4 \ - Athlon/Duron/K7 CONFIG_MK7 \ - Opteron/Athlon64/Hammer/K8 CONFIG_MK8 \ - VIA-C3-2 CONFIG_MVIAC3_2" Pentium-Pro - - define_bool CONFIG_X86_WP_WORKS_OK y - define_bool CONFIG_X86_INVLPG y - define_bool CONFIG_X86_CMPXCHG y - define_bool CONFIG_X86_XADD y - define_bool CONFIG_X86_BSWAP y - define_bool CONFIG_X86_POPAD_OK y - define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n - define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y - - define_bool CONFIG_X86_GOOD_APIC y - define_bool CONFIG_X86_PGE y - define_bool CONFIG_X86_USE_PPRO_CHECKSUM y - define_bool CONFIG_X86_TSC y - -if [ "$CONFIG_M686" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUMIII" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUM4" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 7 -fi -if [ "$CONFIG_MK8" = "y" ]; then - define_bool CONFIG_MK7 y -fi -if [ "$CONFIG_MK7" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 6 - define_bool CONFIG_X86_USE_3DNOW y -fi -if [ "$CONFIG_MVIAC3_2" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi - -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -# tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD -#fi - -choice 'High Memory Support' \ - "off CONFIG_NOHIGHMEM \ - 4GB CONFIG_HIGHMEM4G" off -# 64GB CONFIG_HIGHMEM64G" off -if [ "$CONFIG_HIGHMEM4G" = "y" ]; then - define_bool CONFIG_HIGHMEM y -fi -if [ "$CONFIG_HIGHMEM64G" = "y" ]; then - define_bool CONFIG_HIGHMEM y - define_bool CONFIG_X86_PAE y -fi - -if [ "$CONFIG_HIGHMEM" = "y" ]; then - bool 'HIGHMEM I/O support' CONFIG_HIGHIO -fi - -define_int CONFIG_FORCE_MAX_ZONEORDER 11 - -#bool 'Symmetric multi-processing support' CONFIG_SMP -#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then -# define_bool CONFIG_HAVE_DEC_LOCK y -#fi -endmenu - -mainmenu_option next_comment -comment 'General setup' - -bool 'Networking support' CONFIG_NET - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - bool 'PCI support' CONFIG_PCI - source drivers/pci/Config.in - - bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG - - if [ "$CONFIG_HOTPLUG" = "y" ] ; then - source drivers/pcmcia/Config.in - source drivers/hotplug/Config.in - else - define_bool CONFIG_PCMCIA n - define_bool CONFIG_HOTPLUG_PCI n - fi -fi - -bool 'System V IPC' CONFIG_SYSVIPC -bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -bool 'Sysctl support' CONFIG_SYSCTL -if [ "$CONFIG_PROC_FS" = "y" ]; then - choice 'Kernel core (/proc/kcore) format' \ - "ELF CONFIG_KCORE_ELF \ - A.OUT CONFIG_KCORE_AOUT" ELF -fi -tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -bool 
'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER - -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/mtd/Config.in - - source drivers/parport/Config.in - - source drivers/pnp/Config.in - - source drivers/block/Config.in - - source drivers/md/Config.in -fi - -if [ "$CONFIG_NET" = "y" ]; then - source net/Config.in -fi - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - mainmenu_option next_comment - comment 'ATA/IDE/MFM/RLL support' - - tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE - - if [ "$CONFIG_IDE" != "n" ]; then - source drivers/ide/Config.in - else - define_bool CONFIG_BLK_DEV_HD n - fi - endmenu -fi - -mainmenu_option next_comment -comment 'SCSI support' - -tristate 'SCSI support' CONFIG_SCSI - -if [ "$CONFIG_SCSI" != "n" ]; then - source drivers/scsi/Config.in -fi -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/message/fusion/Config.in - - source drivers/ieee1394/Config.in - - source drivers/message/i2o/Config.in - - if [ "$CONFIG_NET" = "y" ]; then - mainmenu_option next_comment - comment 'Network device support' - - bool 'Network device support' CONFIG_NETDEVICES - if [ "$CONFIG_NETDEVICES" = "y" ]; then - source drivers/net/Config.in - if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then - source drivers/atm/Config.in - fi - fi - endmenu - fi - - source net/ax25/Config.in - - source net/irda/Config.in - - mainmenu_option next_comment - comment 'ISDN subsystem' - if [ "$CONFIG_NET" != "n" ]; then - tristate 'ISDN support' CONFIG_ISDN - if [ "$CONFIG_ISDN" != "n" ]; then - source drivers/isdn/Config.in - fi - fi - endmenu - - if [ "$CONFIG_ISA" = "y" ]; then - mainmenu_option next_comment - comment 'Old CD-ROM drivers (not SCSI, not IDE)' - - bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI - if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then - source drivers/cdrom/Config.in - fi - endmenu - fi - - # - # input before char - char/joystick depends on it. As does USB. 
- # - source drivers/input/Config.in -else - # - # Block device driver configuration - # - mainmenu_option next_comment - comment 'Block devices' - tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP - dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET - tristate 'RAM disk support' CONFIG_BLK_DEV_RAM - if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then - int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 - fi - dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM - bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS - define_bool CONFIG_BLK_DEV_HD n - endmenu -fi - -source drivers/char/Config.in - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/media/Config.in -fi - -source fs/Config.in - -mainmenu_option next_comment -comment 'Console drivers' - -define_bool CONFIG_XEN_CONSOLE y - -if [ "$CONFIG_VT" = "y" ]; then - bool 'VGA text console' CONFIG_VGA_CONSOLE - bool 'Dummy console' CONFIG_DUMMY_CONSOLE - if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - bool 'Video mode selection support' CONFIG_VIDEO_SELECT - if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE - source drivers/video/Config.in - fi - fi -fi -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - mainmenu_option next_comment - comment 'Sound' - - tristate 'Sound card support' CONFIG_SOUND - if [ "$CONFIG_SOUND" != "n" ]; then - source drivers/sound/Config.in - fi - endmenu -fi - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/usb/Config.in - source net/bluetooth/Config.in -fi - -mainmenu_option next_comment -comment 'Kernel hacking' - -bool 'Kernel debugging' CONFIG_DEBUG_KERNEL -if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then - bool ' Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW - bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM - bool ' Debug memory allocations' CONFIG_DEBUG_SLAB - bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE - bool ' Load all symbols for debugging' CONFIG_KALLSYMS - bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER -fi - -int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 - -endmenu - -source crypto/Config.in -source lib/Config.in diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/defconfig-xen0 --- a/linux-2.4-xen-sparse/arch/xen/defconfig-xen0 Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,927 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_XEN=y -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Xen -# -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_PHYSDEV_ACCESS=y -# CONFIG_XEN_USB_BACKEND is not set -CONFIG_XEN_SCRUB_PAGES=y -CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_GRANT=y -# CONFIG_XEN_USB_FRONTEND is not set -CONFIG_NO_IDLE_HZ=y -CONFIG_FOREIGN_PAGES=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y 
-CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -CONFIG_FORCE_MAX_ZONEORDER=11 - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_SHPC is not set -# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set -# CONFIG_HOTPLUG_PCI_PCIE is not set -# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_OOM_KILLER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play configuration -# -CONFIG_PNP=y -# CONFIG_ISAPNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_CISS_MONITOR_THREAD is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_SX8 is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -# CONFIG_MD_RAID0 is not set -CONFIG_MD_RAID1=y -# CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -CONFIG_BLK_DEV_LVM=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -# CONFIG_IP_NF_AMANDA is not set -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_LIMIT is not set -# CONFIG_IP_NF_MATCH_MAC is not set -# CONFIG_IP_NF_MATCH_PKTTYPE is not set -# CONFIG_IP_NF_MATCH_MARK is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_LENGTH is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_TCPMSS is not set -# CONFIG_IP_NF_MATCH_HELPER is not set -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -# CONFIG_IP_NF_MATCH_UNCLEAN is not set -# CONFIG_IP_NF_MATCH_OWNER is not set -CONFIG_IP_NF_MATCH_PHYSDEV=y 
-CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -# CONFIG_IP_NF_TARGET_MIRROR is not set -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -# CONFIG_IP_NF_NAT_SNMP_BASIC is not set -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -# CONFIG_IP_NF_MANGLE is not set -CONFIG_IP_NF_TARGET_LOG=y -CONFIG_IP_NF_TARGET_ULOG=y -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_ARPTABLES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=y -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_DECNET is not set -CONFIG_BRIDGE=y -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_IPF=m -CONFIG_BRIDGE_EBT_ARPF=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_VLANF=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_MARKF=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_MARK_T=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -# CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_IDEDISK_STROKE=y -# CONFIG_BLK_DEV_IDECS is not set -# CONFIG_BLK_DEV_DELKIN is not set -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_IDETAPE=y -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=y -CONFIG_IDE_TASK_IOCTL=y -CONFIG_BLK_DEV_CMD640=y -CONFIG_BLK_DEV_CMD640_ENHANCED=y -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -CONFIG_BLK_DEV_OFFBOARD=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -CONFIG_WDC_ALI15X3=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_AMD74XX_OVERRIDE=y -# CONFIG_BLK_DEV_ATIIXP is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NS87415=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_SC1200=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -CONFIG_BLK_DEV_TRM290=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_IDE_CHIPSETS=y -# CONFIG_BLK_DEV_4DRIVES is not set -# CONFIG_BLK_DEV_ALI14XX is not set -# CONFIG_BLK_DEV_DTC2278 is not set -# 
CONFIG_BLK_DEV_HT6560B is not set -# CONFIG_BLK_DEV_PDC4030 is not set -# CONFIG_BLK_DEV_QD65XX is not set -# CONFIG_BLK_DEV_UMC8672 is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set -# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set -# CONFIG_BLK_DEV_ATARAID_SII is not set - -# -# SCSI support -# -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -CONFIG_CHR_DEV_SG=y -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=y -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=y -# CONFIG_SCSI_MEGARAID2 is not set -CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set -# CONFIG_SCSI_SATA_SVW is not set -CONFIG_SCSI_ATA_PIIX=y -# CONFIG_SCSI_SATA_NV is not set -# CONFIG_SCSI_SATA_QSTOR is not set -CONFIG_SCSI_SATA_PROMISE=y -CONFIG_SCSI_SATA_SX4=y -CONFIG_SCSI_SATA_SIL=y -CONFIG_SCSI_SATA_SIS=y -# CONFIG_SCSI_SATA_ULI is not set -CONFIG_SCSI_SATA_VIA=y -CONFIG_SCSI_SATA_VITESSE=y -CONFIG_SCSI_BUSLOGIC=y -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -# CONFIG_SCSI_NSP32 is not set -# 
CONFIG_SCSI_DEBUG is not set - -# -# Fusion MPT device support -# -# CONFIG_FUSION is not set -# CONFIG_FUSION_BOOT is not set -# CONFIG_FUSION_ISENSE is not set -# CONFIG_FUSION_CTL is not set -# CONFIG_FUSION_LAN is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_EL3 is not set -# CONFIG_3C515 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=y -# CONFIG_TYPHOON is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_AT1700 is not set -# CONFIG_DEPCA is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=y -# CONFIG_AMD8111_ETH is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_AC3200 is not set -# CONFIG_APRICOT is not set -# CONFIG_B44 is not set -# CONFIG_CS89x0 is not set -# CONFIG_TULIP is not set -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -# CONFIG_EEPRO100 is not set -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=y -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -CONFIG_NE2K_PCI=y -# CONFIG_FORCEDETH is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -CONFIG_E1000=y -# CONFIG_E1000_NAPI is not set -# CONFIG_MYRI_SBUS is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SK98LIN is not set -CONFIG_TIGON3=y -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -# CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -# 
CONFIG_INPUT is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_UINPUT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -# CONFIG_SERIAL is not set -# CONFIG_SERIAL_EXTENDED is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_QIC02_TAPE is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set -# CONFIG_IPMI_KCS is not set -# CONFIG_IPMI_WATCHDOG is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_SCx200 is not set -# CONFIG_SCx200_GPIO is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set - -# -# Direct Rendering Manager (XFree86 DRI support) -# -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_OBMOUSE is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_XFS_FS is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_RT is not set -# CONFIG_XFS_TRACE is not set -# CONFIG_XFS_DEBUG is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_DIRECTIO is not set -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set 
-# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8559-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_XEN_CONSOLE=y -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y -# CONFIG_VIDEO_SELECT is not set -# CONFIG_MDA_CONSOLE is not set - -# -# Frame-buffer support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set - -# -# Support for USB gadgets -# -# CONFIG_USB_GADGET is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_KALLSYMS=y -# CONFIG_FRAME_POINTER is not set -CONFIG_LOG_BUF_SHIFT=0 - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -# CONFIG_CRYPTO_WP512 is not set -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m 
-CONFIG_CRYPTO_CAST6=m -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_TEST is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -# CONFIG_FW_LOADER is not set diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/defconfig-xenU --- a/linux-2.4-xen-sparse/arch/xen/defconfig-xenU Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,562 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_XEN=y -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Xen -# -# CONFIG_XEN_PRIVILEGED_GUEST is not set -# CONFIG_XEN_PHYSDEV_ACCESS is not set -CONFIG_XEN_SCRUB_PAGES=y -CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_GRANT=y -# CONFIG_XEN_USB_FRONTEND is not set -CONFIG_NO_IDLE_HZ=y -# CONFIG_FOREIGN_PAGES is not set -CONFIG_NETDEVICES=y -# CONFIG_VT is not set - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -CONFIG_FORCE_MAX_ZONEORDER=11 - -# -# General setup -# -CONFIG_NET=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_OOM_KILLER is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -CONFIG_IP_NF_FTP=y -# CONFIG_IP_NF_AMANDA is not set -CONFIG_IP_NF_TFTP=y -CONFIG_IP_NF_IRC=y -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_LIMIT is not set -# CONFIG_IP_NF_MATCH_MAC is not set -# CONFIG_IP_NF_MATCH_PKTTYPE is not set -# CONFIG_IP_NF_MATCH_MARK is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_LENGTH is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_TCPMSS is not set -# CONFIG_IP_NF_MATCH_HELPER is not set -CONFIG_IP_NF_MATCH_STATE=y -CONFIG_IP_NF_MATCH_CONNTRACK=y -# CONFIG_IP_NF_MATCH_UNCLEAN is not set -# CONFIG_IP_NF_MATCH_OWNER is not set 
-CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -# CONFIG_IP_NF_TARGET_MIRROR is not set -CONFIG_IP_NF_NAT=y -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=y -CONFIG_IP_NF_TARGET_REDIRECT=y -# CONFIG_IP_NF_NAT_SNMP_BASIC is not set -CONFIG_IP_NF_NAT_IRC=y -CONFIG_IP_NF_NAT_FTP=y -CONFIG_IP_NF_NAT_TFTP=y -# CONFIG_IP_NF_MANGLE is not set -CONFIG_IP_NF_TARGET_LOG=y -CONFIG_IP_NF_TARGET_ULOG=y -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_ARPTABLES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=y - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# SCSI support -# -CONFIG_SCSI=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_AIC7XXX_OLD is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -# CONFIG_SCSI_MEGARAID is not set -# CONFIG_SCSI_MEGARAID2 is not set -# CONFIG_SCSI_SATA is not set -# CONFIG_SCSI_SATA_AHCI is not set -# CONFIG_SCSI_SATA_SVW is not set -# CONFIG_SCSI_ATA_PIIX is not set -# CONFIG_SCSI_SATA_NV is not set -# CONFIG_SCSI_SATA_QSTOR is not set -# CONFIG_SCSI_SATA_PROMISE is not set -# CONFIG_SCSI_SATA_SX4 is not set -# CONFIG_SCSI_SATA_SIL is not set -# CONFIG_SCSI_SATA_SIS is not set -# CONFIG_SCSI_SATA_ULI is not set -# CONFIG_SCSI_SATA_VIA is not set -# CONFIG_SCSI_SATA_VITESSE is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_T128 is not 
set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Block devices -# -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set -# CONFIG_BLK_DEV_HD is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL is not set -# CONFIG_SERIAL_EXTENDED is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set -# CONFIG_TIPAR is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set - -# -# Joysticks -# -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set -# CONFIG_IPMI_KCS is not set -# CONFIG_IPMI_WATCHDOG is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_SCx200 is not set -# CONFIG_SCx200_GPIO is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set - -# -# Direct Rendering Manager (XFree86 DRI support) -# -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_OBMOUSE is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG 
is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_XFS_FS is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_RT is not set -# CONFIG_XFS_TRACE is not set -# CONFIG_XFS_DEBUG is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_DIRECTIO is not set -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8559-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_XEN_CONSOLE=y - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# 
CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_KALLSYMS=y -# CONFIG_FRAME_POINTER is not set -CONFIG_LOG_BUF_SHIFT=0 - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -# CONFIG_ZLIB_DEFLATE is not set diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,4 +0,0 @@ -O_TARGET := drv.o -export-objs := balloon.o -obj-y := balloon.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,10 +0,0 @@ - -O_TARGET := drv.o - -subdir-$(CONFIG_XEN_BLKDEV_FRONTEND) += frontend -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += frontend/drv.o - -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend -obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o - -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o control.o interface.o vbd.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := blkfront.o vbd.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,93 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/blkif/frontend/common.h - * - * Shared definitions between all levels of XenoLinux Virtual block devices. - */ - -#ifndef __XEN_DRIVERS_COMMON_H__ -#define __XEN_DRIVERS_COMMON_H__ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/hdreg.h> -#include <linux/blkdev.h> -#include <linux/major.h> -#include <asm-xen/xen-public/xen.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/uaccess.h> -#include <asm-xen/xen-public/io/blkif.h> - -#if 1 -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_blk: " fmt, ##args) -#else -#define IPRINTK(fmt, args...) ((void)0) -#endif - -#if 1 -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_blk: " fmt, ##args) -#else -#define WPRINTK(fmt, args...) ((void)0) -#endif - -#if 0 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -#if 0 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK_IOCTL(_f, _a...) ((void)0) -#endif - -/* Private gendisk->flags[] values. 
*/ -#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ - -/* - * We have one of these per vbd, whether ide, scsi or 'other'. - * They hang in an array off the gendisk structure. We may end up putting - * all kinds of interesting stuff here :-) - */ -typedef struct xl_disk { - int usage; -} xl_disk_t; - -extern int blkif_open(struct inode *inode, struct file *filep); -extern int blkif_release(struct inode *inode, struct file *filep); -extern int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument); -extern int blkif_check(kdev_t dev); -extern int blkif_revalidate(kdev_t dev); -extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); -extern void do_blkif_request (request_queue_t *rq); - -extern void xlvbd_update_vbds(void); - -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) -{ - struct gendisk *gd = get_gendisk(xldev); - - if ( gd == NULL ) - return NULL; - - return (xl_disk_t *)gd->real_devices + - (MINOR(xldev) >> gd->minor_shift); -} - - -/* Virtual block-device subsystem. */ -extern int xlvbd_init(void); -extern void xlvbd_cleanup(void); - -#endif /* __XEN_DRIVERS_COMMON_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,540 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/blkif/frontend/vbd.c - * - * Xenolinux virtual block-device driver. - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "common.h" -#include <linux/blk.h> - -/* - * For convenience we distinguish between ide, scsi and 'other' (i.e. - * potentially combinations of the two) in the naming scheme and in a few - * other places (like default readahead, etc). - */ -#define XLIDE_MAJOR_NAME "hd" -#define XLSCSI_MAJOR_NAME "sd" -#define XLVBD_MAJOR_NAME "xvd" - -#define XLIDE_DEVS_PER_MAJOR 2 -#define XLSCSI_DEVS_PER_MAJOR 16 -#define XLVBD_DEVS_PER_MAJOR 16 - -#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ -#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ - -#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ - -#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ - -/* The below are for the generic drivers/block/ll_rw_block.c code. */ -static int xlide_blksize_size[256]; -static int xlide_hardsect_size[256]; -static int xlide_max_sectors[256]; -static int xlscsi_blksize_size[256]; -static int xlscsi_hardsect_size[256]; -static int xlscsi_max_sectors[256]; -static int xlvbd_blksize_size[256]; -static int xlvbd_hardsect_size[256]; -static int xlvbd_max_sectors[256]; - -/* Information about our VBDs. 
*/ -#define MAX_VBDS 64 -static int nr_vbds; -static vdisk_t *vbd_info; - -static struct block_device_operations xlvbd_block_fops = -{ - open: blkif_open, - release: blkif_release, - ioctl: blkif_ioctl, - check_media_change: blkif_check, - revalidate: blkif_revalidate, -}; - -static int xlvbd_get_vbd_info(vdisk_t *disk_info) -{ - vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL); - blkif_request_t req; - blkif_response_t rsp; - int nr; - - memset(&req, 0, sizeof(req)); - req.operation = BLKIF_OP_PROBE; - req.nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - blkif_control_probe_send(&req, &rsp, - (unsigned long)(virt_to_machine(buf))); -#else - req.frame_and_sects[0] = virt_to_machine(buf) | 7; - - blkif_control_send(&req, &rsp); -#endif - - if ( rsp.status <= 0 ) - { - printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); - return -1; - } - - if ( (nr = rsp.status) > MAX_VBDS ) - nr = MAX_VBDS; - memcpy(disk_info, buf, nr * sizeof(vdisk_t)); - - return nr; -} - -/* - * xlvbd_init_device - initialise a VBD device - * @disk: a vdisk_t describing the VBD - * - * Takes a vdisk_t * that describes a VBD the domain has access to. - * Performs appropriate initialisation and registration of the device. - * - * Care needs to be taken when making re-entrant calls to ensure that - * corruption does not occur. Also, devices that are in use should not have - * their details updated. This is the caller's responsibility. - */ -static int xlvbd_init_device(vdisk_t *xd) -{ - int device = xd->device; - int major = MAJOR(device); - int minor = MINOR(device); - int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */ - char *major_name; - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk; - int i, rc = 0, max_part, partno; - unsigned long capacity; - - unsigned char buf[64]; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) - { - printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( is_ide ) { - - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - - } else if ( is_scsi ) { - - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; - - } else { - - /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ - printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", - major, minor); - is_scsi = 1; - major_name = "cciss"; - max_part = XLSCSI_MAX_PART; - - } - - partno = minor & (max_part - 1); - - if ( (gd = get_gendisk(device)) == NULL ) - { - rc = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( rc < 0 ) - { - printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - goto out; - } - - if ( is_ide ) - { - blksize_size[major] = xlide_blksize_size; - hardsect_size[major] = xlide_hardsect_size; - max_sectors[major] = xlide_max_sectors; - read_ahead[major] = 8; - } - else if ( is_scsi ) - { - blksize_size[major] = xlscsi_blksize_size; - hardsect_size[major] = xlscsi_hardsect_size; - max_sectors[major] = xlscsi_max_sectors; - read_ahead[major] = 8; - } - else - { - blksize_size[major] = xlvbd_blksize_size; - hardsect_size[major] = xlvbd_hardsect_size; - max_sectors[major] = xlvbd_max_sectors; - read_ahead[major] = 8; - } - - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request); - - /* - * Turn off barking 'headactive' mode. We dequeue buffer heads as - * soon as we pass them to the back-end driver. 
- */ - blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); - - /* Construct an appropriate gendisk structure. */ - gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); - gd->major = major; - gd->major_name = major_name; - - gd->max_p = max_part; - if ( is_ide ) - { - gd->minor_shift = XLIDE_PARTN_SHIFT; - gd->nr_real = XLIDE_DEVS_PER_MAJOR; - } - else if ( is_scsi ) - { - gd->minor_shift = XLSCSI_PARTN_SHIFT; - gd->nr_real = XLSCSI_DEVS_PER_MAJOR; - } - else - { - gd->minor_shift = XLVBD_PARTN_SHIFT; - gd->nr_real = XLVBD_DEVS_PER_MAJOR; - } - - /* - ** The sizes[] and part[] arrays hold the sizes and other - ** information about every partition with this 'major' (i.e. - ** every disk sharing the 8 bit prefix * max partns per disk) - */ - gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); - gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), - GFP_KERNEL); - memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); - memset(gd->part, 0, max_part * gd->nr_real - * sizeof(struct hd_struct)); - - - gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), - GFP_KERNEL); - memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); - - gd->next = NULL; - gd->fops = &xlvbd_block_fops; - - gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), - GFP_KERNEL); - gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); - - memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); - memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); - - add_gendisk(gd); - - blk_size[major] = gd->sizes; - } - - if ( xd->info & VDISK_READONLY ) - set_device_ro(device, 1); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN; - - /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */ - capacity = (unsigned long)xd->capacity; - - if ( partno != 0 ) - { - /* - * If this was previously set up as a real disc we will have set - * up partition-table information. Virtual partitions override - * 'real' partitions, and the two cannot coexist on a device. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(max_part-1)] != 0) ) - { - /* - * Any non-zero sub-partition entries must be cleaned out before - * installing 'virtual' partition entries. The two types cannot - * coexist, and virtual partitions are favoured. - */ - kdev_t dev = device & ~(max_part-1); - for ( i = max_part - 1; i > 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - printk(KERN_ALERT - "Virtual partitions found for /dev/%s - ignoring any " - "real partition information we may have found.\n", - disk_name(gd, MINOR(device), buf)); - } - - /* Need to skankily setup 'partition' information */ - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity >>(BLOCK_SIZE_BITS-9); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - } - else - { - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); - - /* Some final fix-ups depending on the device type */ - if ( xd->info & VDISK_REMOVABLE ) - { - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; - printk(KERN_ALERT - "Skipping partition check on %s /dev/%s\n", - (xd->info & VDISK_CDROM) ? "cdrom" : "removable", - disk_name(gd, MINOR(device), buf)); - } - else - { - /* Only check partitions on real discs (not virtual!). 
*/ - if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - { - printk(KERN_ALERT - "Skipping partition check on virtual /dev/%s\n", - disk_name(gd, MINOR(device), buf)); - break; - } - register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); - } - } - - out: - bdput(bd); - return rc; -} - - -/* - * xlvbd_remove_device - remove a device node if possible - * @device: numeric device ID - * - * Updates the gendisk structure and invalidates devices. - * - * This is OK for now but in future, should perhaps consider where this should - * deallocate gendisks / unregister devices. - */ -static int xlvbd_remove_device(int device) -{ - int i, rc = 0, minor = MINOR(device); - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk = NULL; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - if ( ((gd = get_gendisk(device)) == NULL) || - ((disk = xldev_to_xldisk(device)) == NULL) ) - BUG(); - - if ( disk->usage != 0 ) - { - printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( (minor & (gd->max_p-1)) != 0 ) - { - /* 1: The VBD is mapped to a partition rather than a whole unit. */ - invalidate_device(device, 1); - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = 0; - gd->sizes[minor] = 0; - - /* Clear the consists-of-virtual-partitions flag if possible. */ - gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; - for ( i = 1; i < gd->max_p; i++ ) - if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - - /* - * If all virtual partitions are now gone, and a 'whole unit' VBD is - * present, then we can try to grok the unit's real partition table. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(gd->max_p-1)] != 0) && - !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) - { - register_disk(gd, - device&~(gd->max_p-1), - gd->max_p, - &xlvbd_block_fops, - gd->part[minor&~(gd->max_p-1)].nr_sects); - } - } - else - { - /* - * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. - * NB. The partition entries are only cleared if there are no VBDs - * mapped to individual partitions on this unit. - */ - i = gd->max_p - 1; /* Default: clear subpartitions as well. */ - if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ - while ( i >= 0 ) - { - invalidate_device(device+i, 1); - gd->part[minor+i].start_sect = 0; - gd->part[minor+i].nr_sects = 0; - gd->sizes[minor+i] = 0; - i--; - } - } - - out: - bdput(bd); - return rc; -} - -/* - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver - * state. The VBDs need to be updated in this way when the domain is - * initialised and also each time we receive an XLBLK_UPDATE event. - */ -void xlvbd_update_vbds(void) -{ - int i, j, k, old_nr, new_nr; - vdisk_t *old_info, *new_info, *merged_info; - - old_info = vbd_info; - old_nr = nr_vbds; - - new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); - if (!new_info) - return; - - if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) - goto out; - - /* - * Final list maximum size is old list + new list. This occurs only when - * old list and new list do not overlap at all, and we cannot yet destroy - * VBDs in the old list because the usage counts are busy. 
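The merge that follows walks the old and new lists, both sorted by
device number, in one pass: devices only in the old list are removed,
devices only in the new list are initialised, and devices present in
both are kept or reinitialised depending on whether their capacity or
info changed. A user-space model of the same discipline, with integers
standing in for vdisk_t entries and every remove/init assumed to
succeed:

    #include <stdio.h>

    int main(void)
    {
        int oldv[] = { 1, 3, 5 }, newv[] = { 3, 4, 5, 6 };
        int merged[8], i = 0, j = 0, k = 0;
        int old_nr = 3, new_nr = 4;

        while (i < old_nr && j < new_nr) {
            if (oldv[i] < newv[j])
                i++;                         /* gone: remove old device */
            else if (oldv[i] > newv[j])
                merged[k++] = newv[j++];     /* new: initialise, keep   */
            else {
                merged[k++] = newv[j];       /* in both lists: keep     */
                i++; j++;
            }
        }
        while (i < old_nr) i++;              /* trailing removals       */
        while (j < new_nr) merged[k++] = newv[j++];  /* trailing inits  */

        for (i = 0; i < k; i++)
            printf("%d ", merged[i]);
        printf("\n");                        /* prints: 3 4 5 6         */
        return 0;
    }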
- */ - merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL); - if (!merged_info) - goto out; - - /* @i tracks old list; @j tracks new list; @k tracks merged list. */ - i = j = k = 0; - - while ( (i < old_nr) && (j < new_nr) ) - { - if ( old_info[i].device < new_info[j].device ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - i++; - } - else if ( old_info[i].device > new_info[j].device ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - j++; - } - else - { - if ( ((old_info[i].capacity == new_info[j].capacity) && - (old_info[i].info == new_info[j].info)) || - (xlvbd_remove_device(old_info[i].device) != 0) ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - else if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - i++; j++; - } - } - - for ( ; i < old_nr; i++ ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - } - - for ( ; j < new_nr; j++ ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - } - - vbd_info = merged_info; - nr_vbds = k; - - kfree(old_info); -out: - kfree(new_info); -} - - -/* - * Set up all the linux device goop for the virtual block devices (vbd's) that - * we know about. Note that although from the backend driver's p.o.v. VBDs are - * addressed simply an opaque 16-bit device number, the domain creation tools - * conventionally allocate these numbers to correspond to those used by 'real' - * linux -- this is just for convenience as it means e.g. that the same - * /etc/fstab can be used when booting with or without Xen. - */ -int xlvbd_init(void) -{ - int i; - - /* - * If compiled as a module, we don't support unloading yet. We therefore - * permanently increment the reference count to disallow it. - */ - SET_MODULE_OWNER(&xlvbd_block_fops); - MOD_INC_USE_COUNT; - - /* Initialize the global arrays. 
*/ - for ( i = 0; i < 256; i++ ) - { - xlide_blksize_size[i] = 1024; - xlide_hardsect_size[i] = 512; - xlide_max_sectors[i] = 512; - - xlscsi_blksize_size[i] = 1024; - xlscsi_hardsect_size[i] = 512; - xlscsi_max_sectors[i] = 512; - - xlvbd_blksize_size[i] = 512; - xlvbd_hardsect_size[i] = 512; - xlvbd_max_sectors[i] = 512; - } - - vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); - if (!vbd_info) - return -ENOMEM; - - nr_vbds = xlvbd_get_vbd_info(vbd_info); - - if ( nr_vbds < 0 ) - { - kfree(vbd_info); - vbd_info = NULL; - nr_vbds = 0; - } - else - { - for ( i = 0; i < nr_vbds; i++ ) - xlvbd_init_device(&vbd_info[i]); - } - - return 0; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-$(CONFIG_XEN_CONSOLE) := console.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := core.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := evtchn.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,10 +0,0 @@ - -O_TARGET := drv.o - -subdir-$(CONFIG_XEN_NETDEV_FRONTEND) += frontend -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += frontend/drv.o - -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend -obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o - -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,4 +0,0 @@ -O_TARGET := drv.o -export-objs := interface.o -obj-y := main.o control.o interface.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/Makefile --- a/linux-2.4-xen-sparse/arch/xen/kernel/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,20 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -traditional -c $< -o $*.o - -all: kernel.o head.o init_task.o - -O_TARGET := kernel.o - -export-objs := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o - -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ - ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ - i386_ksyms.o i387.o evtchn.o ctrl_if.o pci-dma.o \ - reboot.o fixup.o gnttab.o skbuff.o - -ifdef CONFIG_PCI -obj-y += pci-i386.o pci-pc.o -endif - -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/entry.S --- a/linux-2.4-xen-sparse/arch/xen/kernel/entry.S Sat Oct 8 
17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,779 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'ret_to_user': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ - -#include <linux/config.h> -#include <linux/sys.h> -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/smp.h> - -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C -EFLAGS = 0x30 -OLDESP = 0x34 -OLDSS = 0x38 - -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 - -/* Offsets into task_struct. */ -state = 0 -flags = 4 -sigpending = 8 -addr_limit = 12 -exec_domain = 16 -need_resched = 20 -tsk_ptrace = 24 -processor = 52 - -/* Offsets into shared_info_t. */ -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 - -ENOSYS = 38 - - -#define SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - movl $(__KERNEL_DS),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; - -#define RESTORE_ALL \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ -1: popl %ds; \ -2: popl %es; \ - addl $4,%esp; \ -3: iret; \ -.section .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ -.previous - -#define GET_CURRENT(reg) \ - movl $-8192, reg; \ - andl %esp, reg - -ENTRY(lcall7) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. - SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 - call *%edx - addl $4, %esp - popl %eax - jmp ret_to_user - -ENTRY(lcall27) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. 
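The GET_CURRENT macro above recovers the current task from nothing but
the stack pointer: the 8KB kernel stack and the task_struct share one
8192-byte-aligned block, so masking any on-stack address with -8192
yields the block base. A quick model of the trick:

    #include <stdio.h>
    #include <stdint.h>

    /* Model of GET_CURRENT: task_struct and kernel stack occupy one
     * 8192-byte-aligned block, so esp & -8192 is the task pointer. */
    int main(void)
    {
        uintptr_t task = 0xc2340000u;            /* 8KB-aligned block  */
        uintptr_t esp  = task + 0x1f40;          /* somewhere on stack */
        uintptr_t cur  = esp & ~(uintptr_t)8191; /* the masking trick  */

        printf("current=%#lx (matches task: %d)\n",
               (unsigned long)cur, cur == task);
        return 0;
    }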
- SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 - call *%edx - addl $4, %esp - popl %eax - jmp ret_to_user - -ENTRY(ret_from_fork) - pushl %ebx - call SYMBOL_NAME(schedule_tail) - addl $4, %esp - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys_exit - jmp ret_to_user - -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ -ENTRY(system_call) - pushl %eax # save orig_eax - SAVE_ALL - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys - cmpl $(NR_syscalls),%eax - jae badsys - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -ret_to_user: - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - movb $1,evtchn_upcall_mask(%esi) # make tests atomic -ret_to_user_nocli: - cmpl $0,need_resched(%ebx) - jne reschedule - cmpl $0,sigpending(%ebx) - je safesti # ensure need_resched updates are seen -/*signal_return:*/ - movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks - movl %esp,%eax - xorl %edx,%edx - call SYMBOL_NAME(do_signal) - jmp safesti - - ALIGN -restore_all: - RESTORE_ALL - - ALIGN -tracesys: - movl $-ENOSYS,EAX(%esp) - call SYMBOL_NAME(syscall_trace) - movl ORIG_EAX(%esp),%eax - cmpl $(NR_syscalls),%eax - jae tracesys_exit - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -tracesys_exit: - call SYMBOL_NAME(syscall_trace) - jmp ret_to_user -badsys: - movl $-ENOSYS,EAX(%esp) - jmp ret_to_user - - ALIGN -ENTRY(ret_from_intr) - GET_CURRENT(%ebx) -ret_from_exception: - movb CS(%esp),%al - testl $2,%eax - jne ret_to_user - jmp restore_all - - ALIGN -reschedule: - movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks - call SYMBOL_NAME(schedule) # test - jmp ret_to_user - -ENTRY(divide_error) - pushl $0 # no error code - pushl $ SYMBOL_NAME(do_divide_error) - ALIGN -error_code: - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - GET_CURRENT(%ebx) - cld - movl %es,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl %esp,%edx - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - movl $(__KERNEL_DS),%edx - movl %edx,%ds - movl %edx,%es - call *%edi - addl $8,%esp - jmp ret_from_exception - -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until we've done all processing. HOWEVER, we must enable events before -# popping the stack frame (can't be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. 
-# Although unlikely, bugs of that kind are hard to track down, so we'd -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -ENTRY(hypervisor_callback) - pushl %eax - SAVE_ALL - GET_CURRENT(%ebx) - movl EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: push %esp - call evtchn_do_upcall - add $4,%esp - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne ret_to_user_nocli -safesti:movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks -scrit: /**** START OF CRITICAL REGION ****/ - testb $0xFF,evtchn_upcall_pending(%esi) - jnz 14f # process more events if necessary... - RESTORE_ALL -14: movb $1,evtchn_upcall_mask(%esi) - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - mov %esp,%esi - add %eax,%esi # %esi points at end of src region - mov %esp,%edi - add $0x34,%edi # %edi points at end of dst region - mov %eax,%ecx - shr $2,%ecx # convert words to bytes - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp 11b - -critical_fixup_table: - .byte 0x00,0x00,0x00 # testb $0xFF,(%esi) - .byte 0x00,0x00 # jnz 14f - .byte 0x00 # pop %ebx - .byte 0x04 # pop %ecx - .byte 0x08 # pop %edx - .byte 0x0c # pop %esi - .byte 0x10 # pop %edi - .byte 0x14 # pop %ebp - .byte 0x18 # pop %eax - .byte 0x1c # pop %ds - .byte 0x20 # pop %es - .byte 0x24,0x24,0x24 # add $4,%esp - .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00 # movb $1,4(%esi) - .byte 0x00,0x00 # jmp 11b - -# Hypervisor uses this for application faults while it executes. 
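The fixup table above records, for each instruction in the critical
region, how many bytes of the saved frame had already been popped; the
merge then slides the freshly re-saved copies of exactly those
registers up against the remainder of the interrupted frame, so one
coherent frame is left for RESTORE_ALL. A schematic user-space model of
that copy, using the 0x34-byte (13-word) frame layout from above:

    #include <stdio.h>
    #include <string.h>

    #define FRAME_WORDS 13             /* 0x34 bytes of saved state */

    /* stack[0..12] is the frame SAVE_ALL just rebuilt; stack[13..] is
     * the tail of the interrupted frame.  'popped' comes from the fixup
     * table: how many bytes the interrupted RESTORE_ALL had consumed,
     * and hence how many words must be slid up against the tail. */
    static int merge(unsigned long *stack, int popped)
    {
        int dst = FRAME_WORDS - popped / 4;  /* new top-of-stack index */

        memmove(&stack[dst], &stack[0], (size_t)popped);
        return dst;                          /* resume with esp here   */
    }

    int main(void)
    {
        unsigned long stack[2 * FRAME_WORDS];
        int i, sp;

        for (i = 0; i < 2 * FRAME_WORDS; i++)
            stack[i] = i;                    /* recognisable pattern   */

        sp = merge(stack, 8);                /* %ebx, %ecx were popped */
        printf("resume at word %d; first word now %lu\n", sp, stack[sp]);
        return 0;
    }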
-ENTRY(failsafe_callback) -1: popl %ds -2: popl %es -3: popl %fs -4: popl %gs -5: iret -.section .fixup,"ax"; \ -6: movl $0,(%esp); \ - jmp 1b; \ -7: movl $0,(%esp); \ - jmp 2b; \ -8: movl $0,(%esp); \ - jmp 3b; \ -9: movl $0,(%esp); \ - jmp 4b; \ -10: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,6b; \ - .long 2b,7b; \ - .long 3b,8b; \ - .long 4b,9b; \ - .long 5b,10b; \ -.previous - -ENTRY(coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_error) - jmp error_code - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_simd_coprocessor_error) - jmp error_code - -ENTRY(device_not_available) - pushl $-1 # mark this as an int - SAVE_ALL - GET_CURRENT(%ebx) - call SYMBOL_NAME(math_state_restore) - jmp ret_from_exception - -ENTRY(debug) - pushl $0 - pushl $ SYMBOL_NAME(do_debug) - jmp error_code - -ENTRY(int3) - pushl $0 - pushl $ SYMBOL_NAME(do_int3) - jmp error_code - -ENTRY(overflow) - pushl $0 - pushl $ SYMBOL_NAME(do_overflow) - jmp error_code - -ENTRY(bounds) - pushl $0 - pushl $ SYMBOL_NAME(do_bounds) - jmp error_code - -ENTRY(invalid_op) - pushl $0 - pushl $ SYMBOL_NAME(do_invalid_op) - jmp error_code - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) - jmp error_code - -ENTRY(double_fault) - pushl $ SYMBOL_NAME(do_double_fault) - jmp error_code - -ENTRY(invalid_TSS) - pushl $ SYMBOL_NAME(do_invalid_TSS) - jmp error_code - -ENTRY(segment_not_present) - pushl $ SYMBOL_NAME(do_segment_not_present) - jmp error_code - -ENTRY(stack_segment) - pushl $ SYMBOL_NAME(do_stack_segment) - jmp error_code - -ENTRY(general_protection) - pushl $ SYMBOL_NAME(do_general_protection) - jmp error_code - -ENTRY(alignment_check) - pushl $ SYMBOL_NAME(do_alignment_check) - jmp error_code - -# This handler is special, because it gets an extra value on its stack, -# which is the linear faulting address. 
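The stub defined next pushes the faulting address, then the error code,
then the pt_regs pointer; under cdecl the last value pushed sits lowest
on the stack and becomes the first parameter, so the C handler's
signature comes out in the opposite order from the pushes. A sketch of
what that implies (struct fields elided, values invented):

    #include <stdio.h>

    struct pt_regs { unsigned long eip, esp; /* ...elided... */ };

    /* Last push = first cdecl argument: regs, then error code, then
     * the faulting linear address. */
    static void do_page_fault(struct pt_regs *regs, unsigned long error_code,
                              unsigned long address)
    {
        printf("fault at %#lx, err=%#lx, eip=%#lx\n",
               address, error_code, regs->eip);
    }

    int main(void)
    {
        struct pt_regs regs = { 0xc0100000UL, 0 };
        do_page_fault(&regs, 2, 0xbfffe000UL);  /* write fault demo */
        return 0;
    }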
-#define PAGE_FAULT_STUB(_name1, _name2) \ -ENTRY(_name1) \ - pushl %ds ; \ - pushl %eax ; \ - xorl %eax,%eax ; \ - pushl %ebp ; \ - pushl %edi ; \ - pushl %esi ; \ - pushl %edx ; \ - decl %eax /* eax = -1 */ ; \ - pushl %ecx ; \ - pushl %ebx ; \ - GET_CURRENT(%ebx) ; \ - cld ; \ - movl %es,%ecx ; \ - movl ORIG_EAX(%esp), %esi /* get the error code */ ; \ - movl ES(%esp), %edi /* get the faulting address */ ; \ - movl %eax, ORIG_EAX(%esp) ; \ - movl %ecx, ES(%esp) ; \ - movl %esp,%edx ; \ - pushl %edi /* push the faulting address */ ; \ - pushl %esi /* push the error code */ ; \ - pushl %edx /* push the pt_regs pointer */ ; \ - movl $(__KERNEL_DS),%edx ; \ - movl %edx,%ds ; \ - movl %edx,%es ; \ - call SYMBOL_NAME(_name2) ; \ - addl $12,%esp ; \ - jmp ret_from_exception ; -PAGE_FAULT_STUB(page_fault, do_page_fault) - -ENTRY(machine_check) - pushl $0 - pushl $ SYMBOL_NAME(do_machine_check) - jmp error_code - -ENTRY(fixup_4gb_segment) - pushl $ SYMBOL_NAME(do_fixup_4gb_segment) - jmp error_code - -.data -ENTRY(sys_call_table) - .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ - .long SYMBOL_NAME(sys_exit) - .long SYMBOL_NAME(sys_fork) - .long SYMBOL_NAME(sys_read) - .long SYMBOL_NAME(sys_write) - .long SYMBOL_NAME(sys_open) /* 5 */ - .long SYMBOL_NAME(sys_close) - .long SYMBOL_NAME(sys_waitpid) - .long SYMBOL_NAME(sys_creat) - .long SYMBOL_NAME(sys_link) - .long SYMBOL_NAME(sys_unlink) /* 10 */ - .long SYMBOL_NAME(sys_execve) - .long SYMBOL_NAME(sys_chdir) - .long SYMBOL_NAME(sys_time) - .long SYMBOL_NAME(sys_mknod) - .long SYMBOL_NAME(sys_chmod) /* 15 */ - .long SYMBOL_NAME(sys_lchown16) - .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ - .long SYMBOL_NAME(sys_stat) - .long SYMBOL_NAME(sys_lseek) - .long SYMBOL_NAME(sys_getpid) /* 20 */ - .long SYMBOL_NAME(sys_mount) - .long SYMBOL_NAME(sys_oldumount) - .long SYMBOL_NAME(sys_setuid16) - .long SYMBOL_NAME(sys_getuid16) - .long SYMBOL_NAME(sys_stime) /* 25 */ - .long SYMBOL_NAME(sys_ptrace) - .long SYMBOL_NAME(sys_alarm) - .long SYMBOL_NAME(sys_fstat) - .long SYMBOL_NAME(sys_pause) - .long SYMBOL_NAME(sys_utime) /* 30 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ - .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ - .long SYMBOL_NAME(sys_access) - .long SYMBOL_NAME(sys_nice) - .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ - .long SYMBOL_NAME(sys_sync) - .long SYMBOL_NAME(sys_kill) - .long SYMBOL_NAME(sys_rename) - .long SYMBOL_NAME(sys_mkdir) - .long SYMBOL_NAME(sys_rmdir) /* 40 */ - .long SYMBOL_NAME(sys_dup) - .long SYMBOL_NAME(sys_pipe) - .long SYMBOL_NAME(sys_times) - .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ - .long SYMBOL_NAME(sys_brk) /* 45 */ - .long SYMBOL_NAME(sys_setgid16) - .long SYMBOL_NAME(sys_getgid16) - .long SYMBOL_NAME(sys_signal) - .long SYMBOL_NAME(sys_geteuid16) - .long SYMBOL_NAME(sys_getegid16) /* 50 */ - .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ - .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ - .long SYMBOL_NAME(sys_ioctl) - .long SYMBOL_NAME(sys_fcntl) /* 55 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ - .long SYMBOL_NAME(sys_setpgid) - .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ - .long SYMBOL_NAME(sys_olduname) - .long SYMBOL_NAME(sys_umask) /* 60 */ - .long SYMBOL_NAME(sys_chroot) - .long SYMBOL_NAME(sys_ustat) - .long SYMBOL_NAME(sys_dup2) - .long SYMBOL_NAME(sys_getppid) - .long 
SYMBOL_NAME(sys_getpgrp) /* 65 */ - .long SYMBOL_NAME(sys_setsid) - .long SYMBOL_NAME(sys_sigaction) - .long SYMBOL_NAME(sys_sgetmask) - .long SYMBOL_NAME(sys_ssetmask) - .long SYMBOL_NAME(sys_setreuid16) /* 70 */ - .long SYMBOL_NAME(sys_setregid16) - .long SYMBOL_NAME(sys_sigsuspend) - .long SYMBOL_NAME(sys_sigpending) - .long SYMBOL_NAME(sys_sethostname) - .long SYMBOL_NAME(sys_setrlimit) /* 75 */ - .long SYMBOL_NAME(sys_old_getrlimit) - .long SYMBOL_NAME(sys_getrusage) - .long SYMBOL_NAME(sys_gettimeofday) - .long SYMBOL_NAME(sys_settimeofday) - .long SYMBOL_NAME(sys_getgroups16) /* 80 */ - .long SYMBOL_NAME(sys_setgroups16) - .long SYMBOL_NAME(old_select) - .long SYMBOL_NAME(sys_symlink) - .long SYMBOL_NAME(sys_lstat) - .long SYMBOL_NAME(sys_readlink) /* 85 */ - .long SYMBOL_NAME(sys_uselib) - .long SYMBOL_NAME(sys_swapon) - .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(old_readdir) - .long SYMBOL_NAME(old_mmap) /* 90 */ - .long SYMBOL_NAME(sys_munmap) - .long SYMBOL_NAME(sys_truncate) - .long SYMBOL_NAME(sys_ftruncate) - .long SYMBOL_NAME(sys_fchmod) - .long SYMBOL_NAME(sys_fchown16) /* 95 */ - .long SYMBOL_NAME(sys_getpriority) - .long SYMBOL_NAME(sys_setpriority) - .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ - .long SYMBOL_NAME(sys_statfs) - .long SYMBOL_NAME(sys_fstatfs) /* 100 */ - .long SYMBOL_NAME(sys_ioperm) - .long SYMBOL_NAME(sys_socketcall) - .long SYMBOL_NAME(sys_syslog) - .long SYMBOL_NAME(sys_setitimer) - .long SYMBOL_NAME(sys_getitimer) /* 105 */ - .long SYMBOL_NAME(sys_newstat) - .long SYMBOL_NAME(sys_newlstat) - .long SYMBOL_NAME(sys_newfstat) - .long SYMBOL_NAME(sys_uname) - .long SYMBOL_NAME(sys_iopl) /* 110 */ - .long SYMBOL_NAME(sys_vhangup) - .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_wait4) - .long SYMBOL_NAME(sys_swapoff) /* 115 */ - .long SYMBOL_NAME(sys_sysinfo) - .long SYMBOL_NAME(sys_ipc) - .long SYMBOL_NAME(sys_fsync) - .long SYMBOL_NAME(sys_sigreturn) - .long SYMBOL_NAME(sys_clone) /* 120 */ - .long SYMBOL_NAME(sys_setdomainname) - .long SYMBOL_NAME(sys_newuname) - .long SYMBOL_NAME(sys_modify_ldt) - .long SYMBOL_NAME(sys_adjtimex) - .long SYMBOL_NAME(sys_mprotect) /* 125 */ - .long SYMBOL_NAME(sys_sigprocmask) - .long SYMBOL_NAME(sys_create_module) - .long SYMBOL_NAME(sys_init_module) - .long SYMBOL_NAME(sys_delete_module) - .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ - .long SYMBOL_NAME(sys_quotactl) - .long SYMBOL_NAME(sys_getpgid) - .long SYMBOL_NAME(sys_fchdir) - .long SYMBOL_NAME(sys_bdflush) - .long SYMBOL_NAME(sys_sysfs) /* 135 */ - .long SYMBOL_NAME(sys_personality) - .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ - .long SYMBOL_NAME(sys_setfsuid16) - .long SYMBOL_NAME(sys_setfsgid16) - .long SYMBOL_NAME(sys_llseek) /* 140 */ - .long SYMBOL_NAME(sys_getdents) - .long SYMBOL_NAME(sys_select) - .long SYMBOL_NAME(sys_flock) - .long SYMBOL_NAME(sys_msync) - .long SYMBOL_NAME(sys_readv) /* 145 */ - .long SYMBOL_NAME(sys_writev) - .long SYMBOL_NAME(sys_getsid) - .long SYMBOL_NAME(sys_fdatasync) - .long SYMBOL_NAME(sys_sysctl) - .long SYMBOL_NAME(sys_mlock) /* 150 */ - .long SYMBOL_NAME(sys_munlock) - .long SYMBOL_NAME(sys_mlockall) - .long SYMBOL_NAME(sys_munlockall) - .long SYMBOL_NAME(sys_sched_setparam) - .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ - .long SYMBOL_NAME(sys_sched_setscheduler) - .long SYMBOL_NAME(sys_sched_getscheduler) - .long SYMBOL_NAME(sys_sched_yield) - .long 
SYMBOL_NAME(sys_sched_get_priority_max) - .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ - .long SYMBOL_NAME(sys_sched_rr_get_interval) - .long SYMBOL_NAME(sys_nanosleep) - .long SYMBOL_NAME(sys_mremap) - .long SYMBOL_NAME(sys_setresuid16) - .long SYMBOL_NAME(sys_getresuid16) /* 165 */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_query_module) - .long SYMBOL_NAME(sys_poll) - .long SYMBOL_NAME(sys_nfsservctl) - .long SYMBOL_NAME(sys_setresgid16) /* 170 */ - .long SYMBOL_NAME(sys_getresgid16) - .long SYMBOL_NAME(sys_prctl) - .long SYMBOL_NAME(sys_rt_sigreturn) - .long SYMBOL_NAME(sys_rt_sigaction) - .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ - .long SYMBOL_NAME(sys_rt_sigpending) - .long SYMBOL_NAME(sys_rt_sigtimedwait) - .long SYMBOL_NAME(sys_rt_sigqueueinfo) - .long SYMBOL_NAME(sys_rt_sigsuspend) - .long SYMBOL_NAME(sys_pread) /* 180 */ - .long SYMBOL_NAME(sys_pwrite) - .long SYMBOL_NAME(sys_chown16) - .long SYMBOL_NAME(sys_getcwd) - .long SYMBOL_NAME(sys_capget) - .long SYMBOL_NAME(sys_capset) /* 185 */ - .long SYMBOL_NAME(sys_sigaltstack) - .long SYMBOL_NAME(sys_sendfile) - .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ - .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ - .long SYMBOL_NAME(sys_vfork) /* 190 */ - .long SYMBOL_NAME(sys_getrlimit) - .long SYMBOL_NAME(sys_mmap2) - .long SYMBOL_NAME(sys_truncate64) - .long SYMBOL_NAME(sys_ftruncate64) - .long SYMBOL_NAME(sys_stat64) /* 195 */ - .long SYMBOL_NAME(sys_lstat64) - .long SYMBOL_NAME(sys_fstat64) - .long SYMBOL_NAME(sys_lchown) - .long SYMBOL_NAME(sys_getuid) - .long SYMBOL_NAME(sys_getgid) /* 200 */ - .long SYMBOL_NAME(sys_geteuid) - .long SYMBOL_NAME(sys_getegid) - .long SYMBOL_NAME(sys_setreuid) - .long SYMBOL_NAME(sys_setregid) - .long SYMBOL_NAME(sys_getgroups) /* 205 */ - .long SYMBOL_NAME(sys_setgroups) - .long SYMBOL_NAME(sys_fchown) - .long SYMBOL_NAME(sys_setresuid) - .long SYMBOL_NAME(sys_getresuid) - .long SYMBOL_NAME(sys_setresgid) /* 210 */ - .long SYMBOL_NAME(sys_getresgid) - .long SYMBOL_NAME(sys_chown) - .long SYMBOL_NAME(sys_setuid) - .long SYMBOL_NAME(sys_setgid) - .long SYMBOL_NAME(sys_setfsuid) /* 215 */ - .long SYMBOL_NAME(sys_setfsgid) - .long SYMBOL_NAME(sys_pivot_root) - .long SYMBOL_NAME(sys_mincore) - .long SYMBOL_NAME(sys_madvise) - .long SYMBOL_NAME(sys_getdents64) /* 220 */ - .long SYMBOL_NAME(sys_fcntl64) - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ - .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ - .long SYMBOL_NAME(sys_gettid) - .long SYMBOL_NAME(sys_readahead) /* 225 */ - .long SYMBOL_NAME(sys_setxattr) - .long SYMBOL_NAME(sys_lsetxattr) - .long SYMBOL_NAME(sys_fsetxattr) - .long SYMBOL_NAME(sys_getxattr) - .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ - .long SYMBOL_NAME(sys_fgetxattr) - .long SYMBOL_NAME(sys_listxattr) - .long SYMBOL_NAME(sys_llistxattr) - .long SYMBOL_NAME(sys_flistxattr) - .long SYMBOL_NAME(sys_removexattr) /* 235 */ - .long SYMBOL_NAME(sys_lremovexattr) - .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_sendfile64) - .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ - .long 
SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ - .long SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ - - .rept NR_syscalls-(.-sys_call_table)/4 - .long SYMBOL_NAME(sys_ni_syscall) - .endr diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/head.S --- a/linux-2.4-xen-sparse/arch/xen/kernel/head.S Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,41 +0,0 @@ - -.section __xen_guest - .ascii "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=3.0,VIRT_BASE=0xC0000000" - .ascii ",LOADER=generic" - .byte 0 - -.text -#include <linux/config.h> -#include <linux/threads.h> -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -ENTRY(stext) -ENTRY(_stext) - cld - lss stack_start,%esp - /* Copy the necessary stuff from xen_start_info structure. */ - mov $SYMBOL_NAME(xen_start_info_union),%edi - mov $128,%ecx - rep movsl - jmp SYMBOL_NAME(start_kernel) - -ENTRY(stack_start) - .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS - -.org 0x1000 -ENTRY(empty_zero_page) - -.org 0x2000 -ENTRY(default_ldt) - -.org 0x3000 -ENTRY(cpu0_pte_quicklist) - -.org 0x3400 -ENTRY(cpu0_pgd_quicklist) - -.org 0x3800 diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,180 +0,0 @@ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mca.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/pm.h> -#include <linux/pci.h> -#include <linux/apm_bios.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/tty.h> - -#include <asm/semaphore.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/uaccess.h> -#include <asm/checksum.h> -#include <asm/io.h> -#include <asm/hardirq.h> -#include <asm/delay.h> -#include <asm/irq.h> -#include <asm/mmx.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; - -#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -// XXX extern unsigned long get_cmos_time(void); - -/* platform dependent support */ 
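Each EXPORT_SYMBOL line that follows records a name/address pair in a
table that the 2.4 module loader searches at insmod time; a reference
the table cannot resolve makes the module load fail. A toy user-space
model of such a symbol table:

    #include <stdio.h>
    #include <string.h>

    /* Toy model: EXPORT_SYMBOL(foo) amounts to appending { "foo", &foo }
     * to a section the module loader walks at load time. */
    struct symbol { const char *name; void *addr; };

    static int exported_a, exported_b;
    static const struct symbol symtab[] = {
        { "exported_a", &exported_a },
        { "exported_b", &exported_b },
    };

    static void *lookup(const char *name)
    {
        size_t i;
        for (i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++)
            if (strcmp(symtab[i].name, name) == 0)
                return symtab[i].addr;
        return NULL;                   /* unresolved: insmod would fail */
    }

    int main(void)
    {
        printf("exported_b -> %p\n", lookup("exported_b"));
        printf("missing    -> %p\n", lookup("missing"));
        return 0;
    }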
-EXPORT_SYMBOL(boot_cpu_data); -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(dump_extended_fpu); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(enable_irq); -EXPORT_SYMBOL(disable_irq); -EXPORT_SYMBOL(disable_irq_nosync); -EXPORT_SYMBOL(probe_irq_mask); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(apm_info); -//EXPORT_SYMBOL(gdt); -EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(phys_to_machine_mapping); - - -#ifdef CONFIG_DEBUG_IOVIRT -EXPORT_SYMBOL(__io_virt_debug); -#endif - -EXPORT_SYMBOL_NOVERS(__down_failed); -EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); -EXPORT_SYMBOL_NOVERS(__down_failed_trylock); -EXPORT_SYMBOL_NOVERS(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); - -EXPORT_SYMBOL_NOVERS(__get_user_1); -EXPORT_SYMBOL_NOVERS(__get_user_2); -EXPORT_SYMBOL_NOVERS(__get_user_4); - -EXPORT_SYMBOL(strtok); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); - -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__generic_copy_from_user); -EXPORT_SYMBOL(__generic_copy_to_user); -EXPORT_SYMBOL(strnlen_user); - - -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pcibios_penalize_isa_irq); -EXPORT_SYMBOL(pci_mem_start); -#endif - - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(kernel_flag_cacheline); -EXPORT_SYMBOL(smp_num_cpus); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL_NOVERS(__write_lock_failed); -EXPORT_SYMBOL_NOVERS(__read_lock_failed); - -/* Global SMP irq stuff */ -EXPORT_SYMBOL(synchronize_irq); -EXPORT_SYMBOL(global_irq_holder); -EXPORT_SYMBOL(__global_cli); -EXPORT_SYMBOL(__global_sti); -EXPORT_SYMBOL(__global_save_flags); -EXPORT_SYMBOL(__global_restore_flags); -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); - -/* HT support */ -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -#undef memcpy -#undef memset -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -EXPORT_SYMBOL_NOVERS(memcpy); -EXPORT_SYMBOL_NOVERS(memset); - -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(atomic_dec_and_lock); -#endif - -#ifdef CONFIG_MULTIQUAD -EXPORT_SYMBOL(xquad_portio); -#endif - -#include <asm/xen_proc.h> -EXPORT_SYMBOL(create_xen_proc_entry); -EXPORT_SYMBOL(remove_xen_proc_entry); - -EXPORT_SYMBOL(evtchn_do_upcall); -EXPORT_SYMBOL(force_evtchn_callback); -EXPORT_SYMBOL(HYPERVISOR_shared_info); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/irq.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/irq.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1242 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done 
through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - -#include <linux/config.h> -#include <linux/ptrace.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/smp_lock.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/irq.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> - -#include <asm/atomic.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/bitops.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/delay.h> -#include <asm/desc.h> -#include <asm/irq.h> - - - -/* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used - * by the main code to do the right thing. Each driver-visible - * interrupt source is transparently wired to the apropriate - * controller. Thus drivers need not be aware of the - * interrupt-controller. - * - * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, - * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. - * (IO-APICs assumed to be messaging to Pentium local-APICs) - * - * the code is designed to be easily extended with new/different - * interrupt controllers, without having to do assembly magic. - */ - -/* - * Controller mappings for all interrupt sources: - */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = - { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; - -static void register_irq_proc (unsigned int irq); - -/* - * Special irq handlers. - */ - -void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves, it doesnt deserve - * a generic callback i think. - */ -#if CONFIG_X86 - printk("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. 
- */ - ack_APIC_irq(); -#endif -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - -atomic_t irq_err_count; -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG -atomic_t irq_mis_count; -#endif -#endif - -/* - * Generic, controller-independent functions: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i, j; - struct irqaction * action; - - seq_printf(p, " "); - for (j=0; j<smp_num_cpus; j++) - seq_printf(p, "CPU%d ",j); - seq_putc(p,'\n'); - - for (i = 0 ; i < NR_IRQS ; i++) { - action = irq_desc[i].action; - if (!action) - continue; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); -#endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p,'\n'); - } - seq_printf(p, "NMI: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - nmi_count(cpu_logical_map(j))); - seq_printf(p, "\n"); -#if CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - apic_timer_irqs[cpu_logical_map(j)]); - seq_printf(p, "\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif -#endif - - return 0; -} - - -/* - * Global interrupt locks for SMP. Allow interrupts to come in on any - * CPU, yet make cli/sti act globally to protect critical regions.. - */ - -#ifdef CONFIG_SMP -unsigned char global_irq_holder = NO_PROC_ID; -unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ - -extern void show_stack(unsigned long* esp); - -static void show(char * str) -{ - int i; - int cpu = smp_processor_id(); - - printk("\n%s, CPU %d:\n", str, cpu); - printk("irq: %d [",irqs_running()); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_irq_count(i)); - printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_bh_count(i)); - - printk(" ]\nStack dumps:"); - for(i = 0; i < smp_num_cpus; i++) { - unsigned long esp; - if (i == cpu) - continue; - printk("\nCPU %d:",i); - esp = init_tss[i].esp0; - if (!esp) { - /* tss->esp0 is set to NULL in cpu_init(), - * it's initialized when the cpu returns to user - * space. -- manfreds - */ - printk(" <unknown> "); - continue; - } - esp &= ~(THREAD_SIZE-1); - esp += sizeof(struct task_struct); - show_stack((void*)esp); - } - printk("\nCPU %d:",cpu); - show_stack(NULL); - printk("\n"); -} - -#define MAXCOUNT 100000000 - -/* - * I had a lockup scenario where a tight loop doing - * spin_unlock()/spin_lock() on CPU#1 was racing with - * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but - * apparently the spin_unlock() information did not make it - * through to CPU#0 ... nasty, is this by design, do we have to limit - * 'memory update oscillation frequency' artificially like here? 
- * - * Such 'high frequency update' races can be avoided by careful design, but - * some of our major constructs like spinlocks use similar techniques, - * it would be nice to clarify this issue. Set this define to 0 if you - * want to check whether your system freezes. I suspect the delay done - * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but - * i thought that such things are guaranteed by design, since we use - * the 'LOCK' prefix. - */ -#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 - -#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND -# define SYNC_OTHER_CORES(x) udelay(x+1) -#else -/* - * We have to allow irqs to arrive between __sti and __cli - */ -# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") -#endif - -static inline void wait_on_irq(int cpu) -{ - int count = MAXCOUNT; - - for (;;) { - - /* - * Wait until all interrupts are gone. Wait - * for bottom half handlers unless we're - * already executing in one.. - */ - if (!irqs_running()) - if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) - break; - - /* Duh, we have to loop. Release the lock to avoid deadlocks */ - clear_bit(0,&global_irq_lock); - - for (;;) { - if (!--count) { - show("wait_on_irq"); - count = ~0; - } - __sti(); - SYNC_OTHER_CORES(cpu); - __cli(); - if (irqs_running()) - continue; - if (global_irq_lock) - continue; - if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) - continue; - if (!test_and_set_bit(0,&global_irq_lock)) - break; - } - } -} - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -void synchronize_irq(void) -{ - if (irqs_running()) { - /* Stupid approach */ - cli(); - sti(); - } -} - -static inline void get_irqlock(int cpu) -{ - if (test_and_set_bit(0,&global_irq_lock)) { - /* do we already hold the lock? */ - if ((unsigned char) cpu == global_irq_holder) - return; - /* Uhhuh.. Somebody else got it. Wait.. */ - do { - do { - rep_nop(); - } while (test_bit(0,&global_irq_lock)); - } while (test_and_set_bit(0,&global_irq_lock)); - } - /* - * We also to make sure that nobody else is running - * in an interrupt context. - */ - wait_on_irq(cpu); - - /* - * Ok, finally.. - */ - global_irq_holder = cpu; -} - -/* - * A global "cli()" while in an interrupt context - * turns into just a local cli(). Interrupts - * should use spinlocks for the (very unlikely) - * case that they ever want to protect against - * each other. - * - * If we already have local interrupts disabled, - * this will not turn a local disable into a - * global one (problems with spinlocks: this makes - * save_flags+cli+sti usable inside a spinlock). 
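__global_cli and friends, defined below, encode the state that a later
restore must reinstate in two bits: 0 for global cli, 1 for global sti,
2 for local cli, 3 for local sti. A small model of how
__global_save_flags picks the value, mirroring the logic that follows:

    #include <stdio.h>

    /* Model of the SMP flags encoding: default to the local encoding
     * (2 + enabled), then upgrade to a global answer only outside
     * interrupt context. */
    static int local_enabled = 1, in_irq = 0, global_holder = -1, cpu = 0;

    static int save_flags_model(void)
    {
        int retval = 2 + local_enabled;
        if (!in_irq) {
            if (local_enabled) retval = 1;
            if (global_holder == cpu) retval = 0;
        }
        return retval;
    }

    int main(void)
    {
        printf("enabled, no lock -> %d\n", save_flags_model());  /* 1 */
        local_enabled = 0; global_holder = cpu;
        printf("holder, disabled -> %d\n", save_flags_model());  /* 0 */
        global_holder = -1; in_irq = 1;
        printf("in irq, disabled -> %d\n", save_flags_model());  /* 2 */
        return 0;
    }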
- */ -void __global_cli(void) -{ - unsigned int flags; - - __save_flags(flags); - if (!flags) { - int cpu = smp_processor_id(); - __cli(); - if (!local_irq_count(cpu)) - get_irqlock(cpu); - } -} - -void __global_sti(void) -{ - int cpu = smp_processor_id(); - - if (!local_irq_count(cpu)) - release_irqlock(cpu); - __sti(); -} - -/* - * SMP flags value to restore to: - * 0 - global cli - * 1 - global sti - * 2 - local cli - * 3 - local sti - */ -unsigned long __global_save_flags(void) -{ - int retval; - int local_enabled; - unsigned long flags; - int cpu = smp_processor_id(); - - __save_flags(flags); - local_enabled = !flags; - /* default to local */ - retval = 2 + local_enabled; - - /* check for global flags if we're not in an interrupt */ - if (!local_irq_count(cpu)) { - if (local_enabled) - retval = 1; - if (global_irq_holder == cpu) - retval = 0; - } - return retval; -} - -void __global_restore_flags(unsigned long flags) -{ - switch (flags) { - case 0: - __global_cli(); - break; - case 1: - __global_sti(); - break; - case 2: - __cli(); - break; - case 3: - __sti(); - break; - default: - printk("global_restore_flags: %08lx (%08lx)\n", - flags, (&flags)[-1]); - } -} - -#endif - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. - */ -int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) -{ - int status; - int cpu = smp_processor_id(); - - irq_enter(cpu, irq); - - status = 1; /* Force the "do bottom halves" bit */ - - if (!(action->flags & SA_INTERRUPT)) - __sti(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - __cli(); - - irq_exit(cpu, irq); - - return status; -} - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. - */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -inline void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. 
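disable_irq, defined next, nests: only the 0 -> 1 transition of the
depth count really masks the line, and only the matching final
enable_irq unmasks it again, with a warning printed for an unbalanced
enable. A toy model of the depth discipline:

    #include <stdio.h>

    /* Toy model: disables and enables nest; the hardware is touched
     * only on the outermost transition in each direction. */
    static int depth, masked;

    static void disable_irq_nosync_model(void)
    {
        if (!depth++)
            masked = 1;                /* first disable really masks   */
    }

    static void enable_irq_model(void)
    {
        if (depth == 0) {
            printf("unbalanced enable\n");  /* mirrors the warning     */
            return;
        }
        if (!--depth)
            masked = 0;                /* last enable really unmasks   */
    }

    int main(void)
    {
        disable_irq_nosync_model();
        disable_irq_nosync_model();
        enable_irq_model();
        printf("after one enable: depth=%d masked=%d\n", depth, masked);
        enable_irq_model();
        printf("after two:        depth=%d masked=%d\n", depth, masked);
        enable_irq_model();            /* one too many */
        return 0;
    }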
- */ - -void disable_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - cpu_relax(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -} - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int do_IRQ(struct pt_regs *regs) -{ - /* - * We ack quickly, we don't want the irq controller - * thinking we're snobs just because some other CPU has - * disabled global interrupts (we have already done the - * INT_ACK cycles, it's too late to try to pretend to the - * controller that we aren't taking the interrupt). - * - * 0 return value means that this irq is already being - * handled by some other CPU. (or is disabled) - */ - int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */ - int cpu = smp_processor_id(); - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; -#ifdef CONFIG_DEBUG_STACKOVERFLOW - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ - __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - - printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct task_struct)); - __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); - show_stack((void *)esp); - } -#endif - - kstat.irqs[cpu][irq]++; - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (!action) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. 
But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - spin_unlock(&desc->lock); - handle_IRQ_event(irq, regs, action); - spin_lock(&desc->lock); - - if (!(desc->status & IRQ_PENDING)) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - - if (softirq_pending(cpu)) - do_softirq(); - return 1; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); - } -#endif - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) - kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = 0; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -/* - * Internal function to unregister an irqaction - typically used to - * deallocate special interrupts that are part of the architecture. 
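teardown_irq, which follows, unlinks one irqaction by walking a pointer
to the next field itself, so the head of the list needs no special
case. The same idiom in a self-contained model:

    #include <stdio.h>

    struct action { int id; struct action *next; };

    /* Advance a pointer to the 'next' field rather than tracking a
     * previous node; unlinking the head falls out for free. */
    static struct action *unlink_action(struct action **p, int id)
    {
        for (; *p; p = &(*p)->next) {
            if ((*p)->id == id) {
                struct action *victim = *p;
                *p = victim->next;
                return victim;
            }
        }
        return NULL;               /* the "Trying to free free IRQ" case */
    }

    int main(void)
    {
        struct action c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct action *head = &a, *gone = unlink_action(&head, 2);

        printf("removed %d; list now: %d %d\n",
               gone ? gone->id : -1, head->id, head->next->id);
        return 0;
    }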
- */ -int teardown_irq(unsigned int irq, struct irqaction * old) -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return -ENOENT; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action != old) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) { - barrier(); - cpu_relax(); - } -#endif - return 0; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return -ENOENT; - } -} - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. - */ - -void free_irq(unsigned int irq, void *dev_id) -{ - irq_desc_t *desc; - struct irqaction *action; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - for (action = desc->action; action != NULL; action = action->next) { - if (action->dev_id != dev_id) - continue; - - spin_unlock_irqrestore(&desc->lock,flags); - - if (teardown_irq(irq, action) == 0) - kfree(action); - return; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; -} - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long delay; - - down(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_PIRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!irq_desc[i].action) - irq_desc[i].handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger. 
*/ - for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) - /* about 20ms delay */ synchronize_irq(); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_PIRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger - */ - for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) - /* about 100ms delay */ synchronize_irq(); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -/* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (i < 16 && !(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - return mask & val; -} - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * there is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldn't happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. 
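/*
 * Illustrative use of the autoprobe protocol above (a sketch;
 * my_trigger_interrupt() is a hypothetical helper that makes the
 * hardware raise its line exactly once):
 */
static int my_find_irq(void)
{
	unsigned long mask;

	mask = probe_irq_on();
	my_trigger_interrupt();	/* device asserts its interrupt line */
	mdelay(20);		/* give the interrupt time to arrive */
	/* > 0: probed line; 0: nothing fired; < 0: ambiguous result */
	return probe_irq_off(mask);
}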
- */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_desc + irq; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); - desc->handler->startup(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - register_irq_proc(irq); - return 0; -} - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#define HEX_DIGITS 8 - -static unsigned int parse_hex_value (const char *buffer, - unsigned long count, unsigned long *ret) -{ - unsigned char hexnum [HEX_DIGITS]; - unsigned long value; - int i; - - if (!count) - return -EINVAL; - if (count > HEX_DIGITS) - count = HEX_DIGITS; - if (copy_from_user(hexnum, buffer, count)) - return -EFAULT; - - /* - * Parse the first 8 characters as a hex string, any non-hex char - * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. - */ - value = 0; - - for (i = 0; i < count; i++) { - unsigned int c = hexnum[i]; - - switch (c) { - case '0' ... '9': c -= '0'; break; - case 'a' ... 'f': c -= 'a'-10; break; - case 'A' ... 'F': c -= 'A'-10; break; - default: - goto out; - } - value = (value << 4) | c; - } -out: - *ret = value; - return 0; -} - -#if CONFIG_SMP - -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; - -static unsigned long irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = ~0UL }; -static int irq_affinity_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -} - -static int irq_affinity_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int irq = (long) data, full_count = count, err; - unsigned long new_value; - - if (!irq_desc[irq].handler->set_affinity) - return -EIO; - - err = parse_hex_value(buffer, count, &new_value); - - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - if (!(new_value & cpu_online_map)) - return -EINVAL; - - irq_affinity[irq] = new_value; - irq_desc[irq].handler->set_affinity(irq, new_value); - - return full_count; -} - -#endif - -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned long *mask = (unsigned long *) data; - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", *mask); -} - -static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - unsigned long *mask = (unsigned long *) data, full_count = count, err; - unsigned long new_value; - - err = parse_hex_value(buffer, count, &new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || - irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); - -#if CONFIG_SMP - { - struct proc_dir_entry *entry; - - /* create /proc/irq/1234/smp_affinity */ - entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); - - if (entry) { - entry->nlink = 1; - entry->data = (void *)(long)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - - smp_affinity_entry[irq] = entry; - } -#endif -} - -unsigned long prof_cpu_mask = -1; - -void init_irq_proc (void) -{ - struct proc_dir_entry *entry; - int i; - - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", 0); - - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - - if (!entry) - return; - - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. 
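/*
 * The two handlers above back /proc/irq/<n>/smp_affinity. From
 * userspace the mask can be set like this (an illustrative userspace
 * sketch; error handling trimmed). parse_hex_value() reads at most
 * HEX_DIGITS (8) hex digits, so an 8-digit mask is always accepted:
 */
#include <stdio.h>

int set_irq_affinity(int irq, unsigned long mask)
{
	char path[64];
	FILE *f;

	sprintf(path, "/proc/irq/%d/smp_affinity", irq);
	f = fopen(path, "w");
	if (f == NULL)
		return -1;
	fprintf(f, "%08lx\n", mask);	/* must target >= 1 online CPU */
	return fclose(f);
}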
- */ - for (i = 0; i < NR_IRQS; i++) - register_irq_proc(i); -} - diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/ldt.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/ldt.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,272 +0,0 @@ -/* - * linux/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx> - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> - -#include <asm/mmu_context.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/ldt.h> -#include <asm/desc.h> - -#ifdef CONFIG_SMP /* avoids "defined but not used" warning */ -static void flush_ldt(void *mm) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - pc->size = mincount; - if (reload) { - make_pages_readonly( - pc->ldt, - (pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - load_LDT(pc); -#ifdef CONFIG_SMP - if (current->mm->cpu_vm_mask != (1<<smp_processor_id())) - smp_call_function(flush_ldt, 0, 1, 1); -#endif - } - wmb(); - if (oldsize) { - make_pages_writable( - oldldt, (oldsize*LDT_ENTRY_SIZE)/PAGE_SIZE); - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) { - printk(KERN_WARNING "ldt allocation failed\n"); - new->size = 0; - return err; - } - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - make_pages_readonly(new->ldt, (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - * Do not touch the ldt register, we are already - * in the next thread. 
- */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - make_pages_writable( - mm->context.ldt, - (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - down(&mm->context.sem); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - up(&mm->context.sem); - if (err < 0) - return err; - if (size != bytecount) { - /* zero-fill the rest */ - clear_user(ptr+size, bytecount-size); - } - return bytecount; -} - -static int read_default_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - void *address; - - err = 0; - address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (copy_to_user(ptr, address, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; - unsigned long mach_lp; - int error; - struct modify_ldt_ldt_s ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - mach_lp = arbitrary_virt_to_machine(lp); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); - - /* Install the new entry ... 
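/*
 * This write_ldt() path is reached from userspace through
 * modify_ldt(2). An illustrative caller (a sketch; slot 0 and the
 * segment layout are arbitrary) that installs a 32-bit data segment:
 */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>		/* struct modify_ldt_ldt_s */

int install_data_segment(unsigned long base, unsigned int limit_pages)
{
	struct modify_ldt_ldt_s ldt;

	memset(&ldt, 0, sizeof(ldt));
	ldt.entry_number = 0;
	ldt.base_addr = base;
	ldt.limit = limit_pages;
	ldt.seg_32bit = 1;
	ldt.limit_in_pages = 1;
	/* func 1 takes the oldmode path in sys_modify_ldt() below */
	return syscall(SYS_modify_ldt, 1, &ldt, sizeof(ldt));
}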
*/ -install: - error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2); - -out_unlock: - up(&mm->context.sem); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,260 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares <mj@xxxxxx> - * - * Adjusted to use Xen's interface by Rolf Neugebauer, Intel Research Cambridge - * Further modifications by Keir Fraser, University of Cambridge - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/ioport.h> - -#include <asm/segment.h> -#include <asm/io.h> - -#include <asm-xen/xen-public/xen.h> -#include <asm-xen/xen-public/physdev.h> - -#include "pci-i386.h" - -/* - * NB. The following interface functions are not included here: - * 1. void eisa_set_level_irq(unsigned int irq) - * 2. irq_routing_table * __devinit pcibios_get_irq_routing_table(void) - * 3. int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) - * All are used by the ACPI driver. This should be ported to Xen if it is - * ever required -- Xen is the ultimate source for IRQ-routing knowledge. - */ - -struct pci_ops *pci_root_ops = NULL; - -int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value) = NULL; -int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value) = NULL; - -unsigned int pci_probe = PCI_PROBE_BIOS; - -struct pci_fixup pcibios_fixups[] = { { 0 } }; - -static int pci_confx_read(int seg, int bus, int dev, int fn, int reg, - int len, u32 *value) -{ - int ret; - physdev_op_t op; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - op.cmd = PHYSDEVOP_PCI_CFGREG_READ; - op.u.pci_cfgreg_read.bus = bus; - op.u.pci_cfgreg_read.dev = dev; - op.u.pci_cfgreg_read.func = fn; - op.u.pci_cfgreg_read.reg = reg; - op.u.pci_cfgreg_read.len = len; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - return ret; - - *value = op.u.pci_cfgreg_read.value; - - return 0; -} - -static int pci_confx_write(int seg, int bus, int dev, int fn, int reg, - int len, u32 value) -{ - int ret; - physdev_op_t op; - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - op.cmd = PHYSDEVOP_PCI_CFGREG_WRITE; - op.u.pci_cfgreg_write.bus = bus; - op.u.pci_cfgreg_write.dev = dev; - op.u.pci_cfgreg_write.func = fn; - op.u.pci_cfgreg_write.reg = reg; - op.u.pci_cfgreg_write.len = len; - op.u.pci_cfgreg_write.value = value; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - return ret; - return 0; -} - - -static int pci_confx_read_config_byte(struct pci_dev *dev, - int where, u8 *value) -{ - int result; - u32 data; - - result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_confx_read_config_word(struct pci_dev *dev, - int where, u16 *value) -{ - int result; - u32 data; - - result = 
pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static int pci_confx_read_config_dword(struct pci_dev *dev, - int where, u32 *value) -{ - return pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_confx_write_config_byte(struct pci_dev *dev, - int where, u8 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_confx_write_config_word(struct pci_dev *dev, - int where, u16 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_confx_write_config_dword(struct pci_dev *dev, - int where, u32 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_conf_xen = { - pci_confx_read_config_byte, - pci_confx_read_config_word, - pci_confx_read_config_dword, - pci_confx_write_config_byte, - pci_confx_write_config_word, - pci_confx_write_config_dword -}; - -void pcibios_penalize_isa_irq(int irq) -{ - /* nothing */ -} - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pci_read_bridge_bases(b); -} - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct list_head *list; - struct pci_bus *bus; - - list_for_each ( list, &pci_root_buses ) - { - bus = pci_bus_b(list); - if ( bus->number == busnum ) - return bus; - } - - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); - return pci_scan_bus(busnum, pci_root_ops, NULL); -} - -void __init pcibios_init(void) -{ - int bus; - physdev_op_t op; - - if ( !pci_probe ) - return; - - pci_root_ops = &pci_conf_xen; - pci_config_read = pci_confx_read; - pci_config_write = pci_confx_write; - - pcibios_set_cacheline_size(); - - op.cmd = PHYSDEVOP_PCI_PROBE_ROOT_BUSES; - if ( HYPERVISOR_physdev_op(&op) != 0 ) - { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return; - } - - printk(KERN_INFO "PCI: Probing PCI hardware\n"); - for ( bus = 0; bus < 256; bus++ ) - if ( test_bit(bus, &op.u.pci_probe_root_buses.busmask[0]) ) - (void)pcibios_scan_root(bus); - - pcibios_resource_survey(); -} - -char * __devinit pcibios_setup(char *str) -{ - if ( !strcmp(str, "off") ) - pci_probe = 0; - return NULL; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - u8 pin; - physdev_op_t op; - - /* Inform Xen that we are going to use this device. */ - op.cmd = PHYSDEVOP_PCI_INITIALISE_DEVICE; - op.u.pci_initialise_device.bus = dev->bus->number; - op.u.pci_initialise_device.dev = PCI_SLOT(dev->devfn); - op.u.pci_initialise_device.func = PCI_FUNC(dev->devfn); - if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) - return err; - - /* Now we can bind to the very final IRQ line. */ - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &pin); - dev->irq = pin; - - /* Turn on device I/O and memory access as necessary. */ - if ( (err = pcibios_enable_resources(dev, mask)) < 0 ) - return err; - - /* Sanity-check that an interrupt-producing device is routed to an IRQ. 
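/*
 * Every config-space access goes through pci_confx_read() and
 * pci_confx_write() above, i.e. through PHYSDEVOP_PCI_CFGREG_READ and
 * PHYSDEVOP_PCI_CFGREG_WRITE hypercalls rather than direct port I/O.
 * For example, a driver that prints its IDs (an illustrative sketch):
 */
static void my_show_ids(struct pci_dev *dev)
{
	u16 vendor, device;

	/* dispatched via the pci_conf_xen ops to pci_confx_read(), len == 2 */
	pci_read_config_word(dev, PCI_VENDOR_ID, &vendor);
	pci_read_config_word(dev, PCI_DEVICE_ID, &device);
	printk(KERN_INFO "PCI device %04x:%04x\n", vendor, device);
}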
*/ - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if ( pin != 0 ) - { - if ( dev->irq != 0 ) - printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n", - dev->irq, dev->slot_name); - else - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of " - "device %s.\n", 'A' + pin - 1, dev->slot_name); - } - - return 0; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/process.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/process.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,448 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#define __KERNEL_SYSCALLS__ -#include <stdarg.h> - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/user.h> -#include <linux/a.out.h> -#include <linux/interrupt.h> -#include <linux/config.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/mc146818rtc.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/ldt.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/desc.h> -#include <asm/mmu_context.h> -#include <asm-xen/xen-public/physdev.h> - -#include <linux/irq.h> - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -int hlt_counter; - -/* - * Power management idle function, if any.. - */ -void (*pm_idle)(void); - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle (void) -{ - extern int set_timeout_timer(void); - - /* Endless idle loop with no priority at all. */ - init_idle(); - current->nice = 20; - current->counter = -100; - - for ( ; ; ) - { - while ( !current->need_resched ) - { - __cli(); - if ( current->need_resched ) - { - /* The race-free check for events failed. */ - __sti(); - break; - } - else if ( set_timeout_timer() == 0 ) - { - /* NB. Blocking reenables events in a race-free manner. */ - HYPERVISOR_block(); - } - else - { - /* No race here: yielding will get us the CPU again anyway. 
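/*
 * The __cli()/re-check/HYPERVISOR_block() sequence above is the
 * race-free way to sleep until an event arrives: test the wakeup
 * condition with event delivery disabled, and rely on the block
 * hypercall to re-enable delivery atomically. The same idiom in
 * isolation (an illustrative sketch):
 */
static void wait_for_flag(volatile int *flag)
{
	while ( !*flag )
	{
		__cli();		/* close the race window */
		if ( *flag )
		{
			__sti();	/* condition became true meanwhile */
			break;
		}
		HYPERVISOR_block();	/* re-enables events, then blocks */
	}
}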
*/ - __sti(); - HYPERVISOR_yield(); - } - } - schedule(); - check_pgt_cache(); - } -} - -extern void show_trace(unsigned long* esp); - -void show_regs(struct pt_regs * regs) -{ - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); - if (regs->xcs & 2) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes); - - show_trace(&regs->esp); -} - - -/* - * Create a kernel thread - */ -int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - long retval, d0; - - __asm__ __volatile__( - "movl %%esp,%%esi\n\t" - "int $0x80\n\t" /* Linux/i386 system call */ - "cmpl %%esp,%%esi\n\t" /* child or parent? */ - "je 1f\n\t" /* parent - jump */ - /* Load the argument into eax, and push it. That way, it does - * not matter whether the called function is compiled with - * -mregparm or not. */ - "movl %4,%%eax\n\t" - "pushl %%eax\n\t" - "call *%5\n\t" /* call fn */ - "movl %3,%0\n\t" /* exit */ - "int $0x80\n" - "1:\t" - :"=&a" (retval), "=&S" (d0) - :"0" (__NR_clone), "i" (__NR_exit), - "r" (arg), "r" (fn), - "b" (flags | CLONE_VM) - : "memory"); - - return retval; -} - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* nothing to do ... */ -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); - - /* - * Forget coprocessor state.. - */ - clear_fpu(tsk); - tsk->used_math = 0; -} - -void release_thread(struct task_struct *dead_task) -{ - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%08x>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - //release_x86_irqs(dead_task); -} - - -/* - * Save a segment. - */ -#define savesegment(seg,value) \ - asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) - -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) -{ - struct pt_regs * childregs; - - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; - struct_cpy(childregs, regs); - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - - p->thread.eip = (unsigned long) ret_from_fork; - - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); - - unlazy_fpu(current); - struct_cpy(&p->thread.i387, &current->thread.i387); - - p->thread.io_pl = current->thread.io_pl; - - return 0; -} - -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - int i; - -/* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -} - -/* - * switch_to(x,y) should switch tasks from x to y. - * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - */ -void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *next = &next_p->thread; - physdev_op_t op; - multicall_entry_t _mcl[8], *mcl = _mcl; - - /* - * This is basically 'unlazy_fpu', except that we queue a multicall to - * indicate FPU task switch, rather than synchronously trapping to Xen. - */ - if ( prev_p->flags & PF_USEDFPU ) - { - if ( cpu_has_fxsr ) - asm volatile( "fxsave %0 ; fnclex" - : "=m" (prev_p->thread.i387.fxsave) ); - else - asm volatile( "fnsave %0 ; fwait" - : "=m" (prev_p->thread.i387.fsave) ); - prev_p->flags &= ~PF_USEDFPU; - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = 1; - mcl++; - } - - mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = __KERNEL_DS; - mcl->args[1] = next->esp0; - mcl++; - - if ( prev_p->thread.io_pl != next->io_pl ) - { - op.cmd = PHYSDEVOP_SET_IOPL; - op.u.set_iopl.iopl = next->io_pl; - mcl->op = __HYPERVISOR_physdev_op; - mcl->args[0] = (unsigned long)&op; - mcl++; - } - - (void)HYPERVISOR_multicall(_mcl, mcl - _mcl); - - /* - * Restore %fs and %gs. 
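/*
 * The queueing above batches up to three hypercalls (FPU switch, stack
 * switch, IOPL change) into one trap via HYPERVISOR_multicall. The
 * same pattern in isolation (an illustrative sketch):
 */
static void example_multicall(unsigned long new_esp0)
{
	multicall_entry_t mcl[2];

	mcl[0].op = __HYPERVISOR_stack_switch;
	mcl[0].args[0] = __KERNEL_DS;
	mcl[0].args[1] = new_esp0;

	mcl[1].op = __HYPERVISOR_fpu_taskswitch;
	mcl[1].args[0] = 1;

	/* a single trap into Xen executes both queued calls in order */
	(void)HYPERVISOR_multicall(mcl, 2);
}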
- */ - loadsegment(fs, next->fs); - loadsegment(gs, next->gs); - - /* - * Now maybe reload the debug registers - */ - if ( next->debugreg[7] != 0 ) - { - HYPERVISOR_set_debugreg(0, next->debugreg[0]); - HYPERVISOR_set_debugreg(1, next->debugreg[1]); - HYPERVISOR_set_debugreg(2, next->debugreg[2]); - HYPERVISOR_set_debugreg(3, next->debugreg[3]); - /* no 4 and 5 */ - HYPERVISOR_set_debugreg(6, next->debugreg[6]); - HYPERVISOR_set_debugreg(7, next->debugreg[7]); - } -} - -asmlinkage int sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.esp, &regs, 0); -} - -asmlinkage int sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - - clone_flags = regs.ebx; - newsp = regs.ecx; - if (!newsp) - newsp = regs.esp; - return do_fork(clone_flags, newsp, &regs, 0); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage int sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0); -} - -/* - * sys_execve() executes a new program. - */ -asmlinkage int sys_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname((char *) regs.ebx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs); - if (error == 0) - current->ptrace &= ~PT_DTRACE; - putname(filename); - out: - return error; -} - -/* - * These bracket the sleeping functions.. - */ -extern void scheduling_functions_start_here(void); -extern void scheduling_functions_end_here(void); -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ebp, esp, eip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)p; - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > 8188+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. 
*/ - ebp = *(unsigned long *) esp; - do { - if (ebp < stack_page || ebp > 8184+stack_page) - return 0; - eip = *(unsigned long *) (ebp+4); - if (eip < first_sched || eip >= last_sched) - return eip; - ebp = *(unsigned long *) ebp; - } while (count++ < 16); - return 0; -} -#undef last_sched -#undef first_sched diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/setup.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/setup.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1213 +0,0 @@ -/* - * linux/arch/i386/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#define __KERNEL_SYSCALLS__ -static int errno; -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/a.out.h> -#include <linux/tty.h> -#include <linux/ioport.h> -#include <linux/delay.h> -#include <linux/config.h> -#include <linux/init.h> -#include <linux/apm_bios.h> -#ifdef CONFIG_BLK_DEV_RAM -#include <linux/blk.h> -#endif -#include <linux/highmem.h> -#include <linux/bootmem.h> -#include <linux/seq_file.h> -#include <linux/reboot.h> -#include <asm/processor.h> -#include <linux/console.h> -#include <linux/module.h> -#include <asm/mtrr.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/msr.h> -#include <asm/desc.h> -#include <asm/dma.h> -#include <asm/mpspec.h> -#include <asm/mmu_context.h> -#include <asm/ctrl_if.h> -#include <asm/hypervisor.h> -#include <asm-xen/xen-public/physdev.h> -#include <linux/netdevice.h> -#include <linux/rtnetlink.h> -#include <linux/tqueue.h> -#include <net/pkt_sched.h> /* dev_(de)activate */ - -/* - * Point at the empty zero page to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; - -unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list; - -/* - * Machine setup.. - */ - -char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; - -unsigned long mmu_cr4_features; - -unsigned char * vgacon_mmap; - -/* - * Bus types .. - */ -#ifdef CONFIG_EISA -int EISA_bus; -#endif -int MCA_bus; - -/* for MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; -unsigned int mca_pentium_flag; - -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; - -/* - * Setup options - */ -struct drive_info_struct { char dummy[32]; } drive_info; -struct screen_info screen_info; -struct apm_info apm_info; -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; - -unsigned char aux_device_present; - -extern int root_mountflags; -extern char _text, _etext, _edata, _end; - -extern int blk_nohighio; - -int enable_acpi_smp_table; - -/* Raw start-of-day parameters from the hypervisor. 
*/ -union xen_start_info_union xen_start_info_union; - -#define COMMAND_LINE_SIZE MAX_GUEST_CMDLINE -static char command_line[COMMAND_LINE_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; - -/* parse_mem_cmdline() - * returns the value of the mem= boot param converted to pages or 0 - */ -static int __init parse_mem_cmdline (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - unsigned long long bytes; - int mem_param = 0; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, xen_start_info.cmd_line, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to <mem>, overriding the bios size. - * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * <start> to <start>+<mem>, overriding the bios size. - */ - if (c == ' ' && !memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - } else if (!memcmp(from+4, "exactmap", 8)) { - from += 8+4; - } else { - bytes = memparse(from+4, &from); - mem_param = bytes>>PAGE_SHIFT; - if (*from == '@') - (void)memparse(from+1, &from); - } - } - - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - - return mem_param; -} - -/* - * Every exception-fixup table is sorted (i.e., kernel main table, and every - * module table). Some elements may be out of order if they reference text.init, - * for example. - */ -static void sort_exception_table(struct exception_table_entry *start, - struct exception_table_entry *end) -{ - struct exception_table_entry *p, *q, tmp; - - for ( p = start; p < end; p++ ) - { - for ( q = p-1; q > start; q-- ) - if ( p->insn > q->insn ) - break; - if ( ++q != p ) - { - tmp = *p; - memmove(q+1, q, (p-q)*sizeof(*p)); - *q = tmp; - } - } -} - -int xen_module_init(struct module *mod) -{ - sort_exception_table(mod->ex_table_start, mod->ex_table_end); - return 0; -} - -void __init setup_arch(char **cmdline_p) -{ - int i,j; - unsigned long bootmap_size, start_pfn, lmax_low_pfn; - int mem_param; /* user specified memory size in pages */ - int boot_pfn; /* low pages available for bootmem */ - physdev_op_t op; - - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - - extern unsigned long cpu0_pte_quicklist[]; - extern unsigned long cpu0_pgd_quicklist[]; - - extern const struct exception_table_entry __start___ex_table[]; - extern const struct exception_table_entry __stop___ex_table[]; - - extern char _stext; - - /* Force a quick death if the kernel panics. */ - extern int panic_timeout; - if ( panic_timeout == 0 ) - panic_timeout = 1; - - /* Ensure that the kernel exception-fixup table is sorted. */ - sort_exception_table(__start___ex_table, __stop___ex_table); - -#ifndef CONFIG_HIGHIO - blk_nohighio = 1; -#endif - - HYPERVISOR_vm_assist( - VMASST_CMD_enable, VMASST_TYPE_4gb_segments); - HYPERVISOR_vm_assist( - VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); - - boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; - boot_cpu_data.pte_quick = cpu0_pte_quicklist; - - /* This must be initialized to UNNAMED_MAJOR for ipconfig to work - properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. 
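/*
 * Worked example for parse_mem_cmdline() above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12): booting with "mem=256M" makes memparse() return
 * bytes = 256 << 20 = 268435456, so mem_param = bytes >> PAGE_SHIFT =
 * 65536 pages, which setup_arch() below compares against
 * xen_start_info.nr_pages.
 */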
*/ - ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - memset(&drive_info, 0, sizeof(drive_info)); - memset(&screen_info, 0, sizeof(screen_info)); - - /* This is drawn from a dump from vgacon:startup in standard Linux. */ - screen_info.orig_video_mode = 3; - screen_info.orig_video_isVGA = 1; - screen_info.orig_video_lines = 25; - screen_info.orig_video_cols = 80; - screen_info.orig_video_ega_bx = 3; - screen_info.orig_video_points = 16; - - memset(&apm_info.bios, 0, sizeof(apm_info.bios)); - aux_device_present = 0; -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = 0; - rd_prompt = 0; - rd_doload = 0; -#endif - - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) &_text; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - /* The mem= kernel command line param overrides the detected amount - * of memory. For xenolinux, if this override is larger than detected - * memory, then boot using only detected memory and make provisions to - * use all of the override value. The hypervisor can give this - * domain more memory later on and it will be added to the free - * lists at that time. See claim_new_pages() in - * arch/xen/drivers/balloon/balloon.c - */ - mem_param = parse_mem_cmdline(cmdline_p); - if (mem_param < xen_start_info.nr_pages) - mem_param = xen_start_info.nr_pages; - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - -/* - * 128MB for vmalloc(), iomap(), kmap(), and fixaddr mappings. - */ -#define VMALLOC_RESERVE (unsigned long)(128 << 20) -#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) -#define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define MAX_NONPAE_PFN (1 << 20) - - /* - * Determine low and high memory ranges: - */ - lmax_low_pfn = max_pfn = mem_param; - if (lmax_low_pfn > MAXMEM_PFN) { - lmax_low_pfn = MAXMEM_PFN; -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - max_pfn = lmax_low_pfn; -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_X86_PAE - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - } -#endif /* !CONFIG_X86_PAE */ -#endif /* !CONFIG_HIGHMEM */ - } - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > MAXMEM_PFN) { - highstart_pfn = MAXMEM_PFN; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - } -#endif - - phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list; - cur_pgd = init_mm.pgd = (pgd_t *)xen_start_info.pt_base; - - start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) + - xen_start_info.nr_pt_frames; - - /* - * Initialize the boot-time allocator, and free up all RAM. Then reserve - * space for OS image, initrd, phys->machine table, bootstrap page table, - * and the bootmem bitmap. - * NB. There is definitely enough room for the bootmem bitmap in the - * bootstrap page table. We are guaranteed to get >=512kB unused 'padding' - * for our own use after all bootstrap elements - * (see asm-xen/xen-public/xen.h). 
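/*
 * Quick illustration of the helpers above, assuming PAGE_SHIFT == 12
 * and PAGE_SIZE == 4096: PFN_UP(0x1801) == 2 (a partial page rounds
 * up), PFN_DOWN(0x1801) == 1 (truncates), and PFN_PHYS(2) == 0x2000.
 * MAXMEM is then what remains between PAGE_OFFSET and
 * HYPERVISOR_VIRT_START once the 128MB VMALLOC_RESERVE is set aside.
 */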
- */ - boot_pfn = min((int)xen_start_info.nr_pages,lmax_low_pfn); - bootmap_size = init_bootmem(start_pfn,boot_pfn); - free_bootmem(0, PFN_PHYS(boot_pfn)); - reserve_bootmem(__pa(&_stext), - PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 - - __pa(&_stext)); - - /* init_bootmem() set the global max_low_pfn to boot_pfn. Now max_low_pfn - * can be set to the override value. - */ - max_low_pfn = lmax_low_pfn; - -#ifdef CONFIG_BLK_DEV_INITRD - if ( xen_start_info.mod_start != 0 ) - { - if ( (__pa(xen_start_info.mod_start) + xen_start_info.mod_len) <= - (max_low_pfn << PAGE_SHIFT) ) - { - initrd_start = xen_start_info.mod_start; - initrd_end = initrd_start + xen_start_info.mod_len; - initrd_below_start_ok = 1; - } - else - { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - __pa(xen_start_info.mod_start) + xen_start_info.mod_len, - max_low_pfn << PAGE_SHIFT); - initrd_start = 0; - } - } -#endif - - paging_init(); - - /* Make sure we have a correctly sized P->M table. */ - if ( max_pfn != xen_start_info.nr_pages ) - { - phys_to_machine_mapping = alloc_bootmem_low_pages( - max_pfn * sizeof(unsigned long)); - if ( max_pfn > xen_start_info.nr_pages ) - { - memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); - memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); - } - else - { - memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - max_pfn * sizeof(unsigned long)); - if (HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, - (unsigned long *)xen_start_info.mfn_list + max_pfn, - xen_start_info.nr_pages - max_pfn, 0) != - (xen_start_info.nr_pages - max_pfn)) - BUG(); - } - free_bootmem(__pa(xen_start_info.mfn_list), - PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); - } - - pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) - { - pfn_to_mfn_frame_list[j] = - virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; - } - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = - virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - - op.cmd = PHYSDEVOP_SET_IOPL; - op.u.set_iopl.iopl = current->thread.io_pl = 1; - HYPERVISOR_physdev_op(&op); - - if (xen_start_info.flags & SIF_INITDOMAIN ) - { - if( !(xen_start_info.flags & SIF_PRIVILEGED) ) - panic("Xen granted us console access but not privileged status"); - -#if defined(CONFIG_VT) -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif - } -} - -static int cachesize_override __initdata = -1; -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init highio_setup(char *str) -{ - printk("i386: disabling HIGHMEM block I/O\n"); - blk_nohighio = 1; - return 1; -} -__setup("nohighio", highio_setup); - -static int __init get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( 
*p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -static void __init display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just have a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* AMD errata T13 (order #21922) */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - l2size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - l2size = 256; - } - - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && - (c->x86_model == 11) && (l2size == 0)) - l2size = 256; - - if (c->x86_vendor == X86_VENDOR_CENTAUR) { - /* VIA C3 CPUs (670-68F) need further shifting. */ - if ((c->x86 == 6) && - ((c->x86_model == 7) || (c->x86_model == 8))) { - l2size >>= 8; - } - - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. */ - if ((c->x86==6) && (c->x86_model==9) && - (c->x86_mask==1) && (l2size==65)) - l2size -= 1; - } - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -static void __init init_c3(struct cpuinfo_x86 *c) -{ - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } - - switch (c->x86_model) { - case 9: /* Nehemiah */ - default: - get_model_name(c); - display_cacheinfo(c); - break; - } -} - -static void __init init_centaur(struct cpuinfo_x86 *c) -{ - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - switch (c->x86) { - case 6: - init_c3(c); - break; - default: - panic("Unsupported Centaur CPU (%i)\n", c->x86); - } -} - -static int __init init_amd(struct cpuinfo_x86 *c) -{ - int r; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 5: /* We don't like AMD K6 */ - panic("Unsupported AMD processor\n"); - case 6: /* An Athlon/Duron. 
We can trust the BIOS probably */ - break; - } - - display_cacheinfo(c); - return r; -} - - -static void __init init_intel(struct cpuinfo_x86 *c) -{ - char *p = NULL; - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - - if (c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char dl, dh; - unsigned int cs; - - dh = des >> 4; - dl = des & 0x0F; - - /* Black magic... */ - - switch ( dh ) - { - case 0: - switch ( dl ) { - case 6: - /* L1 I cache */ - l1i += 8; - break; - case 8: - /* L1 I cache */ - l1i += 16; - break; - case 10: - /* L1 D cache */ - l1d += 8; - break; - case 12: - /* L1 D cache */ - l1d += 16; - break; - default:; - /* TLB, or unknown */ - } - break; - case 2: - if ( dl ) { - /* L3 cache */ - cs = (dl-1) << 9; - l3 += cs; - } - break; - case 4: - if ( c->x86 > 6 && dl ) { - /* P4 family */ - /* L3 cache */ - cs = 128 << (dl-1); - l3 += cs; - break; - } - /* else same as 8 - fall through */ - case 8: - if ( dl ) { - /* L2 cache */ - cs = 128 << (dl-1); - l2 += cs; - } - break; - case 6: - if (dl > 5) { - /* L1 D cache */ - cs = 8<<(dl-6); - l1d += cs; - } - break; - case 7: - if ( dl >= 8 ) - { - /* L2 cache */ - cs = 64<<(dl-8); - l2 += cs; - } else { - /* L0 I cache, count as L1 */ - cs = dl ? (16 << (dl-1)) : 12; - l1i += cs; - } - break; - default: - /* TLB, or something else we don't know about */ - break; - } - } - } - if ( l1i || l1d ) - printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", - l1i, l1d); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ - if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) - clear_bit(X86_FEATURE_SEP, &c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. 
*/ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (l2 == 0) - p = "Celeron (Covington)"; - if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); -} - -void __init get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* Naming convention should be: <Name> [(<Codename>)] */ -/* This table is only used if init_<vendor>() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ -static struct cpu_model_info cpu_models[] __initdata = { - { X86_VENDOR_INTEL, 6, - { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", - NULL, "Pentium II (Deschutes)", "Mobile Pentium II", - "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, - "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, - { X86_VENDOR_AMD, 6, /* Is this really necessary?? */ - { "Athlon", "Athlon", - "Athlon", NULL, "Athlon", NULL, - NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL }} -}; - -/* Look up CPU names by table lookup. */ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info = cpu_models; - int i; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { - if ( info->vendor == c->x86_vendor && - info->family == c->x86 ) { - return info->model_names[c->x86_model]; - } - info++; - } - return NULL; /* Not found */ -} - - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - - - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -unsigned char eddnr; -struct edd_info edd[EDDMAXNR]; -unsigned int edd_disk80_sig; -/** - * copy_edd() - Copy the BIOS EDD information - * from empty_zero_page into a safe place. - * - */ -static inline void copy_edd(void) -{ - eddnr = EDD_NR; - memcpy(edd, EDD_BUF, sizeof(edd)); - edd_disk80_sig = DISK80_SIGNATURE_BUFFER; -} -#else -static inline void copy_edd(void) {} -#endif - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __init identify_cpu(struct cpuinfo_x86 *c) -{ - int junk, i; - u32 xlvl, tfms; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->hard_math = 1; - - if ( !have_cpuid_p() ) { - panic("Processor must support CPUID\n"); - } else { - /* CPU does have CPUID */ - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { - c->x86 += (tfms >> 20) & 0xff; - c->x86_model += ((tfms >> 16) & 0xF) << 4; - } - c->x86_mask = tfms & 15; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) - c->x86_capability[1] = cpuid_edx(0x80000001); - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - } - - printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_vendor); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - switch ( c->x86_vendor ) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - default: - printk("Unsupported CPU vendor (%d) -- please report!\n", - c->x86_vendor); - } - - printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. 
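(The family/model/stepping unpacking above can be checked by hand; a worked sketch of the same decode, with illustrative values — family 0xf is the only case that pulls in the extended fields:

    /* Worked decode of the CPUID leaf-1 signature, as in identify_cpu().
     * Example: tfms = 0x00000f29 (a Northwood-era Pentium 4):
     *   base family = (tfms >> 8) & 15 = 0xf  -> extended fields apply
     *   base model  = (tfms >> 4) & 15 = 0x2
     *   stepping    =  tfms       & 15 = 0x9
     *   ext family  = (tfms >> 20) & 0xff = 0 -> family stays 15
     *   ext model   = (tfms >> 16) & 0xf  = 0 -> model stays 2   */
    static void decode_signature(unsigned int tfms,
                                 int *family, int *model, int *stepping)
    {
        *family = (tfms >> 8) & 15;
        *model = (tfms >> 4) & 15;
        *stepping = tfms & 15;
        if (*family == 0xf) {
            *family += (tfms >> 20) & 0xff;
            *model += ((tfms >> 16) & 0xf) << 4;
        }
    }

)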
- */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", - boot_cpu_data.x86_capability[0], - boot_cpu_data.x86_capability[1], - boot_cpu_data.x86_capability[2], - boot_cpu_data.x86_capability[3]); -} - - -/* These need to match <asm/processor.h> */ -static char *cpu_vendor_names[] __initdata = { - "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; - - -void __init print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) - vendor = cpu_vendor_names[c->x86_vendor]; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - -/* - * Get CPU information for use by the procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in <asm/cpufeature.h>. - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu/<cpu_nr>/cpuid instead. - */ - static char *x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, - NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "tm2", - "est", NULL, "cid", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!(cpu_online_map & (1<<n))) - return 0; -#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", - cpu_khz / 1000, (cpu_khz % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - start: c_start, - next: c_next, - stop: c_stop, - show: show_cpuinfo, -}; - -unsigned long cpu_initialized __initdata = 0; - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __init cpu_init (void) -{ - int nr = smp_processor_id(); - - if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); - for (;;) __sti(); - } - printk(KERN_INFO "Initializing CPU#%d\n", nr); - - /* - * set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if(current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current, nr); - - HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); - - load_LDT(&init_mm.context); - - /* Force FPU initialization. 
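(The bogomips line printed above is pure integer arithmetic on loops_per_jiffy; a standalone sketch of the same formatting, assuming the usual convention that one delay-loop iteration costs two clocks:

    #include <stdio.h>

    /* Sketch of the bogomips formatting above: integer and two-digit
     * fractional part of lpj * HZ * 2 / 1e6, done without floats. */
    static void print_bogomips(unsigned long lpj, unsigned long hz)
    {
        printf("bogomips\t: %lu.%02lu\n",
               lpj / (500000 / hz), (lpj / (5000 / hz)) % 100);
    }

    /* print_bogomips(2495000, 100) prints "bogomips : 499.00". */

)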
*/ - current->flags &= ~PF_USEDFPU; - current->used_math = 0; - stts(); -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/signal.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/signal.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,717 +0,0 @@ -/* - * linux/arch/i386/kernel/signal.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/tty.h> -#include <linux/personality.h> -#include <asm/ucontext.h> -#include <asm/uaccess.h> -#include <asm/i387.h> - -#define DEBUG_SIG 0 - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); - -int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -{ - if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) - return -EFAULT; - if (from->si_code < 0) - return __copy_to_user(to, from, sizeof(siginfo_t)); - else { - int err; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - /* First 32bits of unions are always present. */ - err |= __put_user(from->si_pid, &to->si_pid); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - default: - err |= __put_user(from->si_uid, &to->si_uid); - break; - /* case __SI_RT: This is not generated by the kernel as of now. */ - } - return err; - } -} - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - struct pt_regs * regs = (struct pt_regs *) &history0; - sigset_t saveset; - - mask &= _BLOCKABLE; - spin_lock_irq(&current->sigmask_lock); - saveset = current->blocked; - siginitset(&current->blocked, mask); - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -{ - struct pt_regs * regs = (struct pt_regs *) &unewset; - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's.
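(For context, the loop above is the kernel half of the usual sigsuspend(2) contract: install the new mask atomically, sleep until a handled signal arrives, then return -EINTR with the old mask restored. A minimal userspace illustration of that contract — ordinary POSIX code, not part of this patch:

    #include <signal.h>
    #include <stdio.h>

    static volatile sig_atomic_t got_usr1;
    static void on_usr1(int sig) { (void)sig; got_usr1 = 1; }

    int main(void)
    {
        sigset_t block, wait_mask;
        struct sigaction sa;

        sa.sa_handler = on_usr1;
        sigemptyset(&sa.sa_mask);
        sa.sa_flags = 0;
        sigaction(SIGUSR1, &sa, NULL);

        /* Block SIGUSR1, then atomically unblock it and sleep. */
        sigemptyset(&block);
        sigaddset(&block, SIGUSR1);
        sigprocmask(SIG_BLOCK, &block, &wait_mask);
        sigdelset(&wait_mask, SIGUSR1);
        while (!got_usr1)
            sigsuspend(&wait_mask);  /* returns -1 with errno == EINTR */
        printf("got SIGUSR1\n");
        return 0;
    }

)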
*/ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(&current->sigmask_lock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - -asmlinkage int -sys_sigaltstack(const stack_t *uss, stack_t *uoss) -{ - struct pt_regs *regs = (struct pt_regs *) &uss; - return do_sigaltstack(uss, uoss, regs->esp); -} - - -/* - * Do a signal return; undo the signal stack. - */ - -struct sigframe -{ - char *pretcode; - int sig; - struct sigcontext sc; - struct _fpstate fpstate; - unsigned long extramask[_NSIG_WORDS-1]; - char retcode[8]; -}; - -struct rt_sigframe -{ - char *pretcode; - int sig; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - struct ucontext uc; - struct _fpstate fpstate; - char retcode[8]; -}; - -static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) -{ - unsigned int err = 0; - -#define COPY(x) err |= __get_user(regs->x, &sc->x) - -#define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp; } - -#define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp|3; } - -#define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg,tmp); } - - GET_SEG(gs); - GET_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); - COPY(edi); - COPY(esi); - COPY(ebp); - COPY(esp); - COPY(ebx); - COPY(edx); - COPY(ecx); - COPY(eip); - COPY_SEG_STRICT(cs); - COPY_SEG_STRICT(ss); - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_eax = -1; /* disable syscall checks */ - } - - { - struct _fpstate * buf; - err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } - } - - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; -} - -asmlinkage int sys_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct sigframe *frame = (struct sigframe *)(regs->esp - 8); - sigset_t set; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask) - || (_NSIG_WORDS > 1 - &&
__copy_from_user(&set.sig[1], &frame->extramask, - sizeof(frame->extramask)))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - if (restore_sigcontext(regs, &frame->sc, &eax)) - goto badframe; - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); - sigset_t set; - stack_t st; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) - goto badframe; - - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs->esp); - - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -/* - * Set up a signal frame. - */ - -static int -setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, - struct pt_regs *regs, unsigned long mask) -{ - int tmp, err = 0; - - tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); - - tmp = save_i387(fpstate); - if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); - - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); - - return err; -} - -/* - * Determine which stack to use.. - */ -static inline void * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long esp; - - /* Default to using normal stack */ - esp = regs->esp; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(esp) == 0) - esp = current->sas_ss_sp + current->sas_ss_size; - } - - /* This is the legacy signal stack switching.
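(The frame placement in get_sigframe() above rounds the stack pointer down to an 8-byte boundary; -8ul is just ~7ul. A tiny sketch with example values:

    /* Frame placement as in get_sigframe(): subtract the frame size,
     * then round down to an 8-byte boundary (-8ul == ~7ul).
     * Example: esp = 0xbfffe123, frame_size = 0x2a8:
     *   0xbfffe123 - 0x2a8 = 0xbfffde7b; & ~7 -> 0xbfffde78. */
    static unsigned long place_frame(unsigned long esp, unsigned long frame_size)
    {
        return (esp - frame_size) & -8ul;
    }

)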
*/ - else if ((regs->xss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - esp = (unsigned long) ka->sa.sa_restorer; - } - - return (void *)((esp - frame_size) & -8ul); -} - -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) -{ - struct sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - if (err) - goto give_sigsegv; - - err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); - if (err) - goto give_sigsegv; - - if (_NSIG_WORDS > 1) { - err |= __copy_to_user(frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - } - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is popl %eax ; movl $,%eax ; int $0x80 */ - err |= __put_user(0xb858, (short *)(frame->retcode+0)); - err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); - err |= __put_user(0x80cd, (short *)(frame->retcode+6)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) -{ - struct rt_sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->esp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. 
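(The retcode bytes stored above assemble into the classic sigreturn trampoline. Spelled out as a byte array — the 119 below is the historical i386 __NR_sigreturn, shown purely for illustration:

    /* The 8-byte retcode written above, little-endian, spelled out:
     *   popl %eax                 -> 0x58
     *   movl $__NR_sigreturn,%eax -> 0xb8 imm32
     *   int  $0x80                -> 0xcd 0x80
     * so the 16-bit store of 0xb858 at offset 0 lays down 0x58 0xb8,
     * and 0x80cd at offset 6 lays down 0xcd 0x80. */
    static const unsigned char retcode[8] = {
        0x58,               /* popl %eax */
        0xb8, 119, 0, 0, 0, /* movl $119,%eax (historical i386 __NR_sigreturn) */
        0xcd, 0x80          /* int $0x80 */
    };

)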
*/ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is movl $,%eax ; int $0x80 */ - err |= __put_user(0xb8, (char *)(frame->retcode+0)); - err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); - err |= __put_user(0x80cd, (short *)(frame->retcode+5)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -/* - * OK, we're invoking a handler - */ - -static void -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) -{ - /* Are we from a system call? */ - if (regs->orig_eax >= 0) { - /* If so, check system call restarting.. */ - switch (regs->eax) { - case -ERESTARTNOHAND: - regs->eax = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->eax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); - else - setup_frame(sig, ka, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; - - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sigmask_lock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); - sigaddset(&current->blocked,sig); - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - } -} - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) -{ - siginfo_t info; - struct k_sigaction *ka; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if ((regs->xcs & 2) != 2) - return 1; - - if (!oldset) - oldset = &current->blocked; - - for (;;) { - unsigned long signr; - - spin_lock_irq(&current->sigmask_lock); - signr = dequeue_signal(&current->blocked, &info); - spin_unlock_irq(&current->sigmask_lock); - - if (!signr) - break; - - if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { - /* Let the debugger run. */ - current->exit_code = signr; - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); - - /* We're back. Did the debugger cancel the sig? */ - if (!(signr = current->exit_code)) - continue; - current->exit_code = 0; - - /* The debugger continued. Ignore SIGSTOP. */ - if (signr == SIGSTOP) - continue; - - /* Update the siginfo structure. Is this good? */ - if (signr != info.si_signo) { - info.si_signo = signr; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = current->p_pptr->pid; - info.si_uid = current->p_pptr->uid; - } - - /* If the (new) signal is now blocked, requeue it.
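(The regs->eip -= 2 in the restart paths above rewinds the saved user PC over the two-byte int $0x80 opcode, 0xCD 0x80, so the syscall is re-issued on return to user mode. A sketch with a stand-in register struct, not the kernel's pt_regs:

    /* Sketch of the syscall-restart rewind. */
    struct regs_sketch { long eax, orig_eax, eip; };

    static void restart_syscall(struct regs_sketch *regs)
    {
        regs->eax = regs->orig_eax; /* re-load the syscall number */
        regs->eip -= 2;             /* back over the int $0x80 (0xcd 0x80) */
    }

)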
*/ - if (sigismember(&current->blocked, signr)) { - send_sig_info(signr, &info, current); - continue; - } - } - - ka = &current->sig->action[signr-1]; - if (ka->sa.sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* Check for SIGCHLD: it's special. */ - while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) - /* nothing */; - continue; - } - - if (ka->sa.sa_handler == SIG_DFL) { - int exit_code = signr; - - /* Init gets no signals it doesn't want. */ - if (current->pid == 1) - continue; - - switch (signr) { - case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: - continue; - - case SIGTSTP: case SIGTTIN: case SIGTTOU: - if (is_orphaned_pgrp(current->pgrp)) - continue; - /* FALLTHRU */ - - case SIGSTOP: { - struct signal_struct *sig; - current->state = TASK_STOPPED; - current->exit_code = signr; - sig = current->p_pptr->sig; - if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - notify_parent(current, SIGCHLD); - schedule(); - continue; - } - - case SIGQUIT: case SIGILL: case SIGTRAP: - case SIGABRT: case SIGFPE: case SIGSEGV: - case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: - if (do_coredump(signr, regs)) - exit_code |= 0x80; - /* FALLTHRU */ - - default: - sig_exit(signr, exit_code, &info); - /* NOTREACHED */ - } - } - - /* Reenable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if ( current->thread.debugreg[7] != 0 ) - HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); - - /* Whee! Actually deliver the signal. */ - handle_signal(signr, ka, &info, oldset, regs); - return 1; - } - - /* Did we come from a system call? */ - if (regs->orig_eax >= 0) { - /* Restart the system call - no handlers present */ - if (regs->eax == -ERESTARTNOHAND || - regs->eax == -ERESTARTSYS || - regs->eax == -ERESTARTNOINTR) { - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - return 0; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/time.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/time.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,741 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge - * (C) 2002-2003 - Keir Fraser - University of Cambridge - **************************************************************************** - * - * File: arch/xen/kernel/time.c - * Author: Rolf Neugebauer and Keir Fraser - * - * Description: Interface with Xen to get correct notion of time - */ - -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update - * 1996-05-03 Ingo Molnar - * fixed time warps in do_[slow|fast]_gettimeoffset() - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-09-05 (Various) - * More robust do_fast_gettimeoffset() algorithm implemented - * (works with APM, Cyrix 6x86MX and Centaur C6), - * monotonic gettimeofday() with fast_get_timeoffset(), - * drift-proof precision TSC calibration on boot - * (C.
Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, Andrew D. - * Balsa <andrebalsa@xxxxxxxxxx>, Philip Gladstone <philip@xxxxxxxxxx>; - * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@xxxxxxxxxxxxx>). - * 1998-12-16 Andrea Arcangeli - * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy - * because was not accounting lost_ticks. - * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli - * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). - */ - -#include <asm/smp.h> -#include <asm/irq.h> -#include <asm/msr.h> -#include <asm/delay.h> -#include <asm/mpspec.h> -#include <asm/uaccess.h> -#include <asm/processor.h> - -#include <asm/div64.h> -#include <asm/hypervisor.h> -#include <asm-xen/xen-public/dom0_ops.h> - -#include <linux/mc146818rtc.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/time.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/irq.h> -#include <linux/sysctl.h> -#include <linux/sysrq.h> - -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; - -unsigned long cpu_khz; /* get this from Xen, used elsewhere */ - -static unsigned int rdtsc_bitshift; -static u32 st_scale_f; /* convert ticks -> usecs */ -static u32 st_scale_i; /* convert ticks -> usecs */ - -/* These are peridically updated in shared_info, and then copied here. */ -static u32 shadow_tsc_stamp; -static u64 shadow_system_time; -static u32 shadow_time_version; -static struct timeval shadow_tv; - -/* - * We use this to ensure that gettimeofday() is monotonically increasing. We - * only break this guarantee if the wall clock jumps backwards "a long way". - */ -static struct timeval last_seen_tv = {0,0}; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* Periodically propagate synchronised time base to the RTC and to Xen. */ -static long last_update_to_rtc, last_update_to_xen; -#endif - -/* Periodically take synchronised time base from Xen, if we need it. */ -static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ - -/* Keep track of last time we did processing/updating of jiffies and xtime. */ -static u64 processed_system_time; /* System time (ns) at last processing. */ - -#define NS_PER_TICK (1000000000ULL/HZ) - -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC (1000000000L) -#endif - -#define HANDLE_USEC_UNDERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec < 0 ) \ - { \ - (_tv).tv_usec += 1000000; \ - (_tv).tv_sec--; \ - } \ - } while ( 0 ) -#define HANDLE_USEC_OVERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec >= 1000000 ) \ - { \ - (_tv).tv_usec -= 1000000; \ - (_tv).tv_sec++; \ - } \ - } while ( 0 ) -static inline void __normalize_time(time_t *sec, s64 *nsec) -{ - while (*nsec >= NSEC_PER_SEC) { - (*nsec) -= NSEC_PER_SEC; - (*sec)++; - } - while (*nsec < 0) { - (*nsec) += NSEC_PER_SEC; - (*sec)--; - } -} - -/* Dynamically-mapped IRQs. */ -static int time_irq, debug_irq; - -/* Does this guest OS track Xen time, or set its wall clock independently? 
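(__normalize_time() above just carries whole seconds out of the nanosecond field; a self-contained version with a worked example:

    #include <time.h>

    #define NSEC_PER_SEC 1000000000L

    /* Self-contained __normalize_time(): carry whole seconds so that
     * 0 <= nsec < 1e9. E.g. sec=10, nsec=2500000000 -> sec=12, nsec=500000000. */
    static void normalize_time(time_t *sec, long long *nsec)
    {
        while (*nsec >= NSEC_PER_SEC) { *nsec -= NSEC_PER_SEC; (*sec)++; }
        while (*nsec < 0)             { *nsec += NSEC_PER_SEC; (*sec)--; }
    }

)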
*/ -static int independent_wallclock = 0; -static int __init __independent_wallclock(char *str) -{ - independent_wallclock = 1; - return 1; -} -__setup("independent_wallclock", __independent_wallclock); -#define INDEPENDENT_WALLCLOCK() \ - (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN)) - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ -static int set_rtc_mmss(unsigned long nowtime) -{ - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - /* gets recalled with irq locally disabled */ - spin_lock(&rtc_lock); - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - BCD_TO_BIN(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, don't interfere with - * hour overflow. This avoids messing with unknown time zones but requires - * your RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 ) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if ( abs(real_minutes - cmos_minutes) < 30 ) - { - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } - else - { - printk(KERN_WARNING - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; -} -#endif - - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called with the xtime_lock held for writing. 
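(set_rtc_mmss() above leans on the BCD_TO_BIN/BIN_TO_BCD helpers, since CMOS RTC registers usually hold binary-coded decimal, e.g. 0x59 for 59. Written out as plain functions:

    /* BCD helpers behind BCD_TO_BIN/BIN_TO_BCD above. */
    static unsigned int bcd_to_bin(unsigned int v)
    {
        return (v & 0x0f) + (v >> 4) * 10;   /* bcd_to_bin(0x59) == 59 */
    }

    static unsigned int bin_to_bcd(unsigned int v)
    {
        return ((v / 10) << 4) | (v % 10);   /* bin_to_bcd(59) == 0x59 */
    }

)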
- */ -static void __get_time_values_from_xen(void) -{ - shared_info_t *s = HYPERVISOR_shared_info; - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - src = &s->vcpu_time[smp_processor_id()]; - dst = &per_cpu(shadow_time, smp_processor_id()); - - do { - dst->version = src->time_version2; - rmb(); - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_timestamp = src->system_time; - dst->tsc_to_nsec_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); - } - while (dst->version != src->time_version1); - - dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; - - if ((shadow_tv.tv_sec != s->wc_sec) || - (shadow_tv.tv_usec != s->wc_usec)) - update_wallclock(); -} - - -/* - * Returns the system time elapsed, in ns, since the current shadow_timestamp - * was calculated. Must be called with the xtime_lock held for reading. - */ -static inline unsigned long __get_time_delta_usecs(void) -{ - s32 delta_tsc; - u32 low; - u64 delta, tsc; - - rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - shadow_tsc_stamp); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); - - return (unsigned long)delta; -} - -static inline int time_values_up_to_date() -{ - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()]; - dst = &per_cpu(shadow_time, smp_processor_id()); - - return (dst->version == src->time_version2); -} - - -/* - * Returns the current time-of-day in UTC timeval format. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags, lost; - struct timeval _tv; - s64 nsec; - - again: - read_lock_irqsave(&xtime_lock, flags); - - _tv.tv_usec = __get_time_delta_usecs(); - if ( (lost = (jiffies - wall_jiffies)) != 0 ) - _tv.tv_usec += lost * (1000000 / HZ); - _tv.tv_sec = xtime.tv_sec; - _tv.tv_usec += xtime.tv_usec; - - nsec = shadow_system_time - processed_system_time; - __normalize_time(&_tv.tv_sec, &nsec); - _tv.tv_usec += (long)nsec / 1000L; - - if ( unlikely(!time_values_up_to_date()) ) - { - /* - * We may have blocked for a long time, rendering our calculations - * invalid (e.g. the time delta may have overflowed). Detect that - * and recalculate with fresh values. - */ - read_unlock_irqrestore(&xtime_lock, flags); - write_lock_irqsave(&xtime_lock, flags); - __get_time_values_from_xen(); - write_unlock_irqrestore(&xtime_lock, flags); - goto again; - } - - HANDLE_USEC_OVERFLOW(_tv); - - /* Ensure that time-of-day is monotonically increasing. */ - if ( (_tv.tv_sec < last_seen_tv.tv_sec) || - ((_tv.tv_sec == last_seen_tv.tv_sec) && - (_tv.tv_usec < last_seen_tv.tv_usec)) ) - _tv = last_seen_tv; - last_seen_tv = _tv; - - read_unlock_irqrestore(&xtime_lock, flags); - - *tv = _tv; -} - - -/* - * Sets the current time-of-day based on passed-in UTC timeval parameter. - */ -void do_settimeofday(struct timeval *tv) -{ - struct timeval newtv; - s64 nsec; - suseconds_t usec; - - if ( !INDEPENDENT_WALLCLOCK() ) - return; - - write_lock_irq(&xtime_lock); - - /* - * Ensure we don't get blocked for a long time so that our time delta - * overflows. If that were to happen then our shadow time values would - * be stale, so we can retry with fresh ones. 
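(__get_time_delta_usecs() above is 32.32 fixed-point arithmetic: st_scale_i carries the integer part of the ticks-to-microseconds ratio and st_scale_f the fractional part. The same computation as a standalone sketch:

    #include <stdint.h>

    /* 32.32 fixed-point ticks->usecs, as in __get_time_delta_usecs():
     * usecs = delta * (scale_i + scale_f / 2^32). */
    static unsigned long tsc_delta_to_usecs(uint32_t delta,
                                            uint32_t scale_i, uint32_t scale_f)
    {
        uint64_t d = delta;
        return (unsigned long)(((d * scale_f) >> 32) + d * scale_i);
    }

)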
- */ - again: - usec = tv->tv_usec - __get_time_delta_usecs(); - - nsec = shadow_system_time - processed_system_time; - __normalize_time(&tv->tv_sec, &nsec); - usec -= (long)nsec / 1000L; - - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) - { - __get_time_values_from_xen(); - goto again; - } - tv->tv_usec = usec; - - HANDLE_USEC_UNDERFLOW(*tv); - - newtv = *tv; - - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); - HANDLE_USEC_UNDERFLOW(*tv); - - xtime = *tv; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - - /* Reset all our running time counts. They make no sense now. */ - last_seen_tv.tv_sec = 0; - last_update_from_xen = 0; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ( xen_start_info.flags & SIF_INITDOMAIN ) - { - dom0_op_t op; - last_update_to_rtc = last_update_to_xen = 0; - op.cmd = DOM0_SETTIME; - op.u.settime.secs = newtv.tv_sec; - op.u.settime.usecs = newtv.tv_usec; - op.u.settime.system_time = shadow_system_time; - write_unlock_irq(&xtime_lock); - HYPERVISOR_dom0_op(&op); - } - else -#endif - { - write_unlock_irq(&xtime_lock); - } -} - - -asmlinkage long sys_stime(int *tptr) -{ - int value; - struct timeval tv; - - if ( !capable(CAP_SYS_TIME) ) - return -EPERM; - - if ( get_user(value, tptr) ) - return -EFAULT; - - tv.tv_sec = value; - tv.tv_usec = 0; - - do_settimeofday(&tv); - - return 0; -} - - -/* Convert jiffies to system time. Call with xtime_lock held for reading. */ -static inline u64 __jiffies_to_st(unsigned long j) -{ - return processed_system_time + ((j - jiffies) * NS_PER_TICK); -} - - -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - s64 delta; - unsigned long ticks = 0; - long sec_diff; - - do { - __get_time_values_from_xen(); - - delta = (s64)(shadow_system_time + - ((s64)__get_time_delta_usecs() * 1000LL) - - processed_system_time); - } - while ( !TIME_VALUES_UP_TO_DATE ); - - if ( unlikely(delta < 0) ) - { - printk("Timer ISR: Time went backwards: %lld\n", delta); - return; - } - - /* Process elapsed jiffies since last call. */ - while ( delta >= NS_PER_TICK ) - { - ticks++; - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - } - - if ( ticks != 0 ) - { - do_timer_ticks(ticks); - - if ( user_mode(regs) ) - update_process_times_us(ticks, 0); - else - update_process_times_us(0, ticks); - } - - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - if ( !INDEPENDENT_WALLCLOCK() && - ((time_status & STA_UNSYNC) != 0) && - (xtime.tv_sec > (last_update_from_xen + 60)) ) - { - /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */ - shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_UNDERFLOW(shadow_tv); - - /* - * Reset our running time counts if they are invalidated by a warp - * backwards of more than 500ms. - */ - sec_diff = xtime.tv_sec - shadow_tv.tv_sec; - if ( unlikely(abs(sec_diff) > 1) || - unlikely(((sec_diff * 1000000) + - xtime.tv_usec - shadow_tv.tv_usec) > 500000) ) - { -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - last_update_to_rtc = last_update_to_xen = 0; -#endif - last_seen_tv.tv_sec = 0; - } - - /* Update our unsynchronised xtime appropriately. 
*/ - xtime = shadow_tv; - - last_update_from_xen = xtime.tv_sec; - } - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ( (xen_start_info.flags & SIF_INITDOMAIN) && - ((time_status & STA_UNSYNC) == 0) ) - { - /* Send synchronised time to Xen approximately every minute. */ - if ( xtime.tv_sec > (last_update_to_xen + 60) ) - { - dom0_op_t op; - struct timeval tv = xtime; - - tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_OVERFLOW(tv); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tv.tv_sec; - op.u.settime.usecs = tv.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - last_update_to_xen = xtime.tv_sec; - } - - /* - * If we have an externally synchronized Linux clock, then update CMOS - * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called - * as close as possible to 500 ms before the new second starts. - */ - if ( (xtime.tv_sec > (last_update_to_rtc + 660)) && - (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) && - (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) ) - { - if ( set_rtc_mmss(xtime.tv_sec) == 0 ) - last_update_to_rtc = xtime.tv_sec; - else - last_update_to_rtc = xtime.tv_sec - 600; - } - } -#endif -} - - -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - write_lock(&xtime_lock); - do_timer_interrupt(irq, NULL, regs); - write_unlock(&xtime_lock); -} - -static struct irqaction irq_timer = { - timer_interrupt, - SA_INTERRUPT, - 0, - "timer", - NULL, - NULL -}; - - -/* - * This function works out when the the next timer function has to be - * executed (by looking at the timer list) and sets the Xen one-shot - * domain timer to the appropriate value. This is typically called in - * cpu_idle() before the domain blocks. - * - * The function returns a non-0 value on error conditions. - * - * It must be called with interrupts disabled. - */ -extern spinlock_t timerlist_lock; -int set_timeout_timer(void) -{ - struct timer_list *timer; - u64 alarm = 0; - int ret = 0; - - spin_lock(&timerlist_lock); - - /* - * This is safe against long blocking (since calculations are not based on - * TSC deltas). It is also safe against warped system time since - * suspend-resume is cooperative and we would first get locked out. It is - * safe against normal updates of jiffies since interrupts are off. - */ - if ( (timer = next_timer_event()) != NULL ) - alarm = __jiffies_to_st(timer->expires); - - /* Tasks on the timer task queue expect to be executed on the next tick. */ - if ( TQ_ACTIVE(tq_timer) ) - alarm = __jiffies_to_st(jiffies + 1); - - /* Failure is pretty bad, but we'd best soldier on. */ - if ( HYPERVISOR_set_timer_op(alarm) != 0 ) - ret = -1; - - spin_unlock(&timerlist_lock); - - return ret; -} - - -/* Time debugging. 
*/ -static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - unsigned long flags, j; - u64 s_now, j_st; - struct timeval s_tv, tv; - - struct timer_list *timer; - u64 t_st; - - read_lock_irqsave(&xtime_lock, flags); - s_tv.tv_sec = shadow_tv.tv_sec; - s_tv.tv_usec = shadow_tv.tv_usec; - s_now = shadow_system_time; - read_unlock_irqrestore(&xtime_lock, flags); - - do_gettimeofday(&tv); - - j = jiffies; - j_st = __jiffies_to_st(j); - - timer = next_timer_event(); - t_st = __jiffies_to_st(timer->expires); - - printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n", - (u32)(s_now>>32), (u32)s_now); - printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n", - tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec); - printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n", - jiffies,(u32)(j_st>>32), (u32)j_st, - timer->expires,(u32)(t_st>>32), (u32)t_st); - printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n", - (u32)(processed_system_time>>32), (u32)processed_system_time); - -#ifdef CONFIG_MAGIC_SYSRQ - handle_sysrq('t',NULL,NULL,NULL); -#endif -} - -static struct irqaction dbg_time = { - dbg_time_int, - SA_SHIRQ, - 0, - "timer_dbg", - &dbg_time_int, - NULL -}; - -void __init time_init(void) -{ - unsigned long long alarm; - u64 __cpu_khz, __cpu_ghz, cpu_freq, scale, scale2; - unsigned int cpu_ghz; - - __cpu_khz = __cpu_ghz = cpu_freq = HYPERVISOR_shared_info->cpu_freq; - do_div(__cpu_khz, 1000UL); - cpu_khz = (u32)__cpu_khz; - do_div(__cpu_ghz, 1000000000UL); - cpu_ghz = (unsigned int)__cpu_ghz; - - printk("Xen reported: %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - xtime.tv_sec = HYPERVISOR_shared_info->wc_sec; - xtime.tv_usec = HYPERVISOR_shared_info->wc_usec; - processed_system_time = shadow_system_time; - - for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) - continue; - - scale = 1000000LL << (32 + rdtsc_bitshift); - do_div(scale, (u32)cpu_freq); - - if ( (cpu_freq >> 32) != 0 ) - { - scale2 = 1000000LL << rdtsc_bitshift; - do_div(scale2, (u32)(cpu_freq>>32)); - scale += scale2; - } - - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; - - __get_time_values_from_xen(); - processed_system_time = shadow_system_time; - - time_irq = bind_virq_to_irq(VIRQ_TIMER); - debug_irq = bind_virq_to_irq(VIRQ_DEBUG); - - (void)setup_irq(time_irq, &irq_timer); - (void)setup_irq(debug_irq, &dbg_time); - - rdtscll(alarm); -} - -void time_suspend(void) -{ -} - -void time_resume(void) -{ - unsigned long flags; - write_lock_irqsave(&xtime_lock, flags); - /* Get timebases for new environment. */ - __get_time_values_from_xen(); - /* Reset our own concept of passage of system time. */ - processed_system_time = shadow_system_time; - /* Accept a warp in UTC (wall-clock) time. */ - last_seen_tv.tv_sec = 0; - /* Make sure we resync UTC time with Xen on next timer interrupt. */ - last_update_from_xen = 0; - write_unlock_irqrestore(&xtime_lock, flags); -} - -/* - * /proc/sys/xen: This really belongs in another file. It can stay here for - * now however. 
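(The scale setup in time_init() above is easiest to follow with numbers. A standalone sketch of the same computation, assuming cpu_freq fits in 32 bits — the original adds a scale2 correction when it does not:

    #include <stdint.h>

    /* For cpu_freq = 2400000000 Hz: cpu_ghz = 2, so rdtsc_bitshift = 2
     * (TSC deltas are pre-shifted right by 2), and
     *   scale = 10^6 * 2^(32+2) / 2.4e9 = 7158278,
     * giving st_scale_i = 0, st_scale_f = 7158278: one shifted tick is
     * 7158278 / 2^32 usec, i.e. exactly 4 CPU cycles at 2.4 GHz. */
    static void compute_scale(uint64_t cpu_freq, unsigned int *shift,
                              uint32_t *scale_i, uint32_t *scale_f)
    {
        unsigned int ghz = (unsigned int)(cpu_freq / 1000000000ULL);
        uint64_t scale;

        for (*shift = 0; ghz != 0; (*shift)++, ghz >>= 1)
            ;
        scale = (1000000ULL << (32 + *shift)) / cpu_freq;
        *scale_i = (uint32_t)(scale >> 32);
        *scale_f = (uint32_t)scale;
    }

)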
- */ -static ctl_table xen_subtable[] = { - {1, "independent_wallclock", &independent_wallclock, - sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, - {0} -}; -static ctl_table xen_table[] = { - {123, "xen", NULL, 0, 0555, xen_subtable}, - {0} -}; -static int __init xen_sysctl_init(void) -{ - (void)register_sysctl_table(xen_table, 0); - return 0; -} -__initcall(xen_sysctl_init); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/kernel/traps.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/traps.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,619 +0,0 @@ -/* - * linux/arch/i386/traps.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ptrace.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <linux/highmem.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/debugreg.h> -#include <asm/desc.h> -#include <asm/i387.h> - -#include <asm/smp.h> -#include <asm/pgalloc.h> - -#include <asm/hypervisor.h> - -#include <linux/irq.h> -#include <linux/module.h> - -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -asmlinkage void double_fault(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void fixup_4gb_segment(void); -asmlinkage void machine_check(void); - -int kstack_depth_to_print = 24; - - -/* - * If the address is either in the .text section of the - * kernel, or in the vmalloc'ed module regions, it *may* - * be the address of a calling routine - */ - -#ifdef CONFIG_MODULES - -extern struct module *module_list; -extern struct module kernel_module; - -static inline int kernel_text_address(unsigned long addr) -{ - int retval = 0; - struct module *mod; - - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - - for (mod = module_list; mod != &kernel_module; mod = mod->next) { - /* mod_bound tests for addr being inside the vmalloc'ed - * module area. Of course it'd be better to test only - * for the .text subset... 
*/ - if (mod_bound(addr, 0, mod)) { - retval = 1; - break; - } - } - - return retval; -} - -#else - -static inline int kernel_text_address(unsigned long addr) -{ - return (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext); -} - -#endif - -void show_trace(unsigned long * stack) -{ - int i; - unsigned long addr; - - if (!stack) - stack = (unsigned long*)&stack; - - printk("Call Trace: "); - i = 1; - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_trace_task(struct task_struct *tsk) -{ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) - return; - show_trace((unsigned long *)esp); -} - -void show_stack(unsigned long * esp) -{ - unsigned long *stack; - int i; - - // debugging aid: "show_stack(NULL);" prints the - // back trace for this cpu. - - if(esp==NULL) - esp=(unsigned long*)&esp; - - stack = esp; - for(i=0; i < kstack_depth_to_print; i++) { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *stack++); - } - printk("\n"); - show_trace(esp); -} - -void show_registers(struct pt_regs *regs) -{ - int in_kernel = 1; - unsigned long esp; - unsigned short ss; - - esp = (unsigned long) (&regs->esp); - ss = __KERNEL_DS; - if (regs->xcs & 2) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } - printk(KERN_ALERT "CPU: %d\n", smp_processor_id() ); - printk(KERN_ALERT "EIP: %04x:[<%08lx>] %s\n", - 0xffff & regs->xcs, regs->eip, print_tainted()); - printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags); - printk(KERN_ALERT "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_ALERT "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_ALERT "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)", - current->comm, current->pid, 4096+(unsigned long)current); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault..
- */ - if (in_kernel) { - - printk(KERN_ALERT "\nStack: "); - show_stack((unsigned long*)esp); - -#if 0 - { - int i; - printk(KERN_ALERT "\nCode: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: - printk(KERN_ALERT " Bad EIP value."); - break; - } - printk("%02x ", c); - } - } -#endif - } - printk(KERN_ALERT "\n"); -} - -spinlock_t die_lock = SPIN_LOCK_UNLOCKED; - -void die(const char * str, struct pt_regs * regs, long err) -{ - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx\n", str, err & 0xffff); - show_registers(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); -} - -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) -{ - if (!(2 & regs->xcs)) - die(str, regs, err); -} - - -static void inline do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, - siginfo_t *info) -{ - if (!(regs->xcs & 2)) - goto kernel_trap; - - /*trap_signal:*/ { - struct task_struct *tsk = current; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - } - - kernel_trap: { - unsigned long fixup = search_exception_table(regs->eip); - if (fixup) - regs->eip = fixup; - else - die(str, regs, error_code); - return; - } -} - -#define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) -DO_ERROR( 3, SIGTRAP, "int3", int3) -DO_ERROR( 4, SIGSEGV, "overflow", overflow) -DO_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) -DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGBUS, "machine check", machine_check) - -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) -{ - /* - * If we trapped on an LDT access then ensure that the default_ldt is - * loaded, if nothing else. We load default_ldt lazily because LDT - * switching costs time and many applications don't need it. 
- */ - if ( unlikely((error_code & 6) == 4) ) - { - unsigned long ldt; - __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); - if ( ldt == 0 ) - { - xen_set_ldt((unsigned long)&default_ldt[0], 5); - return; - } - } - - if (!(regs->xcs & 2)) - goto gp_in_kernel; - - current->thread.error_code = error_code; - current->thread.trap_no = 13; - force_sig(SIGSEGV, current); - return; - -gp_in_kernel: - { - unsigned long fixup; - fixup = search_exception_table(regs->eip); - if (fixup) { - regs->eip = fixup; - return; - } - die("general protection fault", regs, error_code); - } -} - - -asmlinkage void do_debug(struct pt_regs * regs, long error_code) -{ - unsigned int condition; - struct task_struct *tsk = current; - siginfo_t info; - - condition = HYPERVISOR_get_debugreg(6); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg[7]) - goto clear_dr7; - } - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg[6] = condition; - - /* Mask out spurious TF errors due to lazy TF clearing */ - if (condition & DR_STEP) { - /* - * The TF error should be masked out only if the current - * process is not traced and if the TRAP flag has been set - * previously by a tracing process (condition detected by - * the PT_DTRACE flag); remember that the i386 TRAP flag - * can be modified by the process itself in user mode, - * allowing programs to debug themselves without the ptrace() - * interface. - */ - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } - - /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; - force_sig_info(SIGTRAP, &info, tsk); - - /* Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ - clear_dr7: - HYPERVISOR_set_debugreg(7, 0); - return; - - clear_TF: - regs->eflags &= ~TF_MASK; - return; -} - - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(void *eip) -{ - struct task_struct * task; - siginfo_t info; - unsigned short cwd, swd; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = eip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. 
We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't synchronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
- switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- case 0x041: /* Stack Fault */
- case 0x241: /* Stack Fault | Direction */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
-{
- ignore_irq13 = 1;
- math_error((void *)regs->eip);
-}
-
-void simd_math_error(void *eip)
-{
- struct task_struct * task;
- siginfo_t info;
- unsigned short mxcsr;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 19;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = eip;
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- mxcsr = get_fpu_mxcsr(task);
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
- long error_code)
-{
- if (cpu_has_xmm) {
- /* Handle SIMD FPU exceptions on PIII+ processors. */
- ignore_irq13 = 1;
- simd_math_error((void *)regs->eip);
- } else {
- die_if_kernel("cache flush denied", regs, error_code);
- current->thread.trap_no = 19;
- current->thread.error_code = error_code;
- force_sig(SIGSEGV, current);
- }
-}
-
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- */
-asmlinkage void math_state_restore(struct pt_regs regs)
-{
- /*
- * A trap in kernel mode can be ignored. It'll be the fast XOR or
- * copying libraries, which will correctly save/restore state and
- * reset the TS bit in CR0. 
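
The cwd/swd decode in math_error() above can be exercised outside the kernel. A small user-space harness (the fpe_code() helper and the sample register values are invented for illustration, not part of the patch):

    #include <stdio.h>

    /* Mirror of the decode in math_error(): the low 6 bits of the
     * status word are the exception flags, 0x200 is C1 and 0x040 is
     * the stack-fault bit. */
    static const char *fpe_code(unsigned short cwd, unsigned short swd)
    {
        switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
        case 0x001: case 0x041: case 0x241: return "FPE_FLTINV";
        case 0x002: case 0x010:             return "FPE_FLTUND";
        case 0x004:                         return "FPE_FLTDIV";
        case 0x008:                         return "FPE_FLTOVF";
        case 0x020:                         return "FPE_FLTRES";
        default:                            return "unclassified";
        }
    }

    int main(void)
    {
        /* Control word 0x037b leaves only zero-divide unmasked; a
         * status word of 0x0004 reports one: expect FPE_FLTDIV. */
        printf("%s\n", fpe_code(0x037b, 0x0004));
        return 0;
    }
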
- */ - if ( (regs.xcs & 2) == 0 ) - return; - - if (current->used_math) { - restore_fpu(current); - } else { - init_fpu(); - } - current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ -} - - -#define _set_gate(gate_addr,type,dpl,addr) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ -} while (0) - -static void __init set_call_gate(void *a, void *addr) -{ - _set_gate(a,12,3,addr); -} - - -/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ -static trap_info_t trap_table[] = { - { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, - { 1, 0, __KERNEL_CS, (unsigned long)debug }, - { 3, 3, __KERNEL_CS, (unsigned long)int3 }, - { 4, 3, __KERNEL_CS, (unsigned long)overflow }, - { 5, 3, __KERNEL_CS, (unsigned long)bounds }, - { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, - { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, - { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, - { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, - { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, - { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, - { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, - { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, - { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, - { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment }, - { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, - { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, - { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, - { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, - { SYSCALL_VECTOR, - 3, __KERNEL_CS, (unsigned long)system_call }, - { 0, 0, 0, 0 } -}; - - -void __init trap_init(void) -{ - HYPERVISOR_set_trap_table(trap_table); - - /* - * The default LDT is a single-entry callgate to lcall7 for iBCS and a - * callgate to lcall27 for Solaris/x86 binaries. - */ - clear_page(&default_ldt[0]); - set_call_gate(&default_ldt[0],lcall7); - set_call_gate(&default_ldt[4],lcall27); - __make_page_readonly(&default_ldt[0]); - - cpu_init(); -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/lib/Makefile --- a/linux-2.4-xen-sparse/arch/xen/lib/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,15 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -c $< -o $*.o - -L_TARGET = lib.a - -obj-y = checksum.o old-checksum.o delay.o \ - usercopy.o getuser.o \ - memcpy.o strstr.o xen_proc.o - -obj-$(CONFIG_X86_USE_3DNOW) += mmx.o -obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o - -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/lib/delay.c --- a/linux-2.4-xen-sparse/arch/xen/lib/delay.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,52 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. 
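
A note on the constants used by the __udelay()/__const_udelay() pair in this file: 0x000010c6 is 2^32/10^6, so a 32x32->64 multiply that keeps only the high word divides by a million without a division instruction. A stand-alone sketch of that fixed-point rescaling (toy values, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* 2^32 / 10^6, the same constant __udelay() uses: multiplying
     * microseconds by it gives seconds in 32.32 fixed point. */
    #define USEC_TO_FRAC 0x000010c6ULL

    static uint32_t usecs_to_loops(uint32_t usecs,
                                   uint32_t loops_per_jiffy,
                                   uint32_t hz)
    {
        uint64_t frac  = (uint64_t)usecs * USEC_TO_FRAC; /* 32.32 seconds  */
        uint64_t loops = (frac * loops_per_jiffy) >> 32; /* loops per jiffy */
        return (uint32_t)(loops * hz);                   /* spin-loop count */
    }

    int main(void)
    {
        /* lpj = 500000 at HZ = 100 is 50M loops/sec, so 1000us
         * should come out near 50000 loops. */
        printf("%u\n", usecs_to_loops(1000, 500000, 100));
        return 0;
    }
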
- */ - -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/delay.h> -#include <asm/processor.h> -#include <asm/delay.h> - -#ifdef CONFIG_SMP -#include <asm/smp.h> -#endif - -void __delay(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - __asm__("mull %0" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); - __delay(xloops * HZ); -} - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ -} - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/mm/Makefile --- a/linux-2.4-xen-sparse/arch/xen/mm/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,16 +0,0 @@ -# -# Makefile for the linux i386-specific parts of the memory manager. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... - -O_TARGET := mm.o - -obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o - -export-objs := pageattr.o - -include $(TOPDIR)/Rules.make diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/mm/fault.c --- a/linux-2.4-xen-sparse/arch/xen/mm/fault.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,302 +0,0 @@ -/* - * linux/arch/i386/mm/fault.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/hardirq.h> - -extern void die(const char *,struct pt_regs *,long); - -pgd_t *cur_pgd; - -extern spinlock_t timerlist_lock; - -/* - * Unlock any spinlocks which will prevent us from getting the - * message out (timerlist_lock is acquired through the - * console unblank code) - */ -void bust_spinlocks(int yes) -{ - spin_lock_init(&timerlist_lock); - if (yes) { - oops_in_progress = 1; - } else { - int loglevel_save = console_loglevel; -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; - } -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. 
- * - * error_code: - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - */ -asmlinkage void do_page_fault(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) -{ - struct task_struct *tsk = current; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long page; - unsigned long fixup; - int write; - siginfo_t info; - - /* Set the "privileged fault" bit to something sane. */ - error_code &= 3; - error_code |= (regs->xcs & 2) << 1; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. - */ - if (address >= TASK_SIZE && !(error_code & 5)) - goto vmalloc_fault; - - mm = tsk->mm; - info.si_code = SEGV_MAPERR; - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_interrupt() || !mm) - goto no_context; - - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (error_code & 4) { - /* - * accessing the stack below %esp is always a bug. - * The "+ 32" is there due to some instructions (like - * pusha) doing post-decrement on the stack and that - * doesn't show up until later.. - */ - if (address + 32 < regs->esp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - info.si_code = SEGV_ACCERR; - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ - /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - - survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - switch (handle_mm_fault(mm, vma, address, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - goto do_sigbus; - default: - goto out_of_memory; - } - - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if ((fixup = search_exception_table(regs->eip)) != 0) { - regs->eip = fixup; - return; - } - -/* - * Oops. The kernel tried to access some bad page. 
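
The error_code layout documented at the top of do_page_fault() above is worth making concrete; this throwaway decoder (illustrative only, not from the changeset) prints the three bits in words:

    #include <stdio.h>

    /* Same encoding as the comment above:
     * bit 0: 0 = page not present, 1 = protection fault
     * bit 1: 0 = read,             1 = write
     * bit 2: 0 = kernel mode,      1 = user mode */
    static void decode_fault(unsigned long error_code)
    {
        printf("%s-mode %s, %s page\n",
               (error_code & 4) ? "user" : "kernel",
               (error_code & 2) ? "write" : "read",
               (error_code & 1) ? "protection fault on a present" :
                                  "not-present");
    }

    int main(void)
    {
        decode_fault(6);   /* user write to a not-present page       */
        decode_fault(1);   /* kernel read hitting a protection fault */
        return 0;
    }
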
We'll have to - * terminate things with extreme prejudice. - */ - - bust_spinlocks(1); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n",address); - printk(" printing eip:\n"); - printk("%08lx\n", regs->eip); - page = ((unsigned long *) cur_pgd)[address >> 22]; - printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = machine_to_phys(page); - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, - machine_to_phys(page)); - } - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - if (tsk->pid == 1) { - yield(); - goto survive; - } - up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) - do_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & 4)) - goto no_context; - return; - -vmalloc_fault: - { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - int offset = __pgd_offset(address); - pgd_t *pgd, *pgd_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - - pgd = offset + cur_pgd; - pgd_k = init_mm.pgd + offset; - - if (!pgd_present(*pgd_k)) - goto no_context; - set_pgd(pgd, *pgd_k); - - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); - - pte_k = pte_offset(pmd_k, address); - if (!pte_present(*pte_k)) - goto no_context; - return; - } -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/mm/init.c --- a/linux-2.4-xen-sparse/arch/xen/mm/init.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,482 +0,0 @@ -/* - * linux/arch/i386/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#include <linux/config.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#ifdef CONFIG_BLK_DEV_INITRD -#include <linux/blk.h> -#endif -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/slab.h> - -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/dma.h> -#include <asm/apic.h> -#include <asm/tlb.h> - -/* XEN: We *cannot* use mmx_clear_page() this early. Force dumb memset(). 
*/ -#undef clear_page -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) - -mmu_gather_t mmu_gathers[NR_CPUS]; -unsigned long highstart_pfn, highend_pfn; -static unsigned long totalram_pages; -static unsigned long totalhigh_pages; - -int do_check_pgt_cache(int low, int high) -{ - int freed = 0; - if(pgtable_cache_size > high) { - do { - if (!QUICKLIST_EMPTY(pgd_quicklist)) { - free_pgd_slow(get_pgd_fast()); - freed++; - } - if (!QUICKLIST_EMPTY(pte_quicklist)) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); - freed++; - } - } while(pgtable_cache_size > low); - } - return freed; -} - -/* - * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the - * physical space so we can cache the place of the first one and move - * around without checking the pgd every time. - */ - -#if CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} -#endif /* CONFIG_HIGHMEM */ - -void show_mem(void) -{ - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - i = max_mapnr; - while (i-- > 0) { - total++; - if (PageHighMem(mem_map+i)) - highmem++; - if (PageReserved(mem_map+i)) - reserved++; - else if (PageSwapCache(mem_map+i)) - cached++; - else if (page_count(mem_map+i)) - shared += page_count(mem_map+i) - 1; - } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n",highmem); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); - printk("%ld pages in page table cache\n",pgtable_cache_size); - show_buffers(); -} - -/* References to section boundaries */ - -extern char _text, _etext, _edata, __bss_start, _end; -extern char __init_begin, __init_end; - -static inline void set_pte_phys (unsigned long vaddr, - unsigned long phys, pgprot_t prot) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - pgd = init_mm.pgd + __pgd_offset(vaddr); - if (pgd_none(*pgd)) { - printk("PAE BUG #00!\n"); - return; - } - pmd = pmd_offset(pgd, vaddr); - if (pmd_none(*pmd)) { - printk("PAE BUG #01!\n"); - return; - } - pte = pte_offset(pmd, vaddr); - - set_pte(pte, (pte_t) { phys | pgprot_val(prot) }); - - /* - * It's enough to flush this one mapping. 
- * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, - pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - printk("Invalid __set_fixmap\n"); - return; - } - set_pte_phys(address, phys, flags); -} - -void clear_fixmap(enum fixed_addresses idx) -{ - set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0)); -} - -static void __init fixrange_init (unsigned long start, - unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd, *kpgd; - pmd_t *pmd, *kpmd; - pte_t *pte, *kpte; - int i, j; - unsigned long vaddr; - - vaddr = start; - i = __pgd_offset(vaddr); - j = __pmd_offset(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } - pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - clear_page(pte); - kpgd = pgd_offset_k((unsigned long)pte); - kpmd = pmd_offset(kpgd, (unsigned long)pte); - kpte = pte_offset(kpmd, (unsigned long)pte); - set_pte(kpte, pte_wrprotect(*kpte)); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - } - vaddr += PMD_SIZE; - } - j = 0; - } -} - - -static void __init pagetable_init (void) -{ - unsigned long vaddr, end, ram_end; - pgd_t *kpgd, *pgd, *pgd_base; - int i, j, k; - pmd_t *kpmd, *pmd; - pte_t *kpte, *pte, *pte_base; - - ram_end = end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - if ( xen_start_info.nr_pages < max_low_pfn ) - ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE); - - pgd_base = init_mm.pgd; - i = __pgd_offset(PAGE_OFFSET); - pgd = pgd_base + i; - - for (; i < PTRS_PER_PGD; pgd++, i++) { - vaddr = i*PGDIR_SIZE; - if (vaddr >= end) - break; - pmd = (pmd_t *)pgd; - for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE; - if (vaddr >= end) - break; - - /* Filled in for us already? 
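
For orientation while reading pagetable_init() and fixrange_init(): on non-PAE i386 a virtual address splits into a 10-bit pgd index, a 10-bit page-table index and a 12-bit offset, which is all that __pgd_offset()/__pmd_offset() compute. A quick stand-alone check (constants restated here for the demo):

    #include <stdio.h>

    /* Demo constants for non-PAE i386 (restated, not imported). */
    #define PAGE_SHIFT   12
    #define PGDIR_SHIFT  22
    #define PTRS_PER_PTE 1024

    int main(void)
    {
        unsigned long vaddr = 0xc0123456UL;  /* arbitrary kernel address */

        unsigned long pgd_idx = vaddr >> PGDIR_SHIFT;
        unsigned long pte_idx = (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        unsigned long offset  = vaddr & ((1UL << PAGE_SHIFT) - 1);

        /* 0xc0123456 -> pgd 768, pte 291, offset 0x456 */
        printf("pgd %lu, pte %lu, offset 0x%lx\n", pgd_idx, pte_idx, offset);
        return 0;
    }
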
*/ - if ( pmd_val(*pmd) & _PAGE_PRESENT ) - continue; - - pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - clear_page(pte_base); - - for (k = 0; k < PTRS_PER_PTE; pte++, k++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; - if (vaddr >= ram_end) - break; - *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); - } - kpgd = pgd_offset_k((unsigned long)pte_base); - kpmd = pmd_offset(kpgd, (unsigned long)pte_base); - kpte = pte_offset(kpmd, (unsigned long)pte_base); - set_pte(kpte, pte_wrprotect(*kpte)); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); - } - } - - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); - -#if CONFIG_HIGHMEM - /* - * Permanent kmaps: - */ - vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, init_mm.pgd); - - pgd = init_mm.pgd + __pgd_offset(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); - pkmap_page_table = pte; -#endif -} - -static void __init zone_sizes_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; - unsigned int max_dma, high, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - high = highend_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - free_area_init(zones_size); -} - -void __init paging_init(void) -{ - pagetable_init(); - - zone_sizes_init(); - - /* Switch to the real shared_info page, and clear the dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - -#ifdef CONFIG_HIGHMEM - kmap_init(); -#endif -} - -static inline int page_is_ram (unsigned long pagenr) -{ - return 1; -} - -#ifdef CONFIG_HIGHMEM -void __init one_highpage_init(struct page *page, int free_page) -{ - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - if ( free_page ) - __free_page(page); - totalhigh_pages++; -} -#endif /* CONFIG_HIGHMEM */ - -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - highmem_start_page = mem_map + highstart_pfn; - max_mapnr = num_physpages = highend_pfn; - num_mappedpages = max_low_pfn; -#else - max_mapnr = num_mappedpages = num_physpages = max_low_pfn; -#endif -} - -static int __init free_pages_init(void) -{ -#ifdef CONFIG_HIGHMEM - int bad_ppro = 0; -#endif - int reservedpages, pfn; - - /* add only boot_pfn pages of low memory to free list. - * max_low_pfn may be sized for - * pages yet to be allocated from the hypervisor, or it may be set - * to override the xen_start_info amount of memory - */ - int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn); - - /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - /* XEN: init and count low-mem pages outside initial allocation. 
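
The zone split performed by zone_sizes_init() above reduces to a few comparisons; here it is as a stand-alone sketch with invented page counts (16MB of DMA-able memory, 256MB of lowmem, 512MB total):

    #include <stdio.h>

    int main(void)
    {
        /* Invented page counts: with 4K pages, 4096 pages = 16MB. */
        unsigned long max_dma = 4096;    /* highest DMA-able pfn */
        unsigned long low     = 65536;   /* max_low_pfn (256MB)  */
        unsigned long high    = 131072;  /* highend_pfn (512MB)  */
        unsigned long dma, normal, highmem;

        if (low < max_dma) {
            dma = low;
            normal = 0;
            highmem = 0;
        } else {
            dma = max_dma;
            normal = low - max_dma;
            highmem = high - low;   /* only with CONFIG_HIGHMEM */
        }
        printf("DMA %lu, normal %lu, highmem %lu pages\n",
               dma, normal, highmem);
        return 0;
    }
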
*/ - for (pfn = boot_pfn; pfn < max_low_pfn; pfn++) { - ClearPageReserved(&mem_map[pfn]); - atomic_set(&mem_map[pfn].count, 1); - totalram_pages++; - } - - reservedpages = 0; - for (pfn = 0; pfn < boot_pfn ; pfn++) { - /* - * Only count reserved RAM pages - */ - if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) - reservedpages++; - } -#ifdef CONFIG_HIGHMEM - for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) - one_highpage_init((struct page *) (mem_map + pfn), - (pfn < xen_start_info.nr_pages)); - totalram_pages += totalhigh_pages; -#endif - return reservedpages; -} - -void __init mem_init(void) -{ - int codesize, reservedpages, datasize, initsize; - - if (!mem_map) - BUG(); - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); - BUG(); - } -#endif - - set_max_mapnr_init(); - - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - - reservedpages = free_pages_init(); - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); - - boot_cpu_data.wp_works_ok = 1; -} - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -void si_meminfo(struct sysinfo *val) -{ - val->totalram = max_pfn; - val->sharedram = 0; - val->freeram = nr_free_pages(); - val->bufferram = atomic_read(&buffermem_pages); - val->totalhigh = max_pfn-max_low_pfn; - val->freehigh = nr_free_highpages(); - val->mem_unit = PAGE_SIZE; - return; -} - -#if defined(CONFIG_X86_PAE) -struct kmem_cache_s *pae_pgd_cachep; -void __init pgtable_cache_init(void) -{ - /* - * PAE pgds must be 16-byte aligned: - */ - pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); - if (!pae_pgd_cachep) - panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); -} -#endif /* CONFIG_X86_PAE */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/mm/ioremap.c --- a/linux-2.4-xen-sparse/arch/xen/mm/ioremap.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,266 +0,0 @@ -/* - * arch/xen/mm/ioremap.c - * - * Re-map IO memory to 
kernel address space so that we can access it. - * - * (C) Copyright 1995 1996 Linus Torvalds - * - * Modifications for Xenolinux (c) 2003-2004 Keir Fraser - */ - -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/vmalloc.h> -#include <asm/io.h> -#include <asm/pgalloc.h> -#include <asm/uaccess.h> -#include <asm/tlb.h> -#include <asm/mmu.h> - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - - do { - (*v)->ptr = virt_to_machine(pte); - (*v)++; - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int direct_remap_area_pmd(struct mm_struct *mm, - pmd_t *pmd, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - if (address >= end) - BUG(); - do { - pte_t *pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, v); - - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v) -{ - pgd_t * dir; - unsigned long end = address + size; - - dir = pgd_offset(mm, address); - flush_cache_all(); - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - if (!pmd) - return -ENOMEM; - direct_remap_area_pmd(mm, pmd, address, end - address, &v); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_all(); - return 0; -} - - -int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid) -{ - int i; - unsigned long start_address; -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; - - start_address = address; - - for( i = 0; i < size; i += PAGE_SIZE ) - { - if ( (v - u) == MAX_DIRECTMAP_MMU_QUEUE ) - { - /* Fill in the PTE pointers. */ - __direct_remap_area_pages( mm, - start_address, - address-start_address, - u); - - if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 ) - return -EFAULT; - v = u; - start_address = address; - } - - /* - * Fill in the machine address: PTE ptr is done later by - * __direct_remap_area_pages(). 
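
The queueing pattern used by direct_remap_area_pages() above, fill a fixed-size array, flush with one hypercall when it is full, and drain the tail at the end, is shown stand-alone below. flush() is a stand-in for HYPERVISOR_mmu_update(), and the real code also back-fills each entry's ptr field before flushing:

    #include <stdio.h>

    #define QLEN 130   /* mirrors MAX_DIRECTMAP_MMU_QUEUE */

    struct update { unsigned long ptr, val; };

    /* Stand-in for the HYPERVISOR_mmu_update() hypercall. */
    static void flush(const struct update *q, int n)
    {
        printf("flushing a batch of %d updates\n", n);
    }

    int main(void)
    {
        struct update q[QLEN];
        int n = 0, i;

        for (i = 0; i < 300; i++) {
            if (n == QLEN) {     /* queue full: hand off one batch */
                flush(q, n);
                n = 0;
            }
            q[n].ptr = 0;        /* back-filled later in the original */
            q[n].val = i;        /* machine address | prot bits       */
            n++;
        }
        if (n > 0)               /* drain whatever is left */
            flush(q, n);
        return 0;                /* prints batches of 130, 130, 40 */
    }
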
- */
- v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
-
- machine_addr += PAGE_SIZE;
- address += PAGE_SIZE;
- v++;
- }
-
- if ( v != u )
- {
- /* get the ptep's filled in */
- __direct_remap_area_pages(mm,
- start_address,
- address-start_address,
- u);
- if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) )
- return -EFAULT;
- }
-
- return 0;
-}
-
-
-#endif /* CONFIG_XEN_PRIVILEGED_GUEST */
-
-
-/*
- * Remap an arbitrary machine address space into the kernel virtual
- * address space. Needed when a privileged instance of Xenolinux wants
- * to access space outside its world directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void * __ioremap(unsigned long machine_addr,
- unsigned long size,
- unsigned long flags)
-{
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
- void * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- pgprot_t prot;
-
- /* Don't allow wraparound or zero size */
- last_addr = machine_addr + size - 1;
- if (!size || last_addr < machine_addr)
- return NULL;
-
- /* Mappings have to be page-aligned */
- offset = machine_addr & ~PAGE_MASK;
- machine_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - machine_addr;
-
- /* Ok, go for it */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
- addr = area->addr;
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY |
- _PAGE_ACCESSED | flags);
- if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr),
- machine_addr, size, prot, 0)) {
- vfree(addr);
- return NULL;
- }
- return (void *) (offset + (char *)addr);
-#else
- return NULL;
-#endif
-}
-
-void iounmap(void *addr)
-{
- vfree((void *)((unsigned long)addr & PAGE_MASK));
-}
-
-/* implementation of boot time ioremap for the purpose of providing access
-to the vga console for privileged domains. Unlike boot time ioremap on
-other architectures, ours is permanent and not reclaimed when the vmalloc
-infrastructure is started */
-
-void __init *bt_ioremap(unsigned long machine_addr, unsigned long size)
-{
- unsigned long offset, last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
-
- /* Don't allow wraparound or zero size */
- last_addr = machine_addr + size - 1;
- if (!size || last_addr < machine_addr)
- return NULL;
-
- /*
- * Mappings have to be page-aligned
- */
- offset = machine_addr & ~PAGE_MASK;
- machine_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - machine_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS)
- return NULL;
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN;
- while (nrpages > 0) {
- __set_fixmap(idx, machine_addr, PAGE_KERNEL);
- machine_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
-
- flush_tlb_all();
-
- return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
-}
-
-
-#if 0 /* We don't support these functions. They shouldn't be required. */
-void __init bt_iounmap(void *addr, unsigned long size) {}
-#endif
diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/arch/xen/vmlinux.lds
--- a/linux-2.4-xen-sparse/arch/xen/vmlinux.lds Sat Oct 8 17:37:45 2005
+++ /dev/null Sat Oct 8 20:28:24 2005
@@ -1,75 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>;
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(_start)
-SECTIONS
-{
- . 
= 0xC0000000 + 0x100000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c --- a/linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1663 +0,0 @@ -/* - * linux/drivers/block/ll_rw_blk.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 1994, Karl Keyte: Added support for disk statistics - * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@xxxxxxx> SuSE - * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@xxxxxxx> - * kernel-doc documentation started by NeilBrown <neilb@xxxxxxxxxxxxxxx> - July2000 - */ - -/* - * This handles all read/write requests to block devices - */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/config.h> -#include <linux/locks.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/init.h> -#include <linux/smp_lock.h> -#include <linux/completion.h> -#include <linux/bootmem.h> - -#include <asm/system.h> -#include <asm/io.h> -#include <linux/blk.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/module.h> - -/* - * MAC Floppy IWM hooks - */ - -#ifdef CONFIG_MAC_FLOPPY_IWM -extern int mac_floppy_init(void); -#endif - -/* - * For the allocated request tables - */ -static kmem_cache_t *request_cachep; - -/* - * The "disk" task queue is used to start the actual requests - * after a plug - */ -DECLARE_TASK_QUEUE(tq_disk); - -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... 
- */
-spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
-
-/* This specifies how many sectors to read ahead on the disk. */
-
-int read_ahead[MAX_BLKDEV];
-
-/* blk_dev_struct is:
- * *request_fn
- * *current_request
- */
-struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
-
-/*
- * blk_size contains the size of all block-devices in units of 1024 byte
- * sectors:
- *
- * blk_size[MAJOR][MINOR]
- *
- * if (!blk_size[MAJOR]) then no minor size checking is done.
- */
-int * blk_size[MAX_BLKDEV];
-
-/*
- * blksize_size contains the size of all block-devices:
- *
- * blksize_size[MAJOR][MINOR]
- *
- * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
- */
-int * blksize_size[MAX_BLKDEV];
-
-/*
- * hardsect_size contains the size of the hardware sector of a device.
- *
- * hardsect_size[MAJOR][MINOR]
- *
- * if (!hardsect_size[MAJOR])
- * then 512 bytes is assumed.
- * else
- * sector_size is hardsect_size[MAJOR][MINOR]
- * This is currently set by some scsi devices and read by the msdos fs driver.
- * Other uses may appear later.
- */
-int * hardsect_size[MAX_BLKDEV];
-
-/*
- * The following tunes the read-ahead algorithm in mm/filemap.c
- */
-int * max_readahead[MAX_BLKDEV];
-
-/*
- * Max number of sectors per request
- */
-int * max_sectors[MAX_BLKDEV];
-
-unsigned long blk_max_low_pfn, blk_max_pfn;
-int blk_nohighio = 0;
-
-int block_dump = 0;
-
-static struct timer_list writeback_timer;
-
-static inline int get_max_sectors(kdev_t dev)
-{
- if (!max_sectors[MAJOR(dev)])
- return MAX_SECTORS;
- return max_sectors[MAJOR(dev)][MINOR(dev)];
-}
-
-static inline request_queue_t *__blk_get_queue(kdev_t dev)
-{
- struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
-
- if (bdev->queue)
- return bdev->queue(dev);
- else
- return &blk_dev[MAJOR(dev)].request_queue;
-}
-
-request_queue_t *blk_get_queue(kdev_t dev)
-{
- return __blk_get_queue(dev);
-}
-
-static int __blk_cleanup_queue(struct request_list *list)
-{
- struct list_head *head = &list->free;
- struct request *rq;
- int i = 0;
-
- while (!list_empty(head)) {
- rq = list_entry(head->next, struct request, queue);
- list_del(&rq->queue);
- kmem_cache_free(request_cachep, rq);
- i++;
- };
-
- if (i != list->count)
- printk("request list leak!\n");
-
- list->count = 0;
- return i;
-}
-
-/**
- * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
- * @q: the request queue to be released
- *
- * Description:
- * blk_cleanup_queue is the pair to blk_init_queue(). It should
- * be called when a request queue is being released; typically
- * when a block device is being de-registered. Currently, its
- * primary task is to free all the &struct request structures that
- * were allocated to the queue.
- * Caveat:
- * Hopefully the low level driver will have finished any
- * outstanding requests first...
- **/
-void blk_cleanup_queue(request_queue_t * q)
-{
- int count = q->nr_requests;
-
- count -= __blk_cleanup_queue(&q->rq);
-
- if (count)
- printk("blk_cleanup_queue: leaked requests (%d)\n", count);
- if (atomic_read(&q->nr_sectors))
- printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
-
- memset(q, 0, sizeof(*q));
-}
-
-/**
- * blk_queue_headactive - indicate whether head of request queue may be active
- * @q: The queue which this applies to.
- * @active: A flag indicating whether the head of the queue is active. 
- *
- * Description:
- * The driver for a block device may choose to leave the currently active
- * request on the request queue, removing it only when it has completed.
- * The queue handling routines assume this by default for safety reasons
- * and will not involve the head of the request queue in any merging or
- * reordering of requests when the queue is unplugged (and thus may be
- * working on this particular request).
- *
- * If a driver removes requests from the queue before processing them, then
- * it may indicate that it does so, thereby allowing the head of the queue
- * to be involved in merging and reordering. This is done by calling
- * blk_queue_headactive() with an @active flag of %0.
- *
- * If a driver processes several requests at once, it must remove them (or
- * at least all but one of them) from the request queue.
- *
- * When a queue is plugged the head will be assumed to be inactive.
- **/
-
-void blk_queue_headactive(request_queue_t * q, int active)
-{
- q->head_active = active;
-}
-
-/**
- * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
- * @q: The queue which this applies to.
- * @active: A flag indicating whether you want sector throttling on
- *
- * Description:
- * The sector throttling code allows us to put a limit on the number of
- * sectors pending io to the disk at a given time, sending @active nonzero
- * indicates you will call blk_started_sectors and blk_finished_sectors in
- * addition to calling blk_started_io and blk_finished_io in order to
- * keep track of the number of sectors in flight.
- **/
-
-void blk_queue_throttle_sectors(request_queue_t * q, int active)
-{
- q->can_throttle = active;
-}
-
-/**
- * blk_queue_make_request - define an alternate make_request function for a device
- * @q: the request queue for the device to be affected
- * @mfn: the alternate make_request function
- *
- * Description:
- * The normal way for &struct buffer_heads to be passed to a device
- * driver is for them to be collected into requests on a request
- * queue, and then to allow the device driver to select requests
- * off that queue when it is ready. This works well for many block
- * devices. However some block devices (typically virtual devices
- * such as md or lvm) do not benefit from the processing on the
- * request queue, and are served best by having the requests passed
- * directly to them. This can be achieved by providing a function
- * to blk_queue_make_request().
- *
- * Caveat:
- * The driver that does this *must* be able to deal appropriately
- * with buffers in "highmemory", either by calling bh_kmap() to get
- * a kernel mapping, or by calling create_bounce() to create a
- * buffer in normal memory.
- **/
-
-void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
-{
- q->make_request_fn = mfn;
-}
-
-/**
- * blk_queue_bounce_limit - set bounce buffer limit for queue
- * @q: the request queue for the device
- * @dma_addr: bus address limit
- *
- * Description:
- * Different hardware can have different requirements as to what pages
- * it can do I/O directly to. A low level driver can call
- * blk_queue_bounce_limit to have lower memory pages allocated as bounce
- * buffers for doing I/O to pages residing above @dma_addr. By default
- * the block layer sets this to the highest numbered "low" memory page. 
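
The pfn arithmetic behind blk_queue_bounce_limit() is a one-liner; as a stand-alone illustration (the 32-bit device mask is an invented example):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        uint64_t dma_addr = 0xffffffffULL;   /* a 32-bit-only device */
        unsigned long bounce_pfn = (unsigned long)(dma_addr >> PAGE_SHIFT);

        /* Pages whose pfn exceeds bounce_pfn would be bounced. */
        printf("bounce above pfn %lu (limit %lluMB)\n",
               bounce_pfn, (unsigned long long)(dma_addr >> 20));
        return 0;
    }
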
- **/
-void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
-{
- unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
- unsigned long mb = dma_addr >> 20;
- static request_queue_t *old_q;
-
- /*
- * keep this for debugging for now...
- */
- if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
- old_q = q;
- printk("blk: queue %p, ", q);
- if (dma_addr == BLK_BOUNCE_ANY)
- printk("no I/O memory limit\n");
- else
- printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
- (long long) dma_addr);
- }
-
- q->bounce_pfn = bounce_pfn;
-}
-
-
-/*
- * can we merge the two segments, or do we need to start a new one?
- */
-static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
-{
- /*
- * if bh and nxt are contiguous and don't cross a 4g boundary, it's ok
- */
- if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
- return 1;
-
- return 0;
-}
-
-int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
-{
- return __blk_seg_merge_ok(bh, nxt);
-}
-
-static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
-{
- if (req->nr_segments < max_segments) {
- req->nr_segments++;
- return 1;
- }
- return 0;
-}
-
-static int ll_back_merge_fn(request_queue_t *q, struct request *req,
- struct buffer_head *bh, int max_segments)
-{
- if (__blk_seg_merge_ok(req->bhtail, bh))
- return 1;
-
- return ll_new_segment(q, req, max_segments);
-}
-
-static int ll_front_merge_fn(request_queue_t *q, struct request *req,
- struct buffer_head *bh, int max_segments)
-{
- if (__blk_seg_merge_ok(bh, req->bh))
- return 1;
-
- return ll_new_segment(q, req, max_segments);
-}
-
-static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
- struct request *next, int max_segments)
-{
- int total_segments = req->nr_segments + next->nr_segments;
-
- if (__blk_seg_merge_ok(req->bhtail, next->bh))
- total_segments--;
-
- if (total_segments > max_segments)
- return 0;
-
- req->nr_segments = total_segments;
- return 1;
-}
-
-/*
- * "plug" the device if there are no outstanding requests: this will
- * force the transfer to start only after we have put all the requests
- * on the list.
- *
- * This is called with interrupts off and no requests on the queue.
- * (and with the request spinlock acquired)
- */
-static void generic_plug_device(request_queue_t *q, kdev_t dev)
-{
- /*
- * no need to replug device
- */
- if (!list_empty(&q->queue_head) || q->plugged)
- return;
-
- q->plugged = 1;
- queue_task(&q->plug_tq, &tq_disk);
-}
-
-/*
- * remove the plug and let it rip..
- */
-static inline void __generic_unplug_device(request_queue_t *q)
-{
- if (q->plugged) {
- q->plugged = 0;
- if (!list_empty(&q->queue_head))
- q->request_fn(q);
- }
-}
-
-void generic_unplug_device(void *data)
-{
- request_queue_t *q = (request_queue_t *) data;
- unsigned long flags;
-
- spin_lock_irqsave(&io_request_lock, flags);
- __generic_unplug_device(q);
- spin_unlock_irqrestore(&io_request_lock, flags);
-}
-
-/** blk_grow_request_list
- * @q: The &request_queue_t
- * @nr_requests: how many requests are desired
- *
- * More free requests are added to the queue's free lists, bringing
- * the total number of requests to @nr_requests.
- *
- * The requests are added equally to the request queue's read
- * and write freelists.
- *
- * This function can sleep.
- *
- * Returns the (new) number of requests which the queue has available. 
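
The segment-merge test above (BH_CONTIG plus BH_PHYS_4G) asks only whether two buffers are physically adjacent and whether the merged span stays inside one 4GB segment. A sketch with bare addresses instead of buffer_heads (values invented):

    #include <stdio.h>
    #include <stdint.h>

    /* a/b are physical buffer addresses, *_len their sizes in bytes. */
    static int seg_merge_ok(uint64_t a, uint64_t a_len,
                            uint64_t b, uint64_t b_len)
    {
        int contig  = (a + a_len == b);                       /* BH_CONTIG  */
        int same_4g = ((a >> 32) == ((b + b_len - 1) >> 32)); /* BH_PHYS_4G */
        return contig && same_4g;
    }

    int main(void)
    {
        /* adjacent, same 4GB segment -> mergeable (prints 1) */
        printf("%d\n", seg_merge_ok(0x10000, 4096, 0x11000, 4096));
        /* adjacent, but the pair straddles 4GB -> not mergeable (prints 0) */
        printf("%d\n", seg_merge_ok(0xfffff000ULL, 4096,
                                    0x100000000ULL, 4096));
        return 0;
    }
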
- */
-int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
-{
- unsigned long flags;
- /* Several broken drivers assume that this function doesn't sleep;
- * this causes system hangs during boot.
- * As a temporary fix, make the function non-blocking.
- */
- spin_lock_irqsave(&io_request_lock, flags);
- while (q->nr_requests < nr_requests) {
- struct request *rq;
-
- rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
- if (rq == NULL)
- break;
- memset(rq, 0, sizeof(*rq));
- rq->rq_status = RQ_INACTIVE;
- list_add(&rq->queue, &q->rq.free);
- q->rq.count++;
-
- q->nr_requests++;
- }
-
- /*
- * Wakeup waiters after both one quarter of the
- * max-in-flight queue and one quarter of the requests
- * are available again.
- */
-
- q->batch_requests = q->nr_requests / 4;
- if (q->batch_requests > 32)
- q->batch_requests = 32;
- q->batch_sectors = max_queue_sectors / 4;
-
- q->max_queue_sectors = max_queue_sectors;
-
- BUG_ON(!q->batch_sectors);
- atomic_set(&q->nr_sectors, 0);
-
- spin_unlock_irqrestore(&io_request_lock, flags);
- return q->nr_requests;
-}
-
-static void blk_init_free_list(request_queue_t *q)
-{
- struct sysinfo si;
- int megs; /* Total memory, in megabytes */
- int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
-
- INIT_LIST_HEAD(&q->rq.free);
- q->rq.count = 0;
- q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
- q->nr_requests = 0;
-
- si_meminfo(&si);
- megs = si.totalram >> (20 - PAGE_SHIFT);
- nr_requests = MAX_NR_REQUESTS;
- if (megs < 30) {
- nr_requests /= 2;
- max_queue_sectors /= 2;
- }
- /* notice early if anybody screwed the defaults */
- BUG_ON(!nr_requests);
- BUG_ON(!max_queue_sectors);
-
- blk_grow_request_list(q, nr_requests, max_queue_sectors);
-
- init_waitqueue_head(&q->wait_for_requests);
-
- spin_lock_init(&q->queue_lock);
-}
-
-static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
-
-/**
- * blk_init_queue - prepare a request queue for use with a block device
- * @q: The &request_queue_t to be initialised
- * @rfn: The function to be called to process requests that have been
- * placed on the queue.
- *
- * Description:
- * If a block device wishes to use the standard request handling procedures,
- * which sorts requests and coalesces adjacent requests, then it must
- * call blk_init_queue(). The function @rfn will be called when there
- * are requests on the queue that need to be processed. If the device
- * supports plugging, then @rfn may not be called immediately when requests
- * are available on the queue, but may be called at some time later instead.
- * Plugged queues are generally unplugged when a buffer belonging to one
- * of the requests on the queue is needed, or due to memory pressure.
- *
- * @rfn is not required, or even expected, to remove all requests off the
- * queue, but only as many as it can handle at a time. If it does leave
- * requests on the queue, it is responsible for arranging that the requests
- * get dealt with eventually.
- *
- * A global spin lock $io_request_lock must be held while manipulating the
- * requests on the request queue.
- *
- * The request on the head of the queue is by default assumed to be
- * potentially active, and it is not considered for re-ordering or merging
- * whenever the given queue is unplugged. This behaviour can be changed with
- * blk_queue_headactive().
- *
- * Note:
- * blk_init_queue() must be paired with a blk_cleanup_queue() call
- * when the block device is deactivated (such as at module unload). 
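
The sizing rules scattered through blk_grow_request_list() and blk_init_free_list(), a quarter of the pool as the wakeup batch, clamped to 32, and halved pools on machines under 30MB, condense to the following sketch (the MAX_* stand-ins are invented numbers):

    #include <stdio.h>

    int main(void)
    {
        int megs = 24;                 /* invented machine size, in MB   */
        int nr_requests = 1024;        /* stand-in for MAX_NR_REQUESTS   */
        int max_queue_sectors = 4096;  /* stand-in for MAX_QUEUE_SECTORS */
        int batch_requests, batch_sectors;

        if (megs < 30) {               /* small box: halve both pools */
            nr_requests /= 2;
            max_queue_sectors /= 2;
        }

        batch_requests = nr_requests / 4;
        if (batch_requests > 32)
            batch_requests = 32;       /* wakeup batch is capped at 32 */
        batch_sectors = max_queue_sectors / 4;

        printf("requests %d (batch %d), sectors %d (batch %d)\n",
               nr_requests, batch_requests, max_queue_sectors, batch_sectors);
        return 0;
    }
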
- **/
-void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
-{
- INIT_LIST_HEAD(&q->queue_head);
- elevator_init(&q->elevator, ELEVATOR_LINUS);
- blk_init_free_list(q);
- q->request_fn = rfn;
- q->back_merge_fn = ll_back_merge_fn;
- q->front_merge_fn = ll_front_merge_fn;
- q->merge_requests_fn = ll_merge_requests_fn;
- q->make_request_fn = __make_request;
- q->plug_tq.sync = 0;
- q->plug_tq.routine = &generic_unplug_device;
- q->plug_tq.data = q;
- q->plugged = 0;
- q->can_throttle = 0;
-
- /*
- * These booleans describe the queue properties. We set the
- * default (and most common) values here. Other drivers can
- * use the appropriate functions to alter the queue properties
- * as appropriate.
- */
- q->plug_device_fn = generic_plug_device;
- q->head_active = 1;
-
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
-}
-
-#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
-/*
- * Get a free request. io_request_lock must be held and interrupts
- * disabled on the way in. Returns NULL if there are no free requests.
- */
-static struct request *get_request(request_queue_t *q, int rw)
-{
- struct request *rq = NULL;
- struct request_list *rl = &q->rq;
-
- if (blk_oversized_queue(q)) {
- int rlim = q->nr_requests >> 5;
-
- if (rlim < 4)
- rlim = 4;
-
- /*
- * if it's a write, or we have more than a handful of reads
- * pending, bail out
- */
- if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
- return NULL;
- if (blk_oversized_queue_reads(q))
- return NULL;
- }
-
- if (!list_empty(&rl->free)) {
- rq = blkdev_free_rq(&rl->free);
- list_del(&rq->queue);
- rl->count--;
- rl->pending[rw]++;
- rq->rq_status = RQ_ACTIVE;
- rq->cmd = rw;
- rq->special = NULL;
- rq->q = q;
- }
-
- return rq;
-}
-
-/*
- * Here's the request allocation design, low latency version:
- *
- * 1: Blocking on request exhaustion is a key part of I/O throttling.
- *
- * 2: We want to be `fair' to all requesters. We must avoid starvation, and
- * attempt to ensure that all requesters sleep for a similar duration. Hence
- * no stealing requests when there are other processes waiting.
- *
- * There used to be more here, attempting to allow a process to send in a
- * number of requests once it has woken up. But, there's no way to
- * tell if a process has just been woken up, or if it is a new process
- * coming in to steal requests from the waiters. So, we give up and force
- * everyone to wait fairly.
- *
- * So here's what we do:
- *
- * a) A READA requester fails if free_requests < batch_requests
- *
- * We don't want READA requests to prevent sleepers from ever
- * waking. Note that READA is used extremely rarely - a few
- * filesystems use it for directory readahead.
- *
- * When a process wants a new request:
- *
- * b) If free_requests == 0, the requester sleeps in FIFO manner, and
- * the queue full condition is set. The full condition is not
- * cleared until there are no longer any waiters. Once the full
- * condition is set, all new io must wait, hopefully for a very
- * short period of time.
- *
- * When a request is released:
- *
- * c) If free_requests < batch_requests, do nothing.
- *
- * d) If free_requests >= batch_requests, wake up a single waiter.
- *
- * As each waiter gets a request, he wakes another waiter. We do this
- * to prevent a race where an unplug might get run before a request makes
- * its way onto the queue. 
The result is a cascade of wakeups, so delaying - * the initial wakeup until we've got batch_requests available helps avoid - * wakeups where there aren't any requests available yet. - */ - -static struct request *__get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&q->wait_for_requests, &wait); - - do { - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irq(&io_request_lock); - if (blk_oversized_queue(q) || q->rq.count == 0) { - __generic_unplug_device(q); - spin_unlock_irq(&io_request_lock); - schedule(); - spin_lock_irq(&io_request_lock); - } - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - } while (rq == NULL); - remove_wait_queue(&q->wait_for_requests, &wait); - current->state = TASK_RUNNING; - - return rq; -} - -static void get_request_wait_wakeup(request_queue_t *q, int rw) -{ - /* - * avoid losing an unplug if a second __get_request_wait did the - * generic_unplug_device while our __get_request_wait was running - * w/o the queue_lock held and w/ our request out of the queue. - */ - if (waitqueue_active(&q->wait_for_requests)) - wake_up(&q->wait_for_requests); -} - -/* RO fail safe mechanism */ - -static long ro_bits[MAX_BLKDEV][8]; - -int is_read_only(kdev_t dev) -{ - int minor,major; - - major = MAJOR(dev); - minor = MINOR(dev); - if (major < 0 || major >= MAX_BLKDEV) return 0; - return ro_bits[major][minor >> 5] & (1 << (minor & 31)); -} - -void set_device_ro(kdev_t dev,int flag) -{ - int minor,major; - - major = MAJOR(dev); - minor = MINOR(dev); - if (major < 0 || major >= MAX_BLKDEV) return; - if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); - else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); -} - -inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) -{ - unsigned int major = MAJOR(dev); - unsigned int index; - - index = disk_index(dev); - if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) - return; - - kstat.dk_drive[major][index] += new_io; - if (rw == READ) { - kstat.dk_drive_rio[major][index] += new_io; - kstat.dk_drive_rblk[major][index] += nr_sectors; - } else if (rw == WRITE) { - kstat.dk_drive_wio[major][index] += new_io; - kstat.dk_drive_wblk[major][index] += nr_sectors; - } else - printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); -} - -#ifdef CONFIG_BLK_STATS -/* - * Return up to two hd_structs on which to do IO accounting for a given - * request. - * - * On a partitioned device, we want to account both against the partition - * and against the whole disk. - */ -static void locate_hd_struct(struct request *req, - struct hd_struct **hd1, - struct hd_struct **hd2) -{ - struct gendisk *gd; - - *hd1 = NULL; - *hd2 = NULL; - - gd = get_gendisk(req->rq_dev); - if (gd && gd->part) { - /* Mask out the partition bits: account for the entire disk */ - int devnr = MINOR(req->rq_dev) >> gd->minor_shift; - int whole_minor = devnr << gd->minor_shift; - - *hd1 = &gd->part[whole_minor]; - if (whole_minor != MINOR(req->rq_dev)) - *hd2= &gd->part[MINOR(req->rq_dev)]; - } -} - -/* - * Round off the performance stats on an hd_struct. - * - * The average IO queue length and utilisation statistics are maintained - * by observing the current state of the queue length and the amount of - * time it has been in this state for. - * Normally, that accounting is done on IO completion, but that can result - * in more than a second's worth of IO being accounted for within any one - * second, leading to >100% utilisation. 
To deal with that, we do a - * round-off before returning the results when reading /proc/partitions, - * accounting immediately for all queue usage up to the current jiffies and - * restarting the counters again. - */ -void disk_round_stats(struct hd_struct *hd) -{ - unsigned long now = jiffies; - - hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change)); - hd->last_queue_change = now; - - if (hd->ios_in_flight) - hd->io_ticks += (now - hd->last_idle_time); - hd->last_idle_time = now; -} - -static inline void down_ios(struct hd_struct *hd) -{ - disk_round_stats(hd); - --hd->ios_in_flight; -} - -static inline void up_ios(struct hd_struct *hd) -{ - disk_round_stats(hd); - ++hd->ios_in_flight; -} - -static void account_io_start(struct hd_struct *hd, struct request *req, - int merge, int sectors) -{ - switch (req->cmd) { - case READ: - if (merge) - hd->rd_merges++; - hd->rd_sectors += sectors; - break; - case WRITE: - if (merge) - hd->wr_merges++; - hd->wr_sectors += sectors; - break; - } - if (!merge) - up_ios(hd); -} - -static void account_io_end(struct hd_struct *hd, struct request *req) -{ - unsigned long duration = jiffies - req->start_time; - switch (req->cmd) { - case READ: - hd->rd_ticks += duration; - hd->rd_ios++; - break; - case WRITE: - hd->wr_ticks += duration; - hd->wr_ios++; - break; - } - down_ios(hd); -} - -void req_new_io(struct request *req, int merge, int sectors) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - account_io_start(hd1, req, merge, sectors); - if (hd2) - account_io_start(hd2, req, merge, sectors); -} - -void req_merged_io(struct request *req) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - down_ios(hd1); - if (hd2) - down_ios(hd2); -} - -void req_finished_io(struct request *req) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - account_io_end(hd1, req); - if (hd2) - account_io_end(hd2, req); -} -EXPORT_SYMBOL(req_finished_io); -#endif /* CONFIG_BLK_STATS */ - -/* - * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the - * request queue list. - * - * By this point, req->cmd is always either READ/WRITE, never READA, - * which is important for drive_stat_acct() above. - */ -static inline void add_request(request_queue_t * q, struct request * req, - struct list_head *insert_here) -{ - drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); - BUG(); - } - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - list_add(&req->queue, insert_here); -} - -/* - * Must be called with io_request_lock held and interrupts disabled - */ -void blkdev_release_request(struct request *req) -{ - request_queue_t *q = req->q; - - req->rq_status = RQ_INACTIVE; - req->q = NULL; - - /* - * Request may not have originated from ll_rw_blk. 
if not, - * assume it has free buffers and check waiters - */ - if (q) { - struct request_list *rl = &q->rq; - int oversized_batch = 0; - - if (q->can_throttle) - oversized_batch = blk_oversized_queue_batch(q); - rl->count++; - /* - * paranoia check - */ - if (req->cmd == READ || req->cmd == WRITE) - rl->pending[req->cmd]--; - if (rl->pending[READ] > q->nr_requests) - printk("blk: reads: %u\n", rl->pending[READ]); - if (rl->pending[WRITE] > q->nr_requests) - printk("blk: writes: %u\n", rl->pending[WRITE]); - if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests) - printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests); - list_add(&req->queue, &rl->free); - if (rl->count >= q->batch_requests && !oversized_batch) { - smp_mb(); - if (waitqueue_active(&q->wait_for_requests)) - wake_up(&q->wait_for_requests); - } - } -} - -/* - * Has to be called with the request spinlock acquired - */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - struct request *next; - - next = blkdev_next_request(req); - if (req->sector + req->nr_sectors != next->sector) - return; - if (req->cmd != next->cmd - || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) - return; - /* - * If we are not allowed to merge these requests, then - * return. If we are allowed to merge, then the count - * will have been updated to the appropriate number, - * and we shouldn't do it here too. - */ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; - - q->elevator.elevator_merge_req_fn(req, next); - - /* At this point we have either done a back merge - * or front merge. We need the smaller start_time of - * the merged requests to be the current request - * for accounting purposes. - */ - if (time_after(req->start_time, next->start_time)) - req->start_time = next->start_time; - - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - - /* One last thing: we have removed a request, so we now have one - less expected IO to complete for accounting purposes. 
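The guard at the top of attempt_merge() above is worth restating: two queued requests combine only when the first ends exactly where the second begins, the command and device match, and the merged size respects the per-device limit. A minimal userspace reduction of that check follows; the toy types are invented for illustration and are not the kernel's struct request:

    #include <stdio.h>

    /* Toy stand-in for struct request: only the fields the guard reads. */
    struct toy_req {
        int cmd;                  /* READ or WRITE */
        int dev;                  /* stands in for rq_dev */
        unsigned long sector;     /* first sector */
        unsigned long nr_sectors; /* length in sectors */
    };

    /* Mirrors the checks at the top of attempt_merge(): back-to-back on
     * disk, same direction, same device, combined size within the limit. */
    static int can_merge(const struct toy_req *a, const struct toy_req *b,
                         unsigned long max_sectors)
    {
        if (a->sector + a->nr_sectors != b->sector)
            return 0;
        if (a->cmd != b->cmd || a->dev != b->dev)
            return 0;
        if (a->nr_sectors + b->nr_sectors > max_sectors)
            return 0;
        return 1;
    }

    int main(void)
    {
        struct toy_req a = { 0, 1, 100, 8 };
        struct toy_req b = { 0, 1, 108, 8 };

        printf("mergeable: %d\n", can_merge(&a, &b, 128)); /* 1 */
        b.sector = 110;                                    /* a 2-sector gap */
        printf("mergeable: %d\n", can_merge(&a, &b, 128)); /* 0 */
        return 0;
    }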
*/ - req_merged_io(req); - - blkdev_release_request(next); -} - -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); -} - -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) -{ - struct list_head * prev; - - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); -} - -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) -{ - unsigned int sector, count, sync; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; - struct list_head *head, *insert_here; - int latency; - elevator_t *elevator = &q->elevator; - int should_wake = 0; - - count = bh->b_size >> 9; - sector = bh->b_rsector; - sync = test_and_clear_bit(BH_Sync, &bh->b_state); - - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: -#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */ - rw_ahead = 1; -#endif - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } - - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ - bh = blk_queue_bounce(q, rw, bh); - -/* look for a free request. 
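On the bounce step just above: blk_queue_bounce() substitutes a low-memory copy of the buffer when its data sits above what the device can address, and keeps the original buffer otherwise. The decision itself is an address comparison. A hedged userspace reduction, where 'limit' plays the role of the queue's bounce threshold and a malloc-and-copy plays the low page (neither is the kernel mechanism):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static void *maybe_bounce(void *data, size_t len, uintptr_t limit)
    {
        void *low;

        if ((uintptr_t)data + len <= limit)
            return data;            /* device can reach it: no copy */
        low = malloc(len);          /* stand-in for a low page */
        if (low != NULL)
            memcpy(low, data, len); /* stage write data before the I/O */
        return low;
    }

    int main(void)
    {
        char buf[64] = "payload";
        void *use = maybe_bounce(buf, sizeof(buf), UINTPTR_MAX);

        printf("bounced: %s\n", use == buf ? "no" : "yes"); /* no */
        return 0;
    }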
*/ - /* - * Try to coalesce the new request with old requests - */ - max_sectors = get_max_sectors(bh->b_rdev); - - req = NULL; - head = &q->queue_head; - /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic - */ - spin_lock_irq(&io_request_lock); - -again: - insert_here = head->prev; - - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ - goto get_rq; - } else if (q->head_active && !q->plugged) - head = head->next; - - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); - switch (el_ret) { - - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) { - insert_here = &req->queue; - break; - } - req->bhtail->b_reqnext = bh; - req->bhtail = bh; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - blk_started_sectors(req, count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - req_new_io(req, 1, count); - attempt_back_merge(q, req, max_sectors, max_segments); - goto out; - - case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) { - insert_here = req->queue.prev; - break; - } - bh->b_reqnext = req->bh; - req->bh = bh; - /* - * may not be valid, but queues not having bounce - * enabled for highmem pages must not look at - * ->buffer anyway - */ - req->buffer = bh->b_data; - req->current_nr_sectors = req->hard_cur_sectors = count; - req->sector = req->hard_sector = sector; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - blk_started_sectors(req, count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - req_new_io(req, 1, count); - attempt_front_merge(q, head, req, max_sectors, max_segments); - goto out; - - /* - * elevator says don't/can't merge. get new request - */ - case ELEVATOR_NO_MERGE: - /* - * use elevator hints as to where to insert the - * request. if no hints, just add it to the back - * of the queue - */ - if (req) - insert_here = &req->queue; - break; - - default: - printk("elevator returned crap (%d)\n", el_ret); - BUG(); - } - -get_rq: - if (freereq) { - req = freereq; - freereq = NULL; - } else { - /* - * See description above __get_request_wait() - */ - if (rw_ahead) { - if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) { - spin_unlock_irq(&io_request_lock); - goto end_io; - } - req = get_request(q, rw); - if (req == NULL) - BUG(); - } else { - req = get_request(q, rw); - if (req == NULL) { - spin_unlock_irq(&io_request_lock); - freereq = __get_request_wait(q, rw); - head = &q->queue_head; - spin_lock_irq(&io_request_lock); - should_wake = 1; - goto again; - } - } - } - -/* fill up the request-info, and add it to the queue */ - req->elevator_sequence = latency; - req->cmd = rw; - req->errors = 0; - req->hard_sector = req->sector = sector; - req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = req->hard_cur_sectors = count; - req->nr_segments = 1; /* Always 1 for a new request. */ - req->nr_hw_segments = 1; /* Always 1 for a new request. 
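The freereq/"goto again" dance above is a general drop-the-lock-and-retry shape: the request grab cannot sleep while the spinlock is held, so on failure the lock is released, the caller sleeps for a free request, and the whole merge scan is redone because the queue may have changed in the meantime. Roughly, with a pthread mutex and condition variable standing in for io_request_lock and the wait queue (all names invented; build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  more = PTHREAD_COND_INITIALIZER;
    static int free_slots = 1;     /* the request free list, as a counter */

    /* get_request(), reduced: lock must be held, never sleeps. */
    static int try_take(void)
    {
        if (free_slots == 0)
            return 0;
        free_slots--;
        return 1;
    }

    /* blkdev_release_request(), reduced. */
    static void put_back(void)
    {
        pthread_mutex_lock(&lock);
        free_slots++;
        pthread_cond_signal(&more);
        pthread_mutex_unlock(&lock);
    }

    /* The __make_request() shape: try under the lock; on failure wait
     * (the condvar drops the lock while asleep, much as the kernel drops
     * io_request_lock around schedule()), then redo the whole scan. */
    static void submit(void)
    {
        int spare = 0;

        pthread_mutex_lock(&lock);
    again:
        if (!spare && !try_take()) {
            while (free_slots == 0)
                pthread_cond_wait(&more, &lock);
            free_slots--;
            spare = 1;
            goto again;            /* rescan with the spare in hand */
        }
        /* merge scan / queue insertion happens here; if a late merge
         * succeeds, the unused spare would go back via put_back() */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        submit();
        printf("slots left: %d\n", free_slots); /* 0 */
        put_back();
        return 0;
    }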
*/ - req->buffer = bh->b_data; - req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; - req->start_time = jiffies; - req_new_io(req, 0, count); - blk_started_io(count); - blk_started_sectors(req, count); - add_request(q, req, insert_here); -out: - if (freereq) - blkdev_release_request(freereq); - if (should_wake) - get_request_wait_wakeup(q, rw); - if (sync) - __generic_unplug_device(q); - spin_unlock_irq(&io_request_lock); - return 0; -end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - return 0; -} - -/** - * generic_make_request: hand a buffer head to its device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. - * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. - * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io - * function described (one day) elsewhere. - * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields - * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. - * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. - * */ -void generic_make_request (int rw, struct buffer_head * bh) -{ - int major = MAJOR(bh->b_rdev); - int minorsize = 0; - request_queue_t *q; - - if (!bh->b_end_io) - BUG(); - - /* Test device size, when known. */ - if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; - if (minorsize) { - unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; - - if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= ~(1 << BH_Dirty); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - bh->b_end_io(bh, 0); - return; - } - } - - /* - * Resolve the mapping until finished. (drivers are - * still free to implement/resolve their own stacking - * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. - * Stacking drivers are expected to know what they are doing. 
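The remap contract spelled out in the docstring is easiest to see as a loop sketch: each layer's make_request function may rewrite b_rdev and b_rsector and return nonzero to ask for another resolution pass, and generic_make_request() re-fetches the queue for the current device until some layer accepts the I/O. A toy two-layer version, with all types and layers invented for illustration:

    #include <stdio.h>

    struct toy_bh { int dev; long sector; };

    /* Each "driver" gets a look at the buffer; returning 1 means it
     * remapped the request onto another device and resolution must
     * continue, 0 means it queued the I/O and we are done. */
    typedef int (*make_request_fn)(struct toy_bh *bh);

    static int raid_layer(struct toy_bh *bh)
    {
        bh->dev = 2;          /* remap dev 1 onto dev 2 */
        bh->sector += 1000;   /* with an offset */
        return 1;             /* not settled yet: resolve again */
    }

    static int disk_layer(struct toy_bh *bh)
    {
        printf("queue I/O: dev %d sector %ld\n", bh->dev, bh->sector);
        return 0;             /* bottom of the stack */
    }

    static make_request_fn get_queue(int dev)
    {
        return dev == 1 ? raid_layer : disk_layer;
    }

    int main(void)
    {
        struct toy_bh bh = { 1, 64 };

        /* The do/while in generic_make_request(), reduced: note the
         * queue is looked up afresh from bh.dev on every pass. */
        do { } while (get_queue(bh.dev)(&bh));
        return 0;
    }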
- */ - do { - q = __blk_get_queue(bh->b_rdev); - if (!q) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); - break; - } - } while (q->make_request_fn(q, rw, bh)); -} - - -/** - * submit_bh: submit a buffer_head to the block device layer for I/O - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O - * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. - * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. - */ -void submit_bh(int rw, struct buffer_head * bh) -{ - int count = bh->b_size >> 9; - - if (!test_bit(BH_Lock, &bh->b_state)) - BUG(); - - set_bit(BH_Req, &bh->b_state); - set_bit(BH_Launder, &bh->b_state); - - /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. - */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; - - get_bh(bh); - generic_make_request(rw, bh); - - /* fix race condition with wait_on_buffer() */ - smp_mb(); /* spin_unlock may have inclusive semantics */ - if (waitqueue_active(&bh->b_wait)) - wake_up(&bh->b_wait); - - if (block_dump) - printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev)); - - put_bh(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } -} - -/** - * ll_rw_block: low-level access to block devices - * @rw: whether to %READ or %WRITE or maybe %READA (readahead) - * @nr: number of &struct buffer_heads in the array - * @bhs: array of pointers to &struct buffer_head - * - * ll_rw_block() takes an array of pointers to &struct buffer_heads, - * and requests an I/O operation on them, either a %READ or a %WRITE. - * The third %READA option is described in the documentation for - * generic_make_request() which ll_rw_block() calls. - * - * This function provides extra functionality that is not in - * generic_make_request() that is relevant to buffers in the buffer - * cache or page cache. In particular it drops any buffer that it - * cannot get a lock on (with the BH_Lock state bit), any buffer that - * appears to be clean when doing a write request, and any buffer that - * appears to be up-to-date when doing a read request. Further, it marks - * as clean buffers that are processed for writing (the buffer cache - * won't assume that they are actually clean until the buffer gets - * unlocked). - * - * ll_rw_block sets b_end_io to a simple completion handler that marks - * the buffer up-to-date (if appropriate), unlocks the buffer and wakes - * any waiters. A client that needs a more interesting completion - * routine should call submit_bh() (or generic_make_request()) - * directly. - * - * Caveat: - * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - -void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) -{ - unsigned int major; - int correct_size; - int i; - - if (!nr) - return; - - major = MAJOR(bhs[0]->b_dev); - - /* Determine correct block size for this device. 
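The buffer-dropping rules described in the docstring above reduce to a two-case decision: a write is skipped when the buffer is already clean, a read when it is already up to date. In plain C, with invented flags and names:

    #include <stdio.h>

    enum { READ_OP, WRITE_OP };      /* invented, for the example only */

    struct toy_buf { int dirty; int uptodate; };

    /* Returns 1 when the buffer really needs I/O for the operation,
     * mirroring the WRITE and READ cases inside ll_rw_block(). */
    static int needs_io(int op, const struct toy_buf *b)
    {
        if (op == WRITE_OP)
            return b->dirty;         /* clean: nothing to write */
        return !b->uptodate;         /* up to date: nothing to read */
    }

    int main(void)
    {
        struct toy_buf clean_cached = { 0, 1 };

        printf("write needed: %d\n", needs_io(WRITE_OP, &clean_cached)); /* 0 */
        printf("read needed: %d\n", needs_io(READ_OP, &clean_cached));   /* 0 */
        return 0;
    }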
*/ - correct_size = get_hardsect_size(bhs[0]->b_dev); - - /* Verify requested block sizes. */ - for (i = 0; i < nr; i++) { - struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { - printk(KERN_NOTICE "ll_rw_block: device %s: " - "only %d-char blocks implemented (%u)\n", - kdevname(bhs[0]->b_dev), - correct_size, bh->b_size); - goto sorry; - } - } - - if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) { - printk(KERN_NOTICE "Can't write to read-only device %s\n", - kdevname(bhs[0]->b_dev)); - goto sorry; - } - - for (i = 0; i < nr; i++) { - struct buffer_head *bh = bhs[i]; - - lock_buffer(bh); - - /* We have the buffer lock */ - atomic_inc(&bh->b_count); - bh->b_end_io = end_buffer_io_sync; - - switch(rw) { - case WRITE: - if (!atomic_set_buffer_clean(bh)) - /* Hmmph! Nothing to write */ - goto end_io; - __mark_buffer_clean(bh); - break; - - case READA: - case READ: - if (buffer_uptodate(bh)) - /* Hmmph! Already have it */ - goto end_io; - break; - default: - BUG(); - end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - continue; - } - - submit_bh(rw, bh); - } - return; - -sorry: - /* Make sure we don't get infinite dirty retries.. */ - for (i = 0; i < nr; i++) - mark_buffer_clean(bhs[i]); -} - -#ifdef CONFIG_STRAM_SWAP -extern int stram_device_init (void); -#endif - -static void blk_writeback_timer(unsigned long data) -{ - wakeup_bdflush(); - wakeup_kupdate(); -} - -/** - * end_that_request_first - end I/O on one buffer. - * @req: the request being processed - * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error - * - * Description: - * Ends I/O on the first buffer attached to @req, and sets it up - * for the next buffer_head (if any) in the cluster. - * - * Return: - * 0 - we are done with this request, call end_that_request_last() - * 1 - still buffers pending for this request - * - * Caveat: - * Drivers implementing their own end_request handling must call - * blk_finished_io() appropriately. 
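Per the return contract just spelled out, a driver completes a clustered request by calling end_that_request_first() once per buffer until it returns 0, then finishing with end_that_request_last(). Reduced to a toy chain walk, with invented types and free() standing in for the b_end_io completion:

    #include <stdio.h>
    #include <stdlib.h>

    struct toy_buf { struct toy_buf *next; };
    struct toy_req { struct toy_buf *bh; };

    /* end_that_request_first(), reduced: finish the head buffer and say
     * whether more buffers remain on the request. */
    static int end_first(struct toy_req *req)
    {
        struct toy_buf *done = req->bh;

        req->bh = done->next;
        free(done);               /* the b_end_io() call, in spirit */
        return req->bh != NULL;   /* 1: keep going, 0: call end_last() */
    }

    static void end_last(struct toy_req *req)
    {
        printf("request %p complete\n", (void *)req);
    }

    int main(void)
    {
        struct toy_buf *b = malloc(sizeof(*b));
        struct toy_buf *a = malloc(sizeof(*a));
        struct toy_req req;

        b->next = NULL;
        a->next = b;
        req.bh = a;

        while (end_first(&req))   /* the usual driver completion loop */
            ;
        end_last(&req);
        return 0;
    }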
- **/ - -int end_that_request_first (struct request *req, int uptodate, char *name) -{ - struct buffer_head * bh; - int nsect; - - req->errors = 0; - if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); - - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - blk_finished_sectors(req, nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - - req->current_nr_sectors = bh->b_size >> 9; - req->hard_cur_sectors = req->current_nr_sectors; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("end_request: buffer-list destroyed\n"); - } - req->buffer = bh->b_data; - return 1; - } - } - return 0; -} - -extern int laptop_mode; - -void end_that_request_last(struct request *req) -{ - struct completion *waiting = req->waiting; - - /* - * schedule the writeout of pending dirty data when the disk is idle - */ - if (laptop_mode && req->cmd == READ) - mod_timer(&writeback_timer, jiffies + 5 * HZ); - - req_finished_io(req); - blkdev_release_request(req); - if (waiting) - complete(waiting); -} - -int __init blk_dev_init(void) -{ - struct blk_dev_struct *dev; - - request_cachep = kmem_cache_create("blkdev_requests", - sizeof(struct request), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - - if (!request_cachep) - panic("Can't create request pool slab cache\n"); - - for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) - dev->queue = NULL; - - memset(ro_bits,0,sizeof(ro_bits)); - memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); - - blk_max_low_pfn = max_low_pfn - 1; - blk_max_pfn = max_pfn - 1; - - init_timer(&writeback_timer); - writeback_timer.function = blk_writeback_timer; - -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else -#if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? 
*/ - outb_p(0xc, 0x3f2); -#endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif - -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif - -#if defined(CONFIG_XEN_BLKDEV_FRONTEND) - xlblk_init(); -#endif - - return 0; -}; - -EXPORT_SYMBOL(io_request_lock); -EXPORT_SYMBOL(end_that_request_first); -EXPORT_SYMBOL(end_that_request_last); -EXPORT_SYMBOL(blk_grow_request_list); -EXPORT_SYMBOL(blk_init_queue); -EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(blk_cleanup_queue); -EXPORT_SYMBOL(blk_queue_headactive); -EXPORT_SYMBOL(blk_queue_throttle_sectors); -EXPORT_SYMBOL(blk_queue_make_request); -EXPORT_SYMBOL(generic_make_request); -EXPORT_SYMBOL(blkdev_release_request); -EXPORT_SYMBOL(generic_unplug_device); -EXPORT_SYMBOL(blk_queue_bounce_limit); -EXPORT_SYMBOL(blk_max_low_pfn); -EXPORT_SYMBOL(blk_max_pfn); -EXPORT_SYMBOL(blk_seg_merge_ok); -EXPORT_SYMBOL(blk_nohighio); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/drivers/char/Makefile --- a/linux-2.4-xen-sparse/drivers/char/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,361 +0,0 @@ -# -# Makefile for the kernel character device drivers. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definitions are now inherited from the -# parent makes.. -# - -# -# This file contains the font map for the default (hardware) font -# -FONTMAPFILE = cp437.uni - -O_TARGET := char.o - -obj-y += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o - -# All of the (potential) objects that export symbols. -# This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'. 
- -export-objs := busmouse.o console.o keyboard.o sysrq.o \ - misc.o pty.o random.o selection.o serial.o \ - sonypi.o tty_io.o tty_ioctl.o generic_serial.o \ - au1000_gpio.o vac-serial.o hp_psaux.o nvram.o \ - scx200.o fetchop.o - -mod-subdirs := joystick ftape drm drm-4.0 pcmcia - -list-multi := - -KEYMAP =defkeymap.o -KEYBD =pc_keyb.o -CONSOLE =console.o -SERIAL =serial.o - -ifeq ($(ARCH),xen) - ifneq ($(CONFIG_XEN_PHYSDEV_ACCESS),y) - KEYBD = - endif -endif - -ifeq ($(ARCH),s390) - KEYMAP = - KEYBD = - CONSOLE = - SERIAL = -endif - -ifeq ($(ARCH),mips) - ifneq ($(CONFIG_PC_KEYB),y) - KEYBD = - endif - ifeq ($(CONFIG_VR41XX_KIU),y) - KEYMAP = - KEYBD = vr41xx_keyb.o - endif -endif - -ifeq ($(ARCH),s390x) - KEYMAP = - KEYBD = - CONSOLE = - SERIAL = -endif - -ifeq ($(ARCH),m68k) - ifdef CONFIG_AMIGA - KEYBD = amikeyb.o - else - ifndef CONFIG_MAC - KEYBD = - endif - endif - SERIAL = -endif - -ifeq ($(ARCH),parisc) - ifdef CONFIG_GSC_PS2 - KEYBD = hp_psaux.o hp_keyb.o - else - KEYBD = - endif - ifdef CONFIG_SERIAL_MUX - CONSOLE += mux.o - endif - ifdef CONFIG_PDC_CONSOLE - CONSOLE += pdc_console.o - endif -endif - -ifdef CONFIG_Q40 - KEYBD += q40_keyb.o - SERIAL = serial.o -endif - -ifdef CONFIG_APOLLO - KEYBD += dn_keyb.o -endif - -ifeq ($(ARCH),parisc) - ifdef CONFIG_GSC_PS2 - KEYBD = hp_psaux.o hp_keyb.o - else - KEYBD = - endif - ifdef CONFIG_PDC_CONSOLE - CONSOLE += pdc_console.o - endif -endif - -ifeq ($(ARCH),arm) - ifneq ($(CONFIG_PC_KEYMAP),y) - KEYMAP = - endif - ifneq ($(CONFIG_PC_KEYB),y) - KEYBD = - endif -endif - -ifeq ($(ARCH),sh) - KEYMAP = - KEYBD = - CONSOLE = - ifeq ($(CONFIG_SH_HP600),y) - KEYMAP = defkeymap.o - KEYBD = scan_keyb.o hp600_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_DMIDA),y) - # DMIDA does not connect the HD64465 PS/2 keyboard port - # but we allow for USB keyboards to be plugged in. 
- KEYMAP = defkeymap.o - KEYBD = # hd64465_keyb.o pc_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_EC3104),y) - KEYMAP = defkeymap.o - KEYBD = ec3104_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_DREAMCAST),y) - KEYMAP = defkeymap.o - KEYBD = - CONSOLE = console.o - endif -endif - -ifeq ($(CONFIG_DECSTATION),y) - KEYMAP = - KEYBD = -endif - -ifeq ($(CONFIG_BAGET_MIPS),y) - KEYBD = - SERIAL = vac-serial.o -endif - -ifeq ($(CONFIG_NINO),y) - SERIAL = -endif - -ifneq ($(CONFIG_SUN_SERIAL),) - SERIAL = -endif - -ifeq ($(CONFIG_QTRONIX_KEYBOARD),y) - KEYBD = qtronix.o - KEYMAP = qtronixmap.o -endif - -ifeq ($(CONFIG_DUMMY_KEYB),y) - KEYBD = dummy_keyb.o -endif - -obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o -obj-$(CONFIG_SERIAL) += $(SERIAL) -obj-$(CONFIG_PARPORT_SERIAL) += parport_serial.o -obj-$(CONFIG_SERIAL_HCDP) += hcdp_serial.o -obj-$(CONFIG_SERIAL_21285) += serial_21285.o -obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o -obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o -obj-$(CONFIG_TS_AU1X00_ADS7846) += au1000_ts.o -obj-$(CONFIG_SERIAL_DEC) += decserial.o - -ifndef CONFIG_SUN_KEYBOARD - obj-$(CONFIG_VT) += keyboard.o $(KEYMAP) $(KEYBD) -else - obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP) -endif - -obj-$(CONFIG_HIL) += hp_keyb.o -obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o -obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o -obj-$(CONFIG_ROCKETPORT) += rocket.o -obj-$(CONFIG_MOXA_SMARTIO) += mxser.o -obj-$(CONFIG_MOXA_INTELLIO) += moxa.o -obj-$(CONFIG_DIGI) += pcxx.o -obj-$(CONFIG_DIGIEPCA) += epca.o -obj-$(CONFIG_CYCLADES) += cyclades.o -obj-$(CONFIG_STALLION) += stallion.o -obj-$(CONFIG_ISTALLION) += istallion.o -obj-$(CONFIG_SIBYTE_SB1250_DUART) += sb1250_duart.o -obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o -obj-$(CONFIG_RISCOM8) += riscom8.o -obj-$(CONFIG_ISI) += isicom.o -obj-$(CONFIG_ESPSERIAL) += esp.o -obj-$(CONFIG_SYNCLINK) += synclink.o -obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o -obj-$(CONFIG_N_HDLC) += n_hdlc.o -obj-$(CONFIG_SPECIALIX) += specialix.o -obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o -obj-$(CONFIG_A2232) += ser_a2232.o generic_serial.o -obj-$(CONFIG_SX) += sx.o generic_serial.o -obj-$(CONFIG_RIO) += rio/rio.o generic_serial.o -obj-$(CONFIG_SH_SCI) += sh-sci.o generic_serial.o -obj-$(CONFIG_SERIAL167) += serial167.o -obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o -obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o -obj-$(CONFIG_TXX927_SERIAL) += serial_txx927.o -obj-$(CONFIG_SERIAL_TXX9) += generic_serial.o serial_txx9.o -obj-$(CONFIG_IP22_SERIAL) += sgiserial.o -obj-$(CONFIG_AU1X00_UART) += au1x00-serial.o -obj-$(CONFIG_SGI_L1_SERIAL) += sn_serial.o - -subdir-$(CONFIG_RIO) += rio -subdir-$(CONFIG_INPUT) += joystick - -obj-$(CONFIG_ATIXL_BUSMOUSE) += atixlmouse.o -obj-$(CONFIG_LOGIBUSMOUSE) += logibusmouse.o -obj-$(CONFIG_PRINTER) += lp.o -obj-$(CONFIG_TIPAR) += tipar.o -obj-$(CONFIG_OBMOUSE) += obmouse.o - -ifeq ($(CONFIG_INPUT),y) -obj-y += joystick/js.o -endif - -obj-$(CONFIG_FETCHOP) += fetchop.o -obj-$(CONFIG_BUSMOUSE) += busmouse.o -obj-$(CONFIG_DTLK) += dtlk.o -obj-$(CONFIG_R3964) += n_r3964.o -obj-$(CONFIG_APPLICOM) += applicom.o -obj-$(CONFIG_SONYPI) += sonypi.o -obj-$(CONFIG_MS_BUSMOUSE) += msbusmouse.o -obj-$(CONFIG_82C710_MOUSE) += qpmouse.o -obj-$(CONFIG_AMIGAMOUSE) += amigamouse.o -obj-$(CONFIG_ATARIMOUSE) += atarimouse.o 
-obj-$(CONFIG_ADBMOUSE) += adbmouse.o -obj-$(CONFIG_PC110_PAD) += pc110pad.o -obj-$(CONFIG_MK712_MOUSE) += mk712.o -obj-$(CONFIG_RTC) += rtc.o -obj-$(CONFIG_GEN_RTC) += genrtc.o -obj-$(CONFIG_EFI_RTC) += efirtc.o -obj-$(CONFIG_SGI_DS1286) += ds1286.o -obj-$(CONFIG_MIPS_RTC) += mips_rtc.o -obj-$(CONFIG_SGI_IP27_RTC) += ip27-rtc.o -ifeq ($(CONFIG_PPC),) - obj-$(CONFIG_NVRAM) += nvram.o -endif -obj-$(CONFIG_TOSHIBA) += toshiba.o -obj-$(CONFIG_I8K) += i8k.o -obj-$(CONFIG_DS1620) += ds1620.o -obj-$(CONFIG_DS1742) += ds1742.o -obj-$(CONFIG_INTEL_RNG) += i810_rng.o -obj-$(CONFIG_AMD_RNG) += amd768_rng.o -obj-$(CONFIG_HW_RANDOM) += hw_random.o -obj-$(CONFIG_AMD_PM768) += amd76x_pm.o -obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o - -obj-$(CONFIG_ITE_GPIO) += ite_gpio.o -obj-$(CONFIG_AU1X00_GPIO) += au1000_gpio.o -obj-$(CONFIG_AU1X00_USB_TTY) += au1000_usbtty.o -obj-$(CONFIG_AU1X00_USB_RAW) += au1000_usbraw.o -obj-$(CONFIG_COBALT_LCD) += lcd.o - -obj-$(CONFIG_QIC02_TAPE) += tpqic02.o - -subdir-$(CONFIG_FTAPE) += ftape -subdir-$(CONFIG_DRM_OLD) += drm-4.0 -subdir-$(CONFIG_DRM_NEW) += drm -subdir-$(CONFIG_PCMCIA) += pcmcia -subdir-$(CONFIG_AGP) += agp - -ifeq ($(CONFIG_FTAPE),y) -obj-y += ftape/ftape.o -endif - -obj-$(CONFIG_H8) += h8.o -obj-$(CONFIG_PPDEV) += ppdev.o -obj-$(CONFIG_DZ) += dz.o -obj-$(CONFIG_NWBUTTON) += nwbutton.o -obj-$(CONFIG_NWFLASH) += nwflash.o -obj-$(CONFIG_SCx200) += scx200.o -obj-$(CONFIG_SCx200_GPIO) += scx200_gpio.o - -# Only one watchdog can succeed. We probe the hardware watchdog -# drivers first, then the softdog driver. This means if your hardware -# watchdog dies or is 'borrowed' for some reason the software watchdog -# still gives you some cover. - -obj-$(CONFIG_PCWATCHDOG) += pcwd.o -obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o -obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o -obj-$(CONFIG_IB700_WDT) += ib700wdt.o -obj-$(CONFIG_MIXCOMWD) += mixcomwd.o -obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o -obj-$(CONFIG_W83877F_WDT) += w83877f_wdt.o -obj-$(CONFIG_SC520_WDT) += sc520_wdt.o -obj-$(CONFIG_WDT) += wdt.o -obj-$(CONFIG_WDTPCI) += wdt_pci.o -obj-$(CONFIG_21285_WATCHDOG) += wdt285.o -obj-$(CONFIG_977_WATCHDOG) += wdt977.o -obj-$(CONFIG_I810_TCO) += i810-tco.o -obj-$(CONFIG_MACHZ_WDT) += machzwd.o -obj-$(CONFIG_SH_WDT) += shwdt.o -obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o -obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o -obj-$(CONFIG_ALIM1535_WDT) += alim1535d_wdt.o -obj-$(CONFIG_INDYDOG) += indydog.o -obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o -obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o -obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o -obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o -obj-$(CONFIG_INDYDOG) += indydog.o -obj-$(CONFIG_8xx_WDT) += mpc8xx_wdt.o - -subdir-$(CONFIG_MWAVE) += mwave -ifeq ($(CONFIG_MWAVE),y) - obj-y += mwave/mwave.o -endif - -subdir-$(CONFIG_IPMI_HANDLER) += ipmi -ifeq ($(CONFIG_IPMI_HANDLER),y) - obj-y += ipmi/ipmi.o -endif - -include $(TOPDIR)/Rules.make - -fastdep: - -conmakehash: conmakehash.c - $(HOSTCC) $(HOSTCFLAGS) -o conmakehash conmakehash.c - -consolemap_deftbl.c: $(FONTMAPFILE) conmakehash - ./conmakehash $(FONTMAPFILE) > consolemap_deftbl.c - -consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h - -.DELETE_ON_ERROR: - -defkeymap.c: defkeymap.map - set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@ - -qtronixmap.c: qtronixmap.map - set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/drivers/char/mem.c --- a/linux-2.4-xen-sparse/drivers/char/mem.c Sat Oct 8 17:37:45 2005 
+++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,812 +0,0 @@ -/* - * linux/drivers/char/mem.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Added devfs support. - * Jan-11-1998, C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx> - * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@xxxxxxx> - * - * MODIFIED FOR XEN by Keir Fraser, 10th July 2003. - * Linux running on Xen has strange semantics for /dev/mem and /dev/kmem!! - * 1. mmap will not work on /dev/kmem - * 2. mmap on /dev/mem interprets the 'file offset' as a machine address - * rather than a physical address. - * I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped - * to get at memory-mapped I/O spaces (eg. the VESA X server does this). - * For this to work at all we need to expect machine addresses. - * Reading/writing of /dev/kmem expects kernel virtual addresses, as usual. - * Reading/writing of /dev/mem expects 'physical addresses' as usual -- this - * is because /dev/mem can only read/write existing kernel mappings, which - * will be normal RAM, and we should present pseudo-physical layout for all - * except I/O (which is the sticky case that mmap is hacked to deal with). - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/miscdevice.h> -#include <linux/tpqic02.h> -#include <linux/ftape.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/mman.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/raw.h> -#include <linux/tty.h> -#include <linux/capability.h> -#include <linux/ptrace.h> - -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/pgalloc.h> - -#ifdef CONFIG_I2C -extern int i2c_init_all(void); -#endif -#ifdef CONFIG_FB -extern void fbmem_init(void); -#endif -#ifdef CONFIG_PROM_CONSOLE -extern void prom_con_init(void); -#endif -#ifdef CONFIG_MDA_CONSOLE -extern void mda_console_init(void); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) -extern void tapechar_init(void); -#endif - -static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp, - const char * buf, size_t count, loff_t *ppos) -{ - ssize_t written; - - written = 0; -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. */ - if (realp < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-realp; - if (sz > count) sz = count; - /* Hmm. Do something? */ - buf+=sz; - p+=sz; - count-=sz; - written+=sz; - } -#endif - if (copy_from_user(p, buf, count)) - return -EFAULT; - written += count; - *ppos = realp + written; - return written; -} - - -/* - * This function reads the *physical* memory. The f_pos points directly to the - * memory location. - */ -static ssize_t read_mem(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - unsigned long end_mem; - ssize_t read; - - end_mem = __pa(high_memory); - if (p >= end_mem) - return 0; - if (count > end_mem - p) - count = end_mem - p; - read = 0; -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. 
*/ - if (p < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-p; - if (sz > count) - sz = count; - if (sz > 0) { - if (clear_user(buf, sz)) - return -EFAULT; - buf += sz; - p += sz; - count -= sz; - read += sz; - } - } -#endif - if (copy_to_user(buf, __va(p), count)) - return -EFAULT; - read += count; - *ppos = p + read; - return read; -} - -static ssize_t write_mem(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - unsigned long end_mem; - - end_mem = __pa(high_memory); - if (p >= end_mem) - return 0; - if (count > end_mem - p) - count = end_mem - p; - return do_write_mem(file, __va(p), p, buf, count, ppos); -} - -#ifndef pgprot_noncached - -/* - * This should probably be per-architecture in <asm/pgtable.h> - */ -static inline pgprot_t pgprot_noncached(pgprot_t _prot) -{ - unsigned long prot = pgprot_val(_prot); - -#if defined(__i386__) || defined(__x86_64__) - /* On PPro and successors, PCD alone doesn't always mean - uncached because of interactions with the MTRRs. PCD | PWT - means definitely uncached. */ - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; -#elif defined(__powerpc__) - prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; -#elif defined(__mc68000__) -#ifdef SUN3_PAGE_NOCACHE - if (MMU_IS_SUN3) - prot |= SUN3_PAGE_NOCACHE; - else -#endif - if (MMU_IS_851 || MMU_IS_030) - prot |= _PAGE_NOCACHE030; - /* Use no-cache mode, serialized */ - else if (MMU_IS_040 || MMU_IS_060) - prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S; -#endif - - return __pgprot(prot); -} - -#endif /* !pgprot_noncached */ - -/* - * Architectures vary in how they handle caching for addresses - * outside of main memory. - */ -static inline int noncached_address(unsigned long addr) -{ -#if defined(__i386__) - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting PCD or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) ) - && addr >= __pa(high_memory); -#else - return addr >= __pa(high_memory); -#endif -} - -#if !defined(CONFIG_XEN) -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - /* - * Accessing memory above the top the kernel knows about or - * through a file pointer that was marked O_SYNC will be - * done non-cached. - */ - if (noncached_address(offset) || (file->f_flags & O_SYNC)) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* Don't try to swap out physical pages.. */ - vma->vm_flags |= VM_RESERVED; - - /* - * Don't dump addresses that are not real memory to a core file. 
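The cacheability policy in mmap_mem() above comes down to one predicate: map uncached when the offset lies beyond the kernel's notion of RAM, or when the file was opened O_SYNC (the MTRR-related refinements in noncached_address() are left out here). As a small self-contained check, with invented constants standing in for O_SYNC and __pa(high_memory):

    #include <stdio.h>

    #define TOY_O_SYNC 1                  /* stand-in for O_SYNC */

    /* Pretend the kernel's RAM ends at 896 MB, as on a classic x86 split. */
    static unsigned long high_memory_pa = 0x38000000UL;

    /* Mirrors the test in mmap_mem(): beyond-RAM targets and O_SYNC
     * mappings are made uncacheable. */
    static int want_uncached(unsigned long offset, int f_flags)
    {
        return offset >= high_memory_pa || (f_flags & TOY_O_SYNC);
    }

    int main(void)
    {
        printf("%d\n", want_uncached(0x00100000UL, 0));          /* 0: plain RAM */
        printf("%d\n", want_uncached(0xfee00000UL, 0));          /* 1: device space */
        printf("%d\n", want_uncached(0x00100000UL, TOY_O_SYNC)); /* 1: O_SYNC */
        return 0;
    }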
- */ - if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC)) - vma->vm_flags |= VM_IO; - - if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - return 0; -} -#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST) -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - return -ENXIO; -} -#else -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - if (!(xen_start_info.flags & SIF_PRIVILEGED)) - return -ENXIO; - - /* DONTCOPY is essential for Xen as copy_page_range is broken. */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, - vma->vm_end-vma->vm_start, vma->vm_page_prot, - DOMID_IO)) - return -EAGAIN; - return 0; -} -#endif /* CONFIG_XEN */ - -/* - * This function reads the *virtual* memory as seen by the kernel. - */ -static ssize_t read_kmem(struct file *file, char *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t read = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ - - if (p < (unsigned long) high_memory) { - read = count; - if (count > (unsigned long) high_memory - p) - read = (unsigned long) high_memory - p; - -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE && read > 0) { - size_t tmp = PAGE_SIZE - p; - if (tmp > read) tmp = read; - if (clear_user(buf, tmp)) - return -EFAULT; - buf += tmp; - p += tmp; - read -= tmp; - count -= tmp; - } -#endif - if (copy_to_user(buf, (char *)p, read)) - return -EFAULT; - p += read; - buf += read; - count -= read; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - int len = count; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - len = vread(kbuf, (char *)p, len); - if (!len) - break; - if (copy_to_user(buf, kbuf, len)) { - free_page((unsigned long)kbuf); - return -EFAULT; - } - count -= len; - buf += len; - virtr += len; - p += len; - } - free_page((unsigned long)kbuf); - } - *ppos = p; - return virtr + read; -} - -extern long vwrite(char *buf, char *addr, unsigned long count); - -/* - * This function writes to the *virtual* memory as seen by the kernel. 
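read_kmem() above, and write_kmem() below, handle vmalloc space the same way: the transfer is shuttled through a single page-sized staging buffer, at most PAGE_SIZE bytes per pass. The skeleton in ordinary C, where memcpy stands in for copy_from_user()/vwrite() and the page size is illustrative:

    #include <stdio.h>
    #include <string.h>

    #define TOY_PAGE 4096

    /* Copy 'count' bytes from src to dst through a fixed staging buffer,
     * the way write_kmem() moves user data through one free page before
     * handing it to vwrite(). */
    static size_t staged_copy(char *dst, const char *src, size_t count)
    {
        char page[TOY_PAGE];
        size_t done = 0;

        while (count > 0) {
            size_t len = count > TOY_PAGE ? TOY_PAGE : count;

            memcpy(page, src + done, len); /* copy_from_user() stand-in */
            memcpy(dst + done, page, len); /* vwrite() stand-in */
            done += len;
            count -= len;
        }
        return done;
    }

    int main(void)
    {
        char src[10000], dst[10000];

        memset(src, 'x', sizeof(src));
        printf("copied %zu bytes\n", staged_copy(dst, src, sizeof(src)));
        return 0;
    }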
- */ -static ssize_t write_kmem(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t wrote = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ - - if (p < (unsigned long) high_memory) { - wrote = count; - if (count > (unsigned long) high_memory - p) - wrote = (unsigned long) high_memory - p; - - wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos); - - p += wrote; - buf += wrote; - count -= wrote; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - int len = count; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - if (len && copy_from_user(kbuf, buf, len)) { - free_page((unsigned long)kbuf); - return -EFAULT; - } - len = vwrite(kbuf, (char *)p, len); - count -= len; - buf += len; - virtr += len; - p += len; - } - free_page((unsigned long)kbuf); - } - - *ppos = p; - return virtr + wrote; -} - -#if defined(CONFIG_ISA) || !defined(__mc68000__) -static ssize_t read_port(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long i = *ppos; - char *tmp = buf; - - if (verify_area(VERIFY_WRITE,buf,count)) - return -EFAULT; - while (count-- > 0 && i < 65536) { - if (__put_user(inb(i),tmp) < 0) - return -EFAULT; - i++; - tmp++; - } - *ppos = i; - return tmp-buf; -} - -static ssize_t write_port(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long i = *ppos; - const char * tmp = buf; - - if (verify_area(VERIFY_READ,buf,count)) - return -EFAULT; - while (count-- > 0 && i < 65536) { - char c; - if (__get_user(c, tmp)) - return -EFAULT; - outb(c,i); - i++; - tmp++; - } - *ppos = i; - return tmp-buf; -} -#endif - -static ssize_t read_null(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - return 0; -} - -static ssize_t write_null(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return count; -} - -/* - * For fun, we are using the MMU for this. - */ -static inline size_t read_zero_pagealigned(char * buf, size_t size) -{ - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long addr=(unsigned long)buf; - - mm = current->mm; - /* Oops, this was forgotten before. -ben */ - down_read(&mm->mmap_sem); - - /* For private mappings, just map in zero pages. */ - for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { - unsigned long count; - - if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) - goto out_up; - if (vma->vm_flags & VM_SHARED) - break; - count = vma->vm_end - addr; - if (count > size) - count = size; - - zap_page_range(mm, addr, count); - zeromap_page_range(addr, count, PAGE_COPY); - - size -= count; - buf += count; - addr += count; - if (size == 0) - goto out_up; - } - - up_read(&mm->mmap_sem); - - /* The shared case is hard. Let's do the conventional zeroing. */ - do { - unsigned long unwritten = clear_user(buf, PAGE_SIZE); - if (unwritten) - return size + unwritten - PAGE_SIZE; - if (current->need_resched) - schedule(); - buf += PAGE_SIZE; - size -= PAGE_SIZE; - } while (size); - - return size; -out_up: - up_read(&mm->mmap_sem); - return size; -} - -static ssize_t read_zero(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long left, unwritten, written = 0; - - if (!count) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - left = count; - - /* do we want to be clever? 
Arbitrary cut-off */ - if (count >= PAGE_SIZE*4) { - unsigned long partial; - - /* How much left of the page? */ - partial = (PAGE_SIZE-1) & -(unsigned long) buf; - unwritten = clear_user(buf, partial); - written = partial - unwritten; - if (unwritten) - goto out; - left -= partial; - buf += partial; - unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); - written += (left & PAGE_MASK) - unwritten; - if (unwritten) - goto out; - buf += left & PAGE_MASK; - left &= ~PAGE_MASK; - } - unwritten = clear_user(buf, left); - written += left - unwritten; -out: - return written ? written : -EFAULT; -} - -static int mmap_zero(struct file * file, struct vm_area_struct * vma) -{ - if (vma->vm_flags & VM_SHARED) - return shmem_zero_setup(vma); - if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; -} - -static ssize_t write_full(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return -ENOSPC; -} - -/* - * Special lseek() function for /dev/null and /dev/zero. Most notably, you - * can fopen() both devices with "a" now. This was previously impossible. - * -- SRB. - */ - -static loff_t null_lseek(struct file * file, loff_t offset, int orig) -{ - return file->f_pos = 0; -} - -/* - * The memory devices use the full 32/64 bits of the offset, and so we cannot - * check against negative addresses: they are ok. The return value is weird, - * though, in that case (0). - * - * also note that seeking relative to the "end of file" isn't supported: - * it has no meaning, so it returns -EINVAL. - */ -static loff_t memory_lseek(struct file * file, loff_t offset, int orig) -{ - loff_t ret; - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - force_successful_syscall_return(); - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - force_successful_syscall_return(); - break; - default: - ret = -EINVAL; - } - return ret; -} - -static int open_port(struct inode * inode, struct file * filp) -{ - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long kaddr; - pgd_t *pgd; - pmd_t *pmd; - pte_t *ptep, pte; - struct page *page = NULL; - - /* address is user VA; convert to kernel VA of desired page */ - kaddr = (address - vma->vm_start) + offset; - kaddr = VMALLOC_VMADDR(kaddr); - - spin_lock(&init_mm.page_table_lock); - - /* Lookup page structure for kernel VA */ - pgd = pgd_offset(&init_mm, kaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - goto out; - pmd = pmd_offset(pgd, kaddr); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - goto out; - ptep = pte_offset(pmd, kaddr); - if (!ptep) - goto out; - pte = *ptep; - if (!pte_present(pte)) - goto out; - if (write && !pte_write(pte)) - goto out; - page = pte_page(pte); - if (!VALID_PAGE(page)) { - page = NULL; - goto out; - } - - /* Increment reference count on page */ - get_page(page); - -out: - spin_unlock(&init_mm.page_table_lock); - - return page; -} - -struct vm_operations_struct kmem_vm_ops = { - nopage: kmem_vm_nopage, -}; - -static int mmap_kmem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - - /* - * If the user is not attempting to mmap a high memory address then - * the standard mmap_mem mechanism will work. 
High memory addresses - * need special handling, as remap_page_range expects a physically- - * contiguous range of kernel addresses (such as obtained in kmalloc). - */ - if ((offset + size) < (unsigned long) high_memory) - return mmap_mem(file, vma); - - /* - * Accessing memory above the top the kernel knows about or - * through a file pointer that was marked O_SYNC will be - * done non-cached. - */ - if (noncached_address(offset) || (file->f_flags & O_SYNC)) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* Don't do anything here; "nopage" will fill the holes */ - vma->vm_ops = &kmem_vm_ops; - - /* Don't try to swap out physical pages.. */ - vma->vm_flags |= VM_RESERVED; - - /* - * Don't dump addresses that are not real memory to a core file. - */ - vma->vm_flags |= VM_IO; - - return 0; -} - -#define zero_lseek null_lseek -#define full_lseek null_lseek -#define write_zero write_null -#define read_full read_zero -#define open_mem open_port -#define open_kmem open_mem - -static struct file_operations mem_fops = { - llseek: memory_lseek, - read: read_mem, - write: write_mem, - mmap: mmap_mem, - open: open_mem, -}; - -static struct file_operations kmem_fops = { - llseek: memory_lseek, - read: read_kmem, - write: write_kmem, -#if !defined(CONFIG_XEN) - mmap: mmap_kmem, -#endif - open: open_kmem, -}; - -static struct file_operations null_fops = { - llseek: null_lseek, - read: read_null, - write: write_null, -}; - -#if defined(CONFIG_ISA) || !defined(__mc68000__) -static struct file_operations port_fops = { - llseek: memory_lseek, - read: read_port, - write: write_port, - open: open_port, -}; -#endif - -static struct file_operations zero_fops = { - llseek: zero_lseek, - read: read_zero, - write: write_zero, - mmap: mmap_zero, -}; - -static struct file_operations full_fops = { - llseek: full_lseek, - read: read_full, - write: write_full, -}; - -static int memory_open(struct inode * inode, struct file * filp) -{ - switch (MINOR(inode->i_rdev)) { - case 1: - filp->f_op = &mem_fops; - break; - case 2: - filp->f_op = &kmem_fops; - break; - case 3: - filp->f_op = &null_fops; - break; -#if defined(CONFIG_ISA) || !defined(__mc68000__) - case 4: - filp->f_op = &port_fops; - break; -#endif - case 5: - filp->f_op = &zero_fops; - break; - case 7: - filp->f_op = &full_fops; - break; - case 8: - filp->f_op = &random_fops; - break; - case 9: - filp->f_op = &urandom_fops; - break; - default: - return -ENXIO; - } - if (filp->f_op && filp->f_op->open) - return filp->f_op->open(inode,filp); - return 0; -} - -void __init memory_devfs_register (void) -{ - /* These are never unregistered */ - static const struct { - unsigned short minor; - char *name; - umode_t mode; - struct file_operations *fops; - } list[] = { /* list of minor devices */ - {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, - {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, - {3, "null", S_IRUGO | S_IWUGO, &null_fops}, -#if defined(CONFIG_ISA) || !defined(__mc68000__) - {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, -#endif - {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, - {7, "full", S_IRUGO | S_IWUGO, &full_fops}, - {8, "random", S_IRUGO | S_IWUSR, &random_fops}, - {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops} - }; - int i; - - for (i=0; i<(sizeof(list)/sizeof(*list)); i++) - devfs_register (NULL, list[i].name, DEVFS_FL_NONE, - MEM_MAJOR, list[i].minor, - list[i].mode | S_IFCHR, - list[i].fops, NULL); -} - -static struct file_operations memory_fops = { - open: memory_open, /* just a selector for the real open */ -}; 
- -int __init chr_dev_init(void) -{ - if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops)) - printk("unable to get major %d for memory devs\n", MEM_MAJOR); - memory_devfs_register(); - rand_initialize(); -#ifdef CONFIG_I2C - i2c_init_all(); -#endif -#if defined (CONFIG_FB) - fbmem_init(); -#endif -#if defined (CONFIG_PROM_CONSOLE) - prom_con_init(); -#endif -#if defined (CONFIG_MDA_CONSOLE) - mda_console_init(); -#endif - tty_init(); -#ifdef CONFIG_M68K_PRINTER - lp_m68k_init(); -#endif - misc_init(); -#if CONFIG_QIC02_TAPE - qic02_tape_init(); -#endif -#ifdef CONFIG_FTAPE - ftape_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) - tapechar_init(); -#endif - return 0; -} - -__initcall(chr_dev_init); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/drivers/char/tty_io.c --- a/linux-2.4-xen-sparse/drivers/char/tty_io.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,2891 +0,0 @@ -/* - * linux/drivers/char/tty_io.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles - * or rs-channels. It also implements echoing, cooked mode etc. - * - * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0. - * - * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the - * tty_struct and tty_queue structures. Previously there was an array - * of 256 tty_struct's which was statically allocated, and the - * tty_queue structures were allocated at boot time. Both are now - * dynamically allocated only when the tty is open. - * - * Also restructured routines so that there is more of a separation - * between the high-level tty routines (tty_io.c and tty_ioctl.c) and - * the low-level tty routines (serial.c, pty.c, console.c). This - * makes for cleaner and more compact code. -TYT, 9/17/92 - * - * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines - * which can be dynamically activated and de-activated by the line - * discipline handling modules (like SLIP). - * - * NOTE: pay no attention to the line discipline code (yet); its - * interface is still subject to change in this version... - * -- TYT, 1/31/92 - * - * Added functionality to the OPOST tty handling. No delays, but all - * other bits should be there. - * -- Nick Holloway <alfie@xxxxxxxxxxxxxxxxx>, 27th May 1993. - * - * Rewrote canonical mode and added more termios flags. - * -- julian@xxxxxxxxxxxxxxxxxxxxxx (J. Cowley), 13Jan94 - * - * Reorganized FASYNC support so mouse code can share it. - * -- ctm@xxxxxxxx, 9Sep95 - * - * New TIOCLINUX variants added. - * -- mj@xxxxxxxxxxxxxxxxx, 19-Nov-95 - * - * Restrict vt switching via ioctl() - * -- grif@xxxxxxxxxx, 5-Dec-95 - * - * Move console and virtual terminal code to more appropriate files, - * implement CONFIG_VT and generalize console device interface. - * -- Marko Kohtala <Marko.Kohtala@xxxxxx>, March 97 - * - * Rewrote init_dev and release_dev to eliminate races. - * -- Bill Hawes <whawes@xxxxxxxx>, June 97 - * - * Added devfs support. - * -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 13-Jan-1998 - * - * Added support for a Unix98-style ptmx device. - * -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 14-Jan-1998 - * - * Reduced memory usage for older ARM systems - * -- Russell King <rmk@xxxxxxxxxxxxxxxx> - * - * Move do_SAK() into process context. Less stack use in devfs functions. 
- * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@xxxxxxxxxx> 17Mar01 - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/major.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/fcntl.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/tty.h> -#include <linux/tty_driver.h> -#include <linux/tty_flip.h> -#include <linux/devpts_fs.h> -#include <linux/file.h> -#include <linux/console.h> -#include <linux/timer.h> -#include <linux/ctype.h> -#include <linux/kd.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/poll.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/smp_lock.h> - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> - -#include <linux/kbd_kern.h> -#include <linux/vt_kern.h> -#include <linux/selection.h> -#include <linux/devfs_fs_kernel.h> - -#include <linux/kmod.h> - -#ifdef CONFIG_XEN_CONSOLE -extern void xen_console_init(void); -#endif - -#ifdef CONFIG_VT -extern void con_init_devfs (void); -#endif - -extern void disable_early_printk(void); - -#define CONSOLE_DEV MKDEV(TTY_MAJOR,0) -#define TTY_DEV MKDEV(TTYAUX_MAJOR,0) -#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1) -#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2) - -#undef TTY_DEBUG_HANGUP - -#define TTY_PARANOIA_CHECK 1 -#define CHECK_TTY_COUNT 1 - -struct termios tty_std_termios; /* for the benefit of tty drivers */ -struct tty_driver *tty_drivers; /* linked list of tty drivers */ - -#ifdef CONFIG_UNIX98_PTYS -extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */ -extern struct tty_driver pts_driver[]; /* Unix98 pty slaves; for /dev/ptmx */ -#endif - -static void initialize_tty_struct(struct tty_struct *tty); - -static ssize_t tty_read(struct file *, char *, size_t, loff_t *); -static ssize_t tty_write(struct file *, const char *, size_t, loff_t *); -static unsigned int tty_poll(struct file *, poll_table *); -static int tty_open(struct inode *, struct file *); -static int tty_release(struct inode *, struct file *); -int tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg); -static int tty_fasync(int fd, struct file * filp, int on); -extern int vme_scc_init (void); -extern long vme_scc_console_init(void); -extern int serial167_init(void); -extern long serial167_console_init(void); -extern void console_8xx_init(void); -extern void au1x00_serial_console_init(void); -extern int rs_8xx_init(void); -extern void mac_scc_console_init(void); -extern void hwc_console_init(void); -extern void hwc_tty_init(void); -extern void con3215_init(void); -extern void tty3215_init(void); -extern void tub3270_con_init(void); -extern void tub3270_init(void); -extern void rs285_console_init(void); -extern void sa1100_rs_console_init(void); -extern void sgi_serial_console_init(void); -extern void sn_sal_serial_console_init(void); -extern void sci_console_init(void); -extern void dec_serial_console_init(void); -extern void tx3912_console_init(void); -extern void tx3912_rs_init(void); -extern void txx927_console_init(void); -extern void txx9_rs_init(void); -extern void txx9_serial_console_init(void); -extern void sb1250_serial_console_init(void); -extern void arc_console_init(void); -extern int hvc_console_init(void); - -#ifndef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef MAX -#define MAX(a,b) ((a) < (b) ? 
(b) : (a)) -#endif - -static struct tty_struct *alloc_tty_struct(void) -{ - struct tty_struct *tty; - - tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); - if (tty) - memset(tty, 0, sizeof(struct tty_struct)); - return tty; -} - -static inline void free_tty_struct(struct tty_struct *tty) -{ - kfree(tty); -} - -/* - * This routine returns the name of tty. - */ -static char * -_tty_make_name(struct tty_struct *tty, const char *name, char *buf) -{ - int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0; - - if (!tty) /* Hmm. NULL pointer. That's fun. */ - strcpy(buf, "NULL tty"); - else - sprintf(buf, name, - idx + tty->driver.name_base); - - return buf; -} - -#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \ - (tty)->driver.name_base) - -char *tty_name(struct tty_struct *tty, char *buf) -{ - return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf); -} - -inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device, - const char *routine) -{ -#ifdef TTY_PARANOIA_CHECK - static const char badmagic[] = KERN_WARNING - "Warning: bad magic number for tty struct (%s) in %s\n"; - static const char badtty[] = KERN_WARNING - "Warning: null TTY for (%s) in %s\n"; - - if (!tty) { - printk(badtty, kdevname(device), routine); - return 1; - } - if (tty->magic != TTY_MAGIC) { - printk(badmagic, kdevname(device), routine); - return 1; - } -#endif - return 0; -} - -static int check_tty_count(struct tty_struct *tty, const char *routine) -{ -#ifdef CHECK_TTY_COUNT - struct list_head *p; - int count = 0; - - file_list_lock(); - for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) { - if(list_entry(p, struct file, f_list)->private_data == tty) - count++; - } - file_list_unlock(); - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_SLAVE && - tty->link && tty->link->count) - count++; - if (tty->count != count) { - printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " - "!= #fd's(%d) in %s\n", - kdevname(tty->device), tty->count, count, routine); - return count; - } -#endif - return 0; -} - -/* - * This is probably overkill for real world processors but - * they are not on hot paths so a little discipline won't do - * any harm. - */ - -static void tty_set_termios_ldisc(struct tty_struct *tty, int num) -{ - down(&tty->termios_sem); - tty->termios->c_line = num; - up(&tty->termios_sem); -} - -/* - * This guards the refcounted line discipline lists. The lock - * must be taken with irqs off because there are hangup path - * callers who will do ldisc lookups and cannot sleep. 
- */ - -spinlock_t tty_ldisc_lock = SPIN_LOCK_UNLOCKED; -DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); -struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ - -int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) -{ - - unsigned long flags; - int ret = 0; - - if (disc < N_TTY || disc >= NR_LDISCS) - return -EINVAL; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (new_ldisc) { - tty_ldiscs[disc] = *new_ldisc; - tty_ldiscs[disc].num = disc; - tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED; - tty_ldiscs[disc].refcount = 0; - } else { - if(tty_ldiscs[disc].refcount) - ret = -EBUSY; - else - tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED; - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - return ret; - -} - - -EXPORT_SYMBOL(tty_register_ldisc); - -struct tty_ldisc *tty_ldisc_get(int disc) -{ - unsigned long flags; - struct tty_ldisc *ld; - - if (disc < N_TTY || disc >= NR_LDISCS) - return NULL; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - - ld = &tty_ldiscs[disc]; - /* Check the entry is defined */ - if(ld->flags & LDISC_FLAG_DEFINED) - ld->refcount++; - else - ld = NULL; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return ld; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_get); - -void tty_ldisc_put(int disc) -{ - struct tty_ldisc *ld; - unsigned long flags; - - if (disc < N_TTY || disc >= NR_LDISCS) - BUG(); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty_ldiscs[disc]; - if(ld->refcount <= 0) - BUG(); - ld->refcount--; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_put); - -void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) -{ - tty->ldisc = *ld; - tty->ldisc.refcount = 0; -} - -/** - * tty_ldisc_try - internal helper - * @tty: the tty - * - * Make a single attempt to grab and bump the refcount on - * the tty ldisc. Return 0 on failure or 1 on success. This is - * used to implement both the waiting and non waiting versions - * of tty_ldisc_ref - */ - -static int tty_ldisc_try(struct tty_struct *tty) -{ - unsigned long flags; - struct tty_ldisc *ld; - int ret = 0; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty->ldisc; - if(test_bit(TTY_LDISC, &tty->flags)) - { - ld->refcount++; - ret = 1; - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return ret; -} - -/** - * tty_ldisc_ref_wait - wait for the tty ldisc - * @tty: tty device - * - * Dereference the line discipline for the terminal and take a - * reference to it. If the line discipline is in flux then - * wait patiently until it changes. - * - * Note: Must not be called from an IRQ/timer context. The caller - * must also be careful not to hold other locks that will deadlock - * against a discipline change, such as an existing ldisc reference - * (which we check for) - */ - -struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) -{ - /* wait_event is a macro */ - wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - return &tty->ldisc; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); - -/** - * tty_ldisc_ref - get the tty ldisc - * @tty: tty device - * - * Dereference the line discipline for the terminal and take a - * reference to it. If the line discipline is in flux then - * return NULL. Can be called from IRQ and timer functions. 
- */ - -struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) -{ - if(tty_ldisc_try(tty)) - return &tty->ldisc; - return NULL; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_ref); - - -void tty_ldisc_deref(struct tty_ldisc *ld) -{ - - unsigned long flags; - - if(ld == NULL) - BUG(); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if(ld->refcount == 0) - printk(KERN_EMERG "tty_ldisc_deref: no references.\n"); - else - ld->refcount--; - if(ld->refcount == 0) - wake_up(&tty_ldisc_wait); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_deref); - -/** - * tty_ldisc_enable - allow ldisc use - * @tty: terminal to activate ldisc on - * - * Set the TTY_LDISC flag when the line discipline can be called - * again. Do necessary wakeups for existing sleepers. - * - * Note: nobody should set this bit except via this function. Clearing - * directly is allowed. - */ - -static void tty_ldisc_enable(struct tty_struct *tty) -{ - set_bit(TTY_LDISC, &tty->flags); - wake_up(&tty_ldisc_wait); -} - -/** - * tty_set_ldisc - set line discipline - * @tty: the terminal to set - * @ldisc: the line discipline - * - * Set the discipline of a tty line. Must be called from a process - * context. - */ - -static int tty_set_ldisc(struct tty_struct *tty, int ldisc) -{ - int retval = 0; - struct tty_ldisc o_ldisc; - char buf[64]; - unsigned long flags; - struct tty_ldisc *ld; - - if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS)) - return -EINVAL; - -restart: - - if (tty->ldisc.num == ldisc) - return 0; /* We are already in the desired discipline */ - - ld = tty_ldisc_get(ldisc); - /* Eduardo Blanco <ejbs@xxxxxxxxxxxx> */ - /* Cyrus Durgin <cider@xxxxxxxxxxxxx> */ - if (ld == NULL) - { - char modname [20]; - sprintf(modname, "tty-ldisc-%d", ldisc); - request_module (modname); - ld = tty_ldisc_get(ldisc); - } - - if (ld == NULL) - return -EINVAL; - - - o_ldisc = tty->ldisc; - tty_wait_until_sent(tty, 0); - - /* - * Make sure we don't change while someone holds a - * reference to the line discipline. The TTY_LDISC bit - * prevents anyone taking a reference once it is clear. - * We need the lock to avoid racing reference takers. - */ - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if(tty->ldisc.refcount) - { - /* Free the new ldisc we grabbed. Must drop the lock - first. */ - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(ldisc); - /* - * There are several reasons we may be busy, including - * random momentary I/O traffic. We must therefore - * retry. We could distinguish between blocking ops - * and retries if we made tty_ldisc_wait() smarter. That - * is up for discussion. - */ - if(wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - clear_bit(TTY_LDISC, &tty->flags); - clear_bit(TTY_DONT_FLIP, &tty->flags); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * From this point on we know nobody has an ldisc - * usage reference, nor can they obtain one until - * we say so later on. - */ - - /* - * Wait for ->hangup_work and ->flip.work handlers to terminate - */ - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - - /* Shutdown the current discipline. */ - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - - /* Now set up the new line discipline. 
*/ - tty_ldisc_assign(tty, ld); - tty_set_termios_ldisc(tty, ldisc); - if (tty->ldisc.open) - retval = (tty->ldisc.open)(tty); - if (retval < 0) { - tty_ldisc_put(ldisc); - /* There is an outstanding reference here so this is safe */ - tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num)); - tty_set_termios_ldisc(tty, tty->ldisc.num); - if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) { - tty_ldisc_put(o_ldisc.num); - /* This driver is always present */ - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(tty, N_TTY); - if (tty->ldisc.open) { - int r = tty->ldisc.open(tty); - - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty, buf), r); - } - } - } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc. If we ended up flipping back - to the existing ldisc we have two references to it */ - - if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc) - tty->driver.set_ldisc(tty); - - tty_ldisc_put(o_ldisc.num); - - /* - * Allow ldisc referencing to occur as soon as the driver - * ldisc callback completes. - */ - tty_ldisc_enable(tty); - - return retval; -} - -/* - * This routine returns a tty driver structure, given a device number - */ -struct tty_driver *get_tty_driver(kdev_t device) -{ - int major, minor; - struct tty_driver *p; - - minor = MINOR(device); - major = MAJOR(device); - - for (p = tty_drivers; p; p = p->next) { - if (p->major != major) - continue; - if (minor < p->minor_start) - continue; - if (minor >= p->minor_start + p->num) - continue; - return p; - } - return NULL; -} - -/* - * If we try to write to, or set the state of, a terminal and we're - * not in the foreground, send a SIGTTOU. If the signal is blocked or - * ignored, go ahead and perform the operation. (POSIX 7.2) - */ -int tty_check_change(struct tty_struct * tty) -{ - if (current->tty != tty) - return 0; - if (tty->pgrp <= 0) { - printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n"); - return 0; - } - if (current->pgrp == tty->pgrp) - return 0; - if (is_ignored(SIGTTOU)) - return 0; - if (is_orphaned_pgrp(current->pgrp)) - return -EIO; - (void) kill_pg(current->pgrp,SIGTTOU,1); - return -ERESTARTSYS; -} - -static ssize_t hung_up_tty_read(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - /* Can't seek (pread) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - return 0; -} - -static ssize_t hung_up_tty_write(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - /* Can't seek (pwrite) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - return -EIO; -} - -/* No kernel lock held - none needed ;) */ -static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait) -{ - return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; -} - -static int hung_up_tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) -{ - return cmd == TIOCSPGRP ? 
-ENOTTY : -EIO; -} - -static struct file_operations tty_fops = { - llseek: no_llseek, - read: tty_read, - write: tty_write, - poll: tty_poll, - ioctl: tty_ioctl, - open: tty_open, - release: tty_release, - fasync: tty_fasync, -}; - -static struct file_operations hung_up_tty_fops = { - llseek: no_llseek, - read: hung_up_tty_read, - write: hung_up_tty_write, - poll: hung_up_tty_poll, - ioctl: hung_up_tty_ioctl, - release: tty_release, -}; - -static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED; -static struct file *redirect; - -/** - * tty_wakeup - request more data - * @tty: terminal - * - * Internal and external helper for wakeups of tty. This function - * informs the line discipline if present that the driver is ready - * to receive more output data. - */ - -void tty_wakeup(struct tty_struct *tty) -{ - struct tty_ldisc *ld; - - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { - ld = tty_ldisc_ref(tty); - if(ld) { - if(ld->write_wakeup) - ld->write_wakeup(tty); - tty_ldisc_deref(ld); - } - } - wake_up_interruptible(&tty->write_wait); -} - -/* - * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do - * that in 2.4 for modutils compat reasons. - */ -EXPORT_SYMBOL(tty_wakeup); - - -void tty_ldisc_flush(struct tty_struct *tty) -{ - struct tty_ldisc *ld = tty_ldisc_ref(tty); - if(ld) { - if(ld->flush_buffer) - ld->flush_buffer(tty); - tty_ldisc_deref(ld); - } -} - - -/* - * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do - * that in 2.4 for modutils compat reasons. - */ -EXPORT_SYMBOL(tty_ldisc_flush); - -void do_tty_hangup(void *data) -{ - struct tty_struct *tty = (struct tty_struct *) data; - struct file * cons_filp = NULL; - struct file *f = NULL; - struct task_struct *p; - struct list_head *l; - struct tty_ldisc *ld; - int closecount = 0, n; - - if (!tty) - return; - - /* inuse_filps is protected by the single kernel lock */ - lock_kernel(); - - spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { - f = redirect; - redirect = NULL; - } - spin_unlock(&redirect_lock); - - check_tty_count(tty, "do_tty_hangup"); - file_list_lock(); - for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) { - struct file * filp = list_entry(l, struct file, f_list); - if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV || - filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) { - cons_filp = filp; - continue; - } - if (filp->f_op != &tty_fops) - continue; - closecount++; - tty_fasync(-1, filp, 0); /* can't block */ - filp->f_op = &hung_up_tty_fops; - } - file_list_unlock(); - - /* FIXME! What are the locking issues here? This may be overdoing things.. */ - ld = tty_ldisc_ref(tty); - if(ld != NULL) - { - if (ld->flush_buffer) - ld->flush_buffer(tty); - if (tty->driver.flush_buffer) - tty->driver.flush_buffer(tty); - if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup) - ld->write_wakeup(tty); - if (ld->hangup) - ld->hangup(tty); - } - - /* FIXME: Once we trust the LDISC code better we can wait here for - ldisc completion and fix the driver call race */ - - wake_up_interruptible(&tty->write_wait); - wake_up_interruptible(&tty->read_wait); - - /* - * Shutdown the current line discipline, and reset it to - * N_TTY. 
- */ - - if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) - { - down(&tty->termios_sem); - *tty->termios = tty->driver.init_termios; - up(&tty->termios_sem); - } - - /* Defer ldisc switch */ - /* tty_deferred_ldisc_switch(N_TTY) - This should get done automatically when the port closes and - tty_release is called */ - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((tty->session > 0) && (p->session == tty->session) && - p->leader) { - send_sig(SIGHUP,p,1); - send_sig(SIGCONT,p,1); - if (tty->pgrp > 0) - p->tty_old_pgrp = tty->pgrp; - } - if (p->tty == tty) - p->tty = NULL; - } - read_unlock(&tasklist_lock); - - tty->flags = 0; - tty->session = 0; - tty->pgrp = -1; - tty->ctrl_status = 0; - /* - * If one of the devices matches a console pointer, we - * cannot just call hangup() because that will cause - * tty->count and state->count to go out of sync. - * So we just call close() the right number of times. - */ - if (cons_filp) { - if (tty->driver.close) - for (n = 0; n < closecount; n++) - tty->driver.close(tty, cons_filp); - } else if (tty->driver.hangup) - (tty->driver.hangup)(tty); - - /* We don't want to have driver/ldisc interactions beyond - the ones we did here. The driver layer expects no - calls after ->hangup() from the ldisc side. However we - can't yet guarantee all that */ - - set_bit(TTY_HUPPED, &tty->flags); - if(ld) { - tty_ldisc_enable(tty); - tty_ldisc_deref(ld); - } - unlock_kernel(); - if (f) - fput(f); -} - -void tty_hangup(struct tty_struct * tty) -{ -#ifdef TTY_DEBUG_HANGUP - char buf[64]; - - printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf)); -#endif - schedule_task(&tty->tq_hangup); -} - -void tty_vhangup(struct tty_struct * tty) -{ -#ifdef TTY_DEBUG_HANGUP - char buf[64]; - - printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf)); -#endif - do_tty_hangup((void *) tty); -} - -int tty_hung_up_p(struct file * filp) -{ - return (filp->f_op == &hung_up_tty_fops); -} - -/* - * This function is typically called only by the session leader, when - * it wants to disassociate itself from its controlling tty. - * - * It performs the following functions: - * (1) Sends a SIGHUP and SIGCONT to the foreground process group - * (2) Clears the tty from being controlling the session - * (3) Clears the controlling tty for all processes in the - * session group. - * - * The argument on_exit is set to 1 if called when a process is - * exiting; it is 0 if called by the ioctl TIOCNOTTY. 
- */ -void disassociate_ctty(int on_exit) -{ - struct tty_struct *tty = current->tty; - struct task_struct *p; - int tty_pgrp = -1; - - if (tty) { - tty_pgrp = tty->pgrp; - if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY) - tty_vhangup(tty); - } else { - if (current->tty_old_pgrp) { - kill_pg(current->tty_old_pgrp, SIGHUP, on_exit); - kill_pg(current->tty_old_pgrp, SIGCONT, on_exit); - } - return; - } - if (tty_pgrp > 0) { - kill_pg(tty_pgrp, SIGHUP, on_exit); - if (!on_exit) - kill_pg(tty_pgrp, SIGCONT, on_exit); - } - - current->tty_old_pgrp = 0; - tty->session = 0; - tty->pgrp = -1; - - read_lock(&tasklist_lock); - for_each_task(p) - if (p->session == current->session) - p->tty = NULL; - read_unlock(&tasklist_lock); -} - -void stop_tty(struct tty_struct *tty) -{ - if (tty->stopped) - return; - tty->stopped = 1; - if (tty->link && tty->link->packet) { - tty->ctrl_status &= ~TIOCPKT_START; - tty->ctrl_status |= TIOCPKT_STOP; - wake_up_interruptible(&tty->link->read_wait); - } - if (tty->driver.stop) - (tty->driver.stop)(tty); -} - -void start_tty(struct tty_struct *tty) -{ - if (!tty->stopped || tty->flow_stopped) - return; - tty->stopped = 0; - if (tty->link && tty->link->packet) { - tty->ctrl_status &= ~TIOCPKT_STOP; - tty->ctrl_status |= TIOCPKT_START; - wake_up_interruptible(&tty->link->read_wait); - } - if (tty->driver.start) - (tty->driver.start)(tty); - /* If we have a running line discipline it may need kicking */ - tty_wakeup(tty); -} - -static ssize_t tty_read(struct file * file, char * buf, size_t count, - loff_t *ppos) -{ - int i; - struct tty_struct * tty; - struct inode *inode; - struct tty_ldisc *ld; - - /* Can't seek (pread) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - - tty = (struct tty_struct *)file->private_data; - inode = file->f_dentry->d_inode; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_read")) - return -EIO; - if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) - return -EIO; - - /* This check not only needs to be done before reading, but also - whenever read_chan() gets woken up after sleeping, so I've - moved it to there. This should only be done for the N_TTY - line discipline, anyway. Same goes for write_chan(). -- jlc. 
*/ -#if 0 - if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */ - (tty->pgrp > 0) && - (current->tty == tty) && - (tty->pgrp != current->pgrp)) - if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp)) - return -EIO; - else { - (void) kill_pg(current->pgrp, SIGTTIN, 1); - return -ERESTARTSYS; - } -#endif - /* We want to wait for the line discipline to sort out in this - situation */ - ld = tty_ldisc_ref_wait(tty); - lock_kernel(); - if (ld->read) - i = (ld->read)(tty,file,buf,count); - else - i = -EIO; - tty_ldisc_deref(ld); - unlock_kernel(); - if (i > 0) - inode->i_atime = CURRENT_TIME; - return i; -} - -/* - * Split writes up in sane blocksizes to avoid - * denial-of-service type attacks - */ -static inline ssize_t do_tty_write( - ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), - struct tty_struct *tty, - struct file *file, - const unsigned char *buf, - size_t count) -{ - ssize_t ret = 0, written = 0; - - if (file->f_flags & O_NONBLOCK) { - if (down_trylock(&tty->atomic_write)) - return -EAGAIN; - } - else { - if (down_interruptible(&tty->atomic_write)) - return -ERESTARTSYS; - } - if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) { - lock_kernel(); - written = write(tty, file, buf, count); - unlock_kernel(); - } else { - for (;;) { - unsigned long size = MAX(PAGE_SIZE*2,16384); - if (size > count) - size = count; - lock_kernel(); - ret = write(tty, file, buf, size); - unlock_kernel(); - if (ret <= 0) - break; - written += ret; - buf += ret; - count -= ret; - if (!count) - break; - ret = -ERESTARTSYS; - if (signal_pending(current)) - break; - if (current->need_resched) - schedule(); - } - } - if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; - ret = written; - } - up(&tty->atomic_write); - return ret; -} - - -static ssize_t tty_write(struct file * file, const char * buf, size_t count, - loff_t *ppos) -{ - int is_console; - struct tty_struct * tty; - struct inode *inode = file->f_dentry->d_inode; - ssize_t ret; - struct tty_ldisc *ld; - - /* Can't seek (pwrite) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - - /* - * For now, we redirect writes from /dev/console as - * well as /dev/tty0. - */ - inode = file->f_dentry->d_inode; - is_console = (inode->i_rdev == SYSCONS_DEV || - inode->i_rdev == CONSOLE_DEV); - - if (is_console) { - struct file *p = NULL; - - spin_lock(&redirect_lock); - if (redirect) { - get_file(redirect); - p = redirect; - } - spin_unlock(&redirect_lock); - - if (p) { - ssize_t res = p->f_op->write(p, buf, count, &p->f_pos); - fput(p); - return res; - } - } - - tty = (struct tty_struct *)file->private_data; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_write")) - return -EIO; - if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags))) - return -EIO; -#if 0 - if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) && - (current->tty == tty) && (tty->pgrp != current->pgrp)) { - if (is_orphaned_pgrp(current->pgrp)) - return -EIO; - if (!is_ignored(SIGTTOU)) { - (void) kill_pg(current->pgrp, SIGTTOU, 1); - return -ERESTARTSYS; - } - } -#endif - - ld = tty_ldisc_ref_wait(tty); - if (!ld->write) - ret = -EIO; - else - ret = do_tty_write(ld->write, tty, file, - (const unsigned char __user *)buf, count); - tty_ldisc_deref(ld); - return ret; -} - -/* Semaphore to protect creating and releasing a tty. 
This is shared with - vt.c for deeply disgusting hack reasons */ -static DECLARE_MUTEX(tty_sem); - -static void down_tty_sem(int index) -{ - down(&tty_sem); -} - -static void up_tty_sem(int index) -{ - up(&tty_sem); -} - -static void release_mem(struct tty_struct *tty, int idx); - -/* - * WSH 06/09/97: Rewritten to remove races and properly clean up after a - * failed open. The new code protects the open with a semaphore, so it's - * really quite straightforward. The semaphore locking can probably be - * relaxed for the (most common) case of reopening a tty. - */ -static int init_dev(kdev_t device, struct tty_struct **ret_tty) -{ - struct tty_struct *tty, *o_tty; - struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; - struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; - struct tty_driver *driver; - int retval=0; - int idx; - - driver = get_tty_driver(device); - if (!driver) - return -ENODEV; - - idx = MINOR(device) - driver->minor_start; - - /* - * Check whether we need to acquire the tty semaphore to avoid - * race conditions. For now, play it safe. - */ - down_tty_sem(idx); - - /* check whether we're reopening an existing tty */ - tty = driver->table[idx]; - if (tty) goto fast_track; - - /* - * First time open is complex, especially for PTY devices. - * This code guarantees that either everything succeeds and the - * TTY is ready for operation, or else the table slots are vacated - * and the allocated memory released. (Except that the termios - * and locked termios may be retained.) - */ - - o_tty = NULL; - tp = o_tp = NULL; - ltp = o_ltp = NULL; - - tty = alloc_tty_struct(); - if(!tty) - goto fail_no_mem; - initialize_tty_struct(tty); - tty->device = device; - tty->driver = *driver; - - tp_loc = &driver->termios[idx]; - if (!*tp_loc) { - tp = (struct termios *) kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!tp) - goto free_mem_out; - *tp = driver->init_termios; - } - - ltp_loc = &driver->termios_locked[idx]; - if (!*ltp_loc) { - ltp = (struct termios *) kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!ltp) - goto free_mem_out; - memset(ltp, 0, sizeof(struct termios)); - } - - if (driver->type == TTY_DRIVER_TYPE_PTY) { - o_tty = alloc_tty_struct(); - if (!o_tty) - goto free_mem_out; - initialize_tty_struct(o_tty); - o_tty->device = (kdev_t) MKDEV(driver->other->major, - driver->other->minor_start + idx); - o_tty->driver = *driver->other; - - o_tp_loc = &driver->other->termios[idx]; - if (!*o_tp_loc) { - o_tp = (struct termios *) - kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_tp) - goto free_mem_out; - *o_tp = driver->other->init_termios; - } - - o_ltp_loc = &driver->other->termios_locked[idx]; - if (!*o_ltp_loc) { - o_ltp = (struct termios *) - kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_ltp) - goto free_mem_out; - memset(o_ltp, 0, sizeof(struct termios)); - } - - /* - * Everything allocated ... set up the o_tty structure. - */ - driver->other->table[idx] = o_tty; - if (!*o_tp_loc) - *o_tp_loc = o_tp; - if (!*o_ltp_loc) - *o_ltp_loc = o_ltp; - o_tty->termios = *o_tp_loc; - o_tty->termios_locked = *o_ltp_loc; - (*driver->other->refcount)++; - if (driver->subtype == PTY_TYPE_MASTER) - o_tty->count++; - - /* Establish the links in both directions */ - tty->link = o_tty; - o_tty->link = tty; - } - - /* - * All structures have been allocated, so now we install them. - * Failures after this point use release_mem to clean up, so - * there's no need to null out the local pointers. 
- */ - driver->table[idx] = tty; - - if (!*tp_loc) - *tp_loc = tp; - if (!*ltp_loc) - *ltp_loc = ltp; - tty->termios = *tp_loc; - tty->termios_locked = *ltp_loc; - (*driver->refcount)++; - tty->count++; - - /* - * Structures all installed ... call the ldisc open routines. - * If we fail here just call release_mem to clean up. No need - * to decrement the use counts, as release_mem doesn't care. - */ - if (tty->ldisc.open) { - retval = (tty->ldisc.open)(tty); - if (retval) - goto release_mem_out; - } - if (o_tty && o_tty->ldisc.open) { - retval = (o_tty->ldisc.open)(o_tty); - if (retval) { - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - goto release_mem_out; - } - set_bit(TTY_LDISC, &o_tty->flags); - tty_ldisc_enable(o_tty); - } - tty_ldisc_enable(tty); - goto success; - - /* - * This fast open can be used if the tty is already open. - * No memory is allocated, and the only failures are from - * attempting to open a closing tty or attempting multiple - * opens on a pty master. - */ -fast_track: - if (test_bit(TTY_CLOSING, &tty->flags)) { - retval = -EIO; - goto end_init; - } - if (driver->type == TTY_DRIVER_TYPE_PTY && - driver->subtype == PTY_TYPE_MASTER) { - /* - * special case for PTY masters: only one open permitted, - * and the slave side open count is incremented as well. - */ - if (tty->count) { - retval = -EIO; - goto end_init; - } - tty->link->count++; - } - tty->count++; - tty->driver = *driver; /* N.B. why do this every time?? */ - /* FIXME */ - if(!test_bit(TTY_LDISC, &tty->flags)) - printk(KERN_ERR "init_dev but no ldisc\n"); -success: - *ret_tty = tty; - - /* All paths come through here to release the semaphore */ -end_init: - up_tty_sem(idx); - return retval; - - /* Release locally allocated memory ... nothing placed in slots */ -free_mem_out: - if (o_tp) - kfree(o_tp); - if (o_tty) - free_tty_struct(o_tty); - if (ltp) - kfree(ltp); - if (tp) - kfree(tp); - free_tty_struct(tty); - -fail_no_mem: - retval = -ENOMEM; - goto end_init; - - /* call the tty release_mem routine to clean out this slot */ -release_mem_out: - printk(KERN_INFO "init_dev: ldisc open failed, " - "clearing slot %d\n", idx); - release_mem(tty, idx); - goto end_init; -} - -/* - * Releases memory associated with a tty structure, and clears out the - * driver table slots. - */ -static void release_mem(struct tty_struct *tty, int idx) -{ - struct tty_struct *o_tty; - struct termios *tp; - - if ((o_tty = tty->link) != NULL) { - o_tty->driver.table[idx] = NULL; - if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { - tp = o_tty->driver.termios[idx]; - o_tty->driver.termios[idx] = NULL; - kfree(tp); - } - o_tty->magic = 0; - (*o_tty->driver.refcount)--; - list_del_init(&o_tty->tty_files); - free_tty_struct(o_tty); - } - - tty->driver.table[idx] = NULL; - if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { - tp = tty->driver.termios[idx]; - tty->driver.termios[idx] = NULL; - kfree(tp); - } - tty->magic = 0; - (*tty->driver.refcount)--; - list_del_init(&tty->tty_files); - free_tty_struct(tty); -} - -/* - * Even releasing the tty structures is a tricky business.. We have - * to be very careful that the structures are all released at the - * same time, as interrupts might otherwise get the wrong pointers. - * - * WSH 09/09/97: rewritten to avoid some nasty race conditions that could - * lead to double frees or releasing memory still in use. 
- */ -static void release_dev(struct file * filp) -{ - struct tty_struct *tty, *o_tty; - int pty_master, tty_closing, o_tty_closing, do_sleep; - int idx; - char buf[64]; - unsigned long flags; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev")) - return; - - check_tty_count(tty, "release_dev"); - - tty_fasync(-1, filp, 0); - - idx = MINOR(tty->device) - tty->driver.minor_start; - pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER); - o_tty = tty->link; - -#ifdef TTY_PARANOIA_CHECK - if (idx < 0 || idx >= tty->driver.num) { - printk(KERN_DEBUG "release_dev: bad idx when trying to " - "free (%s)\n", kdevname(tty->device)); - return; - } - if (tty != tty->driver.table[idx]) { - printk(KERN_DEBUG "release_dev: driver.table[%d] not tty " - "for (%s)\n", idx, kdevname(tty->device)); - return; - } - if (tty->termios != tty->driver.termios[idx]) { - printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios " - "for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (tty->termios_locked != tty->driver.termios_locked[idx]) { - printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not " - "termios_locked for (%s)\n", - idx, kdevname(tty->device)); - return; - } -#endif - -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "release_dev of %s (tty count=%d)...", - tty_name(tty, buf), tty->count); -#endif - -#ifdef TTY_PARANOIA_CHECK - if (tty->driver.other) { - if (o_tty != tty->driver.other->table[idx]) { - printk(KERN_DEBUG "release_dev: other->table[%d] " - "not o_tty for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->termios != tty->driver.other->termios[idx]) { - printk(KERN_DEBUG "release_dev: other->termios[%d] " - "not o_termios for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->termios_locked != - tty->driver.other->termios_locked[idx]) { - printk(KERN_DEBUG "release_dev: other->termios_locked[" - "%d] not o_termios_locked for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->link != tty) { - printk(KERN_DEBUG "release_dev: bad pty pointers\n"); - return; - } - } -#endif - - if (tty->driver.close) - tty->driver.close(tty, filp); - - /* - * Sanity check: if tty->count is going to zero, there shouldn't be - * any waiters on tty->read_wait or tty->write_wait. We test the - * wait queues and kick everyone out _before_ actually starting to - * close. This ensures that we won't block while releasing the tty - * structure. - * - * The test for the o_tty closing is necessary, since the master and - * slave sides may close in any order. If the slave side closes out - * first, its count will be one, since the master side holds an open. - * Thus this test wouldn't be triggered at the time the slave closes, - * so we do it now. - * - * Note that it's possible for the tty to be opened again while we're - * flushing out waiters. By recalculating the closing flags before - * each iteration we avoid any problems. - */ - while (1) { - tty_closing = tty->count <= 1; - o_tty_closing = o_tty && - (o_tty->count <= (pty_master ? 
1 : 0)); - do_sleep = 0; - - if (tty_closing) { - if (waitqueue_active(&tty->read_wait)) { - wake_up(&tty->read_wait); - do_sleep++; - } - if (waitqueue_active(&tty->write_wait)) { - wake_up(&tty->write_wait); - do_sleep++; - } - } - if (o_tty_closing) { - if (waitqueue_active(&o_tty->read_wait)) { - wake_up(&o_tty->read_wait); - do_sleep++; - } - if (waitqueue_active(&o_tty->write_wait)) { - wake_up(&o_tty->write_wait); - do_sleep++; - } - } - if (!do_sleep) - break; - - printk(KERN_WARNING "release_dev: %s: read/write wait queue " - "active!\n", tty_name(tty, buf)); - schedule(); - } - - /* - * The closing flags are now consistent with the open counts on - * both sides, and we've completed the last operation that could - * block, so it's safe to proceed with closing. - */ - if (pty_master) { - if (--o_tty->count < 0) { - printk(KERN_WARNING "release_dev: bad pty slave count " - "(%d) for %s\n", - o_tty->count, tty_name(o_tty, buf)); - o_tty->count = 0; - } - } - if (--tty->count < 0) { - printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n", - tty->count, tty_name(tty, buf)); - tty->count = 0; - } - - /* - * We've decremented tty->count, so we should zero out - * filp->private_data, to break the link between the tty and - * the file descriptor. Otherwise if filp_close() blocks before - * the file descriptor is removed from the inuse_filp - * list, check_tty_count() could observe a discrepancy and - * printk a warning message to the user. - */ - filp->private_data = 0; - - /* - * Perform some housekeeping before deciding whether to return. - * - * Set the TTY_CLOSING flag if this was the last open. In the - * case of a pty we may have to wait around for the other side - * to close, and TTY_CLOSING makes sure we can't be reopened. - */ - if(tty_closing) - set_bit(TTY_CLOSING, &tty->flags); - if(o_tty_closing) - set_bit(TTY_CLOSING, &o_tty->flags); - - /* - * If _either_ side is closing, make sure there aren't any - * processes that still think tty or o_tty is their controlling - * tty. - */ - if (tty_closing || o_tty_closing) { - struct task_struct *p; - - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->tty == tty || (o_tty && p->tty == o_tty)) - p->tty = NULL; - } - read_unlock(&tasklist_lock); - } - - /* check whether both sides are closing ... */ - if (!tty_closing || (o_tty && !o_tty_closing)) - return; - -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "freeing tty structure..."); -#endif - - /* - * Prevent flush_to_ldisc() from rescheduling the work for later. Then - * kill any delayed work. As this is the final close it does not - * race with the set_ldisc code path. - */ - clear_bit(TTY_LDISC, &tty->flags); - clear_bit(TTY_DONT_FLIP, &tty->flags); - - /* - * Wait for ->hangup_work and ->flip.work handlers to terminate - */ - - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - - /* - * Wait for any short term users (we know they are just driver - * side waiters as the file is closing so user count on the file - * side is zero.) - */ - - spin_lock_irqsave(&tty_ldisc_lock, flags); - while(tty->ldisc.refcount) - { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * Shutdown the current line discipline, and reset it to N_TTY. - * N.B. why reset ldisc when we're releasing the memory?? 
- * FIXME: this MUST get fixed for the new reflocking - */ - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - tty_ldisc_put(tty->ldisc.num); - - /* - * Switch the line discipline back - */ - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(tty,N_TTY); - - if (o_tty) { - /* FIXME: could o_tty be in setldisc here ? */ - clear_bit(TTY_LDISC, &o_tty->flags); - if (o_tty->ldisc.close) - (o_tty->ldisc.close)(o_tty); - tty_ldisc_put(o_tty->ldisc.num); - tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(o_tty,N_TTY); - } - - /* - * The release_mem function takes care of the details of clearing - * the slots and preserving the termios structure. - */ - release_mem(tty, idx); -} - -/* - * tty_open and tty_release keep up the tty count that contains the - * number of opens done on a tty. We cannot use the inode-count, as - * different inodes might point to the same tty. - * - * Open-counting is needed for pty masters, as well as for keeping - * track of serial lines: DTR is dropped when the last close happens. - * (This is not done solely through tty->count, now. - Ted 1/27/92) - * - * The termios state of a pty is reset on first open so that - * settings don't persist across reuse. - */ -static int tty_open(struct inode * inode, struct file * filp) -{ - struct tty_struct *tty; - int noctty, retval; - kdev_t device; - unsigned short saved_flags; - char buf[64]; - - saved_flags = filp->f_flags; -retry_open: - noctty = filp->f_flags & O_NOCTTY; - device = inode->i_rdev; - if (device == TTY_DEV) { - if (!current->tty) - return -ENXIO; - device = current->tty->device; - filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ - /* noctty = 1; */ - } -#ifdef CONFIG_VT - if (device == CONSOLE_DEV) { - extern int fg_console; - device = MKDEV(TTY_MAJOR, fg_console + 1); - noctty = 1; - } -#endif - if (device == SYSCONS_DEV) { - struct console *c = console_drivers; - while(c && !c->device) - c = c->next; - if (!c) - return -ENODEV; - device = c->device(c); - filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */ - noctty = 1; - } - - if (device == PTMX_DEV) { -#ifdef CONFIG_UNIX98_PTYS - - /* find a free pty. */ - int major, minor; - struct tty_driver *driver; - - /* find a device that is not in use. */ - retval = -1; - for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) { - driver = &ptm_driver[major]; - for (minor = driver->minor_start ; - minor < driver->minor_start + driver->num ; - minor++) { - device = MKDEV(driver->major, minor); - if (!init_dev(device, &tty)) goto ptmx_found; /* ok! 
*/ - } - } - return -EIO; /* no free ptys */ - ptmx_found: - set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - minor -= driver->minor_start; - devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start)); - tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT, - pts_driver[major].minor_start + minor); - noctty = 1; - goto init_dev_done; - -#else /* CONFIG_UNIX_98_PTYS */ - - return -ENODEV; - -#endif /* CONFIG_UNIX_98_PTYS */ - } - - retval = init_dev(device, &tty); - if (retval) - return retval; - -#ifdef CONFIG_UNIX98_PTYS -init_dev_done: -#endif - filp->private_data = tty; - file_move(filp, &tty->tty_files); - check_tty_count(tty, "tty_open"); - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER) - noctty = 1; -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "opening %s...", tty_name(tty, buf)); -#endif - if (tty->driver.open) - retval = tty->driver.open(tty, filp); - else - retval = -ENODEV; - filp->f_flags = saved_flags; - - if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser()) - retval = -EBUSY; - - if (retval) { -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "error %d in opening %s...", retval, - tty_name(tty, buf)); -#endif - - release_dev(filp); - if (retval != -ERESTARTSYS) - return retval; - if (signal_pending(current)) - return retval; - schedule(); - /* - * Need to reset f_op in case a hangup happened. - */ - filp->f_op = &tty_fops; - goto retry_open; - } - if (!noctty && - current->leader && - !current->tty && - tty->session == 0) { - task_lock(current); - current->tty = tty; - task_unlock(current); - current->tty_old_pgrp = 0; - tty->session = current->session; - tty->pgrp = current->pgrp; - } - if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) && - (tty->driver.subtype == SERIAL_TYPE_CALLOUT) && - (tty->count == 1)) { - static int nr_warns; - if (nr_warns < 5) { - printk(KERN_WARNING "tty_io.c: " - "process %d (%s) used obsolete /dev/%s - " - "update software to use /dev/ttyS%d\n", - current->pid, current->comm, - tty_name(tty, buf), TTY_NUMBER(tty)); - nr_warns++; - } - } - return 0; -} - -static int tty_release(struct inode * inode, struct file * filp) -{ - lock_kernel(); - release_dev(filp); - unlock_kernel(); - return 0; -} - -/* No kernel lock held - fine */ -static unsigned int tty_poll(struct file * filp, poll_table * wait) -{ - struct tty_struct * tty; - struct tty_ldisc *ld; - int ret = 0; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll")) - return 0; - - ld = tty_ldisc_ref_wait(tty); - if (ld->poll) - ret = (ld->poll)(tty, filp, wait); - tty_ldisc_deref(ld); - return ret; -} - -static int tty_fasync(int fd, struct file * filp, int on) -{ - struct tty_struct * tty; - int retval; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync")) - return 0; - - retval = fasync_helper(fd, filp, on, &tty->fasync); - if (retval <= 0) - return retval; - - if (on) { - if (!waitqueue_active(&tty->read_wait)) - tty->minimum_to_wake = 1; - if (filp->f_owner.pid == 0) { - filp->f_owner.pid = (-tty->pgrp) ? 
: current->pid; - filp->f_owner.uid = current->uid; - filp->f_owner.euid = current->euid; - } - } else { - if (!tty->fasync && !waitqueue_active(&tty->read_wait)) - tty->minimum_to_wake = N_TTY_BUF_SIZE; - } - return 0; -} - -static int tiocsti(struct tty_struct *tty, char * arg) -{ - char ch, mbz = 0; - struct tty_ldisc *ld; - - if ((current->tty != tty) && !suser()) - return -EPERM; - if (get_user(ch, arg)) - return -EFAULT; - ld = tty_ldisc_ref_wait(tty); - ld->receive_buf(tty, &ch, &mbz, 1); - tty_ldisc_deref(ld); - return 0; -} - -static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg) -{ - if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) - return -EFAULT; - return 0; -} - -static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize * arg) -{ - struct winsize tmp_ws; - - if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) - return -EFAULT; - if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg))) - return 0; - if (tty->pgrp > 0) - kill_pg(tty->pgrp, SIGWINCH, 1); - if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0)) - kill_pg(real_tty->pgrp, SIGWINCH, 1); - tty->winsize = tmp_ws; - real_tty->winsize = tmp_ws; - return 0; -} - -static int tioccons(struct inode *inode, struct file *file) -{ - if (inode->i_rdev == SYSCONS_DEV || - inode->i_rdev == CONSOLE_DEV) { - struct file *f; - if (!suser()) - return -EPERM; - spin_lock(&redirect_lock); - f = redirect; - redirect = NULL; - spin_unlock(&redirect_lock); - if (f) - fput(f); - return 0; - } - spin_lock(&redirect_lock); - if (redirect) { - spin_unlock(&redirect_lock); - return -EBUSY; - } - get_file(file); - redirect = file; - spin_unlock(&redirect_lock); - return 0; -} - - -static int fionbio(struct file *file, int *arg) -{ - int nonblock; - - if (get_user(nonblock, arg)) - return -EFAULT; - - if (nonblock) - file->f_flags |= O_NONBLOCK; - else - file->f_flags &= ~O_NONBLOCK; - return 0; -} - -static int tiocsctty(struct tty_struct *tty, int arg) -{ - if (current->leader && - (current->session == tty->session)) - return 0; - /* - * The process must be a session leader and - * not have a controlling tty already. - */ - if (!current->leader || current->tty) - return -EPERM; - if (tty->session > 0) { - /* - * This tty is already the controlling - * tty for another session group! - */ - if ((arg == 1) && suser()) { - /* - * Steal it away - */ - struct task_struct *p; - - read_lock(&tasklist_lock); - for_each_task(p) - if (p->tty == tty) - p->tty = NULL; - read_unlock(&tasklist_lock); - } else - return -EPERM; - } - task_lock(current); - current->tty = tty; - task_unlock(current); - current->tty_old_pgrp = 0; - tty->session = current->session; - tty->pgrp = current->pgrp; - return 0; -} - -static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. 
- */ - if (tty == real_tty && current->tty != real_tty) - return -ENOTTY; - return put_user(real_tty->pgrp, arg); -} - -static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - pid_t pgrp; - int retval = tty_check_change(real_tty); - - if (retval == -EIO) - return -ENOTTY; - if (retval) - return retval; - if (!current->tty || - (current->tty != real_tty) || - (real_tty->session != current->session)) - return -ENOTTY; - if (get_user(pgrp, (pid_t *) arg)) - return -EFAULT; - if (pgrp < 0) - return -EINVAL; - if (session_of_pgrp(pgrp) != current->session) - return -EPERM; - real_tty->pgrp = pgrp; - return 0; -} - -static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. - */ - if (tty == real_tty && current->tty != real_tty) - return -ENOTTY; - if (real_tty->session <= 0) - return -ENOTTY; - return put_user(real_tty->session, arg); -} - -static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg) -{ - if (copy_to_user(arg, tty, sizeof(*arg))) - return -EFAULT; - return 0; -} - -static int tiocsetd(struct tty_struct *tty, int *arg) -{ - int ldisc; - - if (get_user(ldisc, arg)) - return -EFAULT; - return tty_set_ldisc(tty, ldisc); -} - -static int send_break(struct tty_struct *tty, int duration) -{ - tty->driver.break_ctl(tty, -1); - if (!signal_pending(current)) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(duration); - } - tty->driver.break_ctl(tty, 0); - if (signal_pending(current)) - return -EINTR; - return 0; -} - -static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) -{ - if (cmd == TCSBRK && arg) - { - /* tcdrain case */ - int retval = tty_check_change(tty); - if (retval) - return retval; - tty_wait_until_sent(tty, 0); - if (signal_pending(current)) - return -EINTR; - } - return 0; -} - -/* - * Split this up, as gcc can choke on it otherwise.. - */ -int tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) -{ - struct tty_struct *tty, *real_tty; - int retval; - struct tty_ldisc *ld; - - tty = (struct tty_struct *)file->private_data; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) - return -EINVAL; - - real_tty = tty; - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - - /* - * Break handling by driver - */ - if (!tty->driver.break_ctl) { - switch(cmd) { - case TIOCSBRK: - case TIOCCBRK: - if (tty->driver.ioctl) - return tty->driver.ioctl(tty, file, cmd, arg); - return -EINVAL; - - /* These two ioctl's always return success; even if */ - /* the driver doesn't support them. 
*/ - case TCSBRK: - case TCSBRKP: - retval = -ENOIOCTLCMD; - if (tty->driver.ioctl) - retval = tty->driver.ioctl(tty, file, cmd, arg); - /* Not driver handled */ - if (retval == -ENOIOCTLCMD) - retval = tty_generic_brk(tty, file, cmd, arg); - return retval; - } - } - - /* - * Factor out some common prep work - */ - switch (cmd) { - case TIOCSETD: - case TIOCSBRK: - case TIOCCBRK: - case TCSBRK: - case TCSBRKP: - retval = tty_check_change(tty); - if (retval) - return retval; - if (cmd != TIOCCBRK) { - tty_wait_until_sent(tty, 0); - if (signal_pending(current)) - return -EINTR; - } - break; - } - - switch (cmd) { - case TIOCSTI: - return tiocsti(tty, (char *)arg); - case TIOCGWINSZ: - return tiocgwinsz(tty, (struct winsize *) arg); - case TIOCSWINSZ: - return tiocswinsz(tty, real_tty, (struct winsize *) arg); - case TIOCCONS: - return real_tty!=tty ? -EINVAL : tioccons(inode, file); - case FIONBIO: - return fionbio(file, (int *) arg); - case TIOCEXCL: - set_bit(TTY_EXCLUSIVE, &tty->flags); - return 0; - case TIOCNXCL: - clear_bit(TTY_EXCLUSIVE, &tty->flags); - return 0; - case TIOCNOTTY: - if (current->tty != tty) - return -ENOTTY; - if (current->leader) - disassociate_ctty(0); - task_lock(current); - current->tty = NULL; - task_unlock(current); - return 0; - case TIOCSCTTY: - return tiocsctty(tty, arg); - case TIOCGPGRP: - return tiocgpgrp(tty, real_tty, (pid_t *) arg); - case TIOCSPGRP: - return tiocspgrp(tty, real_tty, (pid_t *) arg); - case TIOCGSID: - return tiocgsid(tty, real_tty, (pid_t *) arg); - case TIOCGETD: - /* FIXME: check this is ok */ - return put_user(tty->ldisc.num, (int *) arg); - case TIOCSETD: - return tiocsetd(tty, (int *) arg); -#ifdef CONFIG_VT - case TIOCLINUX: - return tioclinux(tty, arg); -#endif - case TIOCTTYGSTRUCT: - return tiocttygstruct(tty, (struct tty_struct *) arg); - - /* - * Break handling - */ - case TIOCSBRK: /* Turn break on, unconditionally */ - tty->driver.break_ctl(tty, -1); - return 0; - - case TIOCCBRK: /* Turn break off, unconditionally */ - tty->driver.break_ctl(tty, 0); - return 0; - case TCSBRK: /* SVID version: non-zero arg --> no break */ - /* - * XXX is the above comment correct, or the - * code below correct? Is this ioctl used at - * all by anyone? - */ - if (!arg) - return send_break(tty, HZ/4); - return 0; - case TCSBRKP: /* support for POSIX tcsendbreak() */ - return send_break(tty, arg ? arg*(HZ/10) : HZ/4); - } - if (tty->driver.ioctl) { - retval = (tty->driver.ioctl)(tty, file, cmd, arg); - if (retval != -ENOIOCTLCMD) - return retval; - } - ld = tty_ldisc_ref_wait(tty); - retval = -EINVAL; - if (ld->ioctl) { - retval = ld->ioctl(tty, file, cmd, arg); - if (retval == -ENOIOCTLCMD) - retval = -EINVAL; - } - tty_ldisc_deref(ld); - return retval; -} - - -/* - * This implements the "Secure Attention Key" --- the idea is to - * prevent trojan horses by killing all processes associated with this - * tty when the user hits the "Secure Attention Key". Required for - * super-paranoid applications --- see the Orange Book for more details. - * - * This code could be nicer; ideally it should send a HUP, wait a few - * seconds, then send a INT, and then a KILL signal. But you then - * have to coordinate with the init process, since all processes associated - * with the current tty must be dead before the new getty is allowed - * to spawn. - * - * Now, if it would be correct ;-/ The current code has a nasty hole - - * it doesn't catch files in flight. 
We may send the descriptor to ourselves - * via AF_UNIX socket, close it and later fetch from socket. FIXME. - * - * Nasty bug: do_SAK is being called in interrupt context. This can - * deadlock. We punt it up to process context. AKPM - 16Mar2001 - */ -static void __do_SAK(void *arg) -{ -#ifdef TTY_SOFT_SAK - tty_hangup(tty); -#else - struct tty_struct *tty = arg; - struct task_struct *p; - int session; - int i; - struct file *filp; - struct tty_ldisc *disc; - - if (!tty) - return; - session = tty->session; - /* We don't want an ldisc switch during this */ - disc = tty_ldisc_ref(tty); - if (disc && disc->flush_buffer) - disc->flush_buffer(tty); - tty_ldisc_deref(disc); - - if (tty->driver.flush_buffer) - tty->driver.flush_buffer(tty); - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->tty == tty) || - ((session > 0) && (p->session == session))) { - send_sig(SIGKILL, p, 1); - continue; - } - task_lock(p); - if (p->files) { - read_lock(&p->files->file_lock); - for (i=0; i < p->files->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (filp && (filp->f_op == &tty_fops) && - (filp->private_data == tty)) { - send_sig(SIGKILL, p, 1); - break; - } - } - read_unlock(&p->files->file_lock); - } - task_unlock(p); - } - read_unlock(&tasklist_lock); -#endif -} - -/* - * The tq handling here is a little racy - tty->SAK_tq may already be queued. - * But there's no mechanism to fix that without futzing with tqueue_lock. - * Fortunately we don't need to worry, because if ->SAK_tq is already queued, - * the values which we write to it will be identical to the values which it - * already has. --akpm - */ -void do_SAK(struct tty_struct *tty) -{ - if (!tty) - return; - PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty); - schedule_task(&tty->SAK_tq); -} - -/* - * This routine is called out of the software interrupt to flush data - * from the flip buffer to the line discipline. - */ -static void flush_to_ldisc(void *private_) -{ - struct tty_struct *tty = (struct tty_struct *) private_; - unsigned char *cp; - char *fp; - int count; - unsigned long flags; - struct tty_ldisc *disc; - - disc = tty_ldisc_ref(tty); - if (disc == NULL) /* !TTY_LDISC */ - return; - - if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - queue_task(&tty->flip.tqueue, &tq_timer); - goto out; - } - if (tty->flip.buf_num) { - cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - tty->flip.buf_num = 0; - - save_flags(flags); cli(); - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - } else { - cp = tty->flip.char_buf; - fp = tty->flip.flag_buf; - tty->flip.buf_num = 1; - - save_flags(flags); cli(); - tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - } - count = tty->flip.count; - tty->flip.count = 0; - restore_flags(flags); - - disc->receive_buf(tty, cp, fp, count); -out: - tty_ldisc_deref(disc); -} - -/* - * Call the ldisc flush directly from a driver. This function may - * return an error and need retrying by the user. 
- */ - -int tty_push_data(struct tty_struct *tty, unsigned char *cp, unsigned char *fp, int count) -{ - int ret = 0; - struct tty_ldisc *disc; - - disc = tty_ldisc_ref(tty); - if(test_bit(TTY_DONT_FLIP, &tty->flags)) - ret = -EAGAIN; - else if(disc == NULL) - ret = -EIO; - else - disc->receive_buf(tty, cp, fp, count); - tty_ldisc_deref(disc); - return ret; - -} - -/* - * Routine which returns the baud rate of the tty - * - * Note that the baud_table needs to be kept in sync with the - * include/asm/termbits.h file. - */ -static int baud_table[] = { - 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 115200, 230400, 460800, -#ifdef __sparc__ - 76800, 153600, 307200, 614400, 921600 -#else - 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000, - 2500000, 3000000, 3500000, 4000000 -#endif -}; - -static int n_baud_table = sizeof(baud_table)/sizeof(int); - -int tty_get_baud_rate(struct tty_struct *tty) -{ - unsigned int cflag, i; - - cflag = tty->termios->c_cflag; - - i = cflag & CBAUD; - if (i & CBAUDEX) { - i &= ~CBAUDEX; - if (i < 1 || i+15 >= n_baud_table) - tty->termios->c_cflag &= ~CBAUDEX; - else - i += 15; - } - if (i==15 && tty->alt_speed) { - if (!tty->warned) { - printk(KERN_WARNING "Use of setserial/setrocket to " - "set SPD_* flags is deprecated\n"); - tty->warned = 1; - } - return(tty->alt_speed); - } - - return baud_table[i]; -} - -void tty_flip_buffer_push(struct tty_struct *tty) -{ - if (tty->low_latency) - flush_to_ldisc((void *) tty); - else - queue_task(&tty->flip.tqueue, &tq_timer); -} - -/* - * This subroutine initializes a tty structure. - */ -static void initialize_tty_struct(struct tty_struct *tty) -{ - memset(tty, 0, sizeof(struct tty_struct)); - tty->magic = TTY_MAGIC; - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty->pgrp = -1; - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - tty->flip.tqueue.routine = flush_to_ldisc; - tty->flip.tqueue.data = tty; - init_MUTEX(&tty->flip.pty_sem); - init_MUTEX(&tty->termios_sem); - init_waitqueue_head(&tty->write_wait); - init_waitqueue_head(&tty->read_wait); - tty->tq_hangup.routine = do_tty_hangup; - tty->tq_hangup.data = tty; - sema_init(&tty->atomic_read, 1); - sema_init(&tty->atomic_write, 1); - spin_lock_init(&tty->read_lock); - INIT_LIST_HEAD(&tty->tty_files); - INIT_TQUEUE(&tty->SAK_tq, 0, 0); -} - -/* - * The default put_char routine if the driver did not define one. - */ -void tty_default_put_char(struct tty_struct *tty, unsigned char ch) -{ - tty->driver.write(tty, 0, &ch, 1); -} - -/* - * Register a tty device described by <driver>, with minor number <minor>. 
- */ -void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor) -{ -#ifdef CONFIG_DEVFS_FS - umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR; - kdev_t device = MKDEV (driver->major, minor); - int idx = minor - driver->minor_start; - char buf[32]; - - switch (device) { - case TTY_DEV: - case PTMX_DEV: - mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - break; - default: - if (driver->major == PTY_MASTER_MAJOR) - mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - break; - } - if ( (minor < driver->minor_start) || - (minor >= driver->minor_start + driver->num) ) { - printk(KERN_ERR "Attempt to register invalid minor number " - "with devfs (%d:%d).\n", (int)driver->major,(int)minor); - return; - } -# ifdef CONFIG_UNIX98_PTYS - if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) && - (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) ) - flags |= DEVFS_FL_CURRENT_OWNER; -# endif - sprintf(buf, driver->name, idx + driver->name_base); - devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT, - driver->major, minor, mode, &tty_fops, NULL); -#endif /* CONFIG_DEVFS_FS */ -} - -void tty_unregister_devfs (struct tty_driver *driver, unsigned minor) -{ -#ifdef CONFIG_DEVFS_FS - void * handle; - int idx = minor - driver->minor_start; - char buf[32]; - - sprintf(buf, driver->name, idx + driver->name_base); - handle = devfs_find_handle (NULL, buf, driver->major, minor, - DEVFS_SPECIAL_CHR, 0); - devfs_unregister (handle); -#endif /* CONFIG_DEVFS_FS */ -} - -EXPORT_SYMBOL(tty_register_devfs); -EXPORT_SYMBOL(tty_unregister_devfs); - -/* - * Called by a tty driver to register itself. - */ -int tty_register_driver(struct tty_driver *driver) -{ - int error; - int i; - - if (driver->flags & TTY_DRIVER_INSTALLED) - return 0; - - error = devfs_register_chrdev(driver->major, driver->name, &tty_fops); - if (error < 0) - return error; - else if(driver->major == 0) - driver->major = error; - - if (!driver->put_char) - driver->put_char = tty_default_put_char; - - driver->prev = 0; - driver->next = tty_drivers; - if (tty_drivers) tty_drivers->prev = driver; - tty_drivers = driver; - - if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { - for(i = 0; i < driver->num; i++) - tty_register_devfs(driver, 0, driver->minor_start + i); - } - proc_tty_register_driver(driver); - return error; -} - -/* - * Called by a tty driver to unregister itself. - */ -int tty_unregister_driver(struct tty_driver *driver) -{ - int retval; - struct tty_driver *p; - int i, found = 0; - struct termios *tp; - const char *othername = NULL; - - if (*driver->refcount) - return -EBUSY; - - for (p = tty_drivers; p; p = p->next) { - if (p == driver) - found++; - else if (p->major == driver->major) - othername = p->name; - } - - if (!found) - return -ENOENT; - - if (othername == NULL) { - retval = devfs_unregister_chrdev(driver->major, driver->name); - if (retval) - return retval; - } else - devfs_register_chrdev(driver->major, othername, &tty_fops); - - if (driver->prev) - driver->prev->next = driver->next; - else - tty_drivers = driver->next; - - if (driver->next) - driver->next->prev = driver->prev; - - /* - * Free the termios and termios_locked structures because - * we don't want to get memory leaks when modular tty - * drivers are removed from the kernel. 
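[Illustrative aside, not part of the patch: tty_unregister_driver() above frees per-line termios state that the driver supplied at registration time. A hedged sketch of the matching registration, 2.4 style; every demo_* name is invented, and a real driver would also fill in open/close/write and its other method pointers before calling tty_register_driver().

#include <linux/init.h>
#include <linux/string.h>
#include <linux/tty.h>

static int demo_refcount;
static struct tty_struct *demo_table[1];
static struct termios *demo_termios[1];
static struct termios *demo_termios_locked[1];
static struct tty_driver demo_driver;

static int __init demo_tty_init(void)
{
	memset(&demo_driver, 0, sizeof(demo_driver));
	demo_driver.magic          = TTY_DRIVER_MAGIC;
	demo_driver.driver_name    = "demo";
	demo_driver.name           = "ttyD";
	demo_driver.major          = 0;	/* 0: the core allocates one */
	demo_driver.minor_start    = 0;
	demo_driver.num            = 1;
	demo_driver.type           = TTY_DRIVER_TYPE_SERIAL;
	demo_driver.subtype        = SERIAL_TYPE_NORMAL;
	demo_driver.init_termios   = tty_std_termios;
	demo_driver.refcount       = &demo_refcount;
	demo_driver.table          = demo_table;
	demo_driver.termios        = demo_termios;
	demo_driver.termios_locked = demo_termios_locked;
	return tty_register_driver(&demo_driver);
}

End of aside.]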
- */ - for (i = 0; i < driver->num; i++) { - tp = driver->termios[i]; - if (tp) { - driver->termios[i] = NULL; - kfree(tp); - } - tp = driver->termios_locked[i]; - if (tp) { - driver->termios_locked[i] = NULL; - kfree(tp); - } - tty_unregister_devfs(driver, driver->minor_start + i); - } - proc_tty_unregister_driver(driver); - return 0; -} - - -/* - * Initialize the console device. This is called *early*, so - * we can't necessarily depend on lots of kernel help here. - * Just do some early initializations, and do the complex setup - * later. - */ -void __init console_init(void) -{ - /* Setup the default TTY line discipline. */ - memset(tty_ldiscs, 0, NR_LDISCS*sizeof(struct tty_ldisc)); - (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); - - /* - * Set up the standard termios. Individual tty drivers may - * deviate from this; this is used as a template. - */ - memset(&tty_std_termios, 0, sizeof(struct termios)); - memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS); - tty_std_termios.c_iflag = ICRNL | IXON; - tty_std_termios.c_oflag = OPOST | ONLCR; - tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL; - tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | - ECHOCTL | ECHOKE | IEXTEN; - - /* - * set up the console device so that later boot sequences can - * inform about problems etc.. - */ -#ifdef CONFIG_EARLY_PRINTK - disable_early_printk(); -#endif - -#ifdef CONFIG_XEN_CONSOLE - xen_console_init(); -#endif - -#ifdef CONFIG_VT - con_init(); -#endif -#ifdef CONFIG_AU1X00_SERIAL_CONSOLE - au1x00_serial_console_init(); -#endif -#ifdef CONFIG_SERIAL_CONSOLE -#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2)) - console_8xx_init(); -#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL) - if (_machine == _MACH_Pmac) - mac_scc_console_init(); - else - serial_console_init(); -#elif defined(CONFIG_MAC_SERIAL) - mac_scc_console_init(); -#elif defined(CONFIG_PARISC) - pdc_console_init(); -#elif defined(CONFIG_SERIAL) - serial_console_init(); -#endif /* CONFIG_8xx */ -#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) - vme_scc_console_init(); -#endif -#if defined(CONFIG_SERIAL167) - serial167_console_init(); -#endif -#if defined(CONFIG_SH_SCI) - sci_console_init(); -#endif -#endif -#ifdef CONFIG_SERIAL_DEC_CONSOLE - dec_serial_console_init(); -#endif -#ifdef CONFIG_TN3270_CONSOLE - tub3270_con_init(); -#endif -#ifdef CONFIG_TN3215 - con3215_init(); -#endif -#ifdef CONFIG_HWC - hwc_console_init(); -#endif -#ifdef CONFIG_STDIO_CONSOLE - stdio_console_init(); -#endif -#ifdef CONFIG_SERIAL_21285_CONSOLE - rs285_console_init(); -#endif -#ifdef CONFIG_SERIAL_SA1100_CONSOLE - sa1100_rs_console_init(); -#endif -#ifdef CONFIG_ARC_CONSOLE - arc_console_init(); -#endif -#ifdef CONFIG_SERIAL_AMBA_CONSOLE - ambauart_console_init(); -#endif -#ifdef CONFIG_SERIAL_TX3912_CONSOLE - tx3912_console_init(); -#endif -#ifdef CONFIG_TXX927_SERIAL_CONSOLE - txx927_console_init(); -#endif -#ifdef CONFIG_SERIAL_TXX9_CONSOLE - txx9_serial_console_init(); -#endif -#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE - sb1250_serial_console_init(); -#endif -#ifdef CONFIG_IP22_SERIAL - sgi_serial_console_init(); -#endif -} - -static struct tty_driver dev_tty_driver, dev_syscons_driver; -#ifdef CONFIG_UNIX98_PTYS -static struct tty_driver dev_ptmx_driver; -#endif -#ifdef CONFIG_HVC_CONSOLE - hvc_console_init(); -#endif -#ifdef CONFIG_VT -static struct tty_driver dev_console_driver; -#endif - -/* - * Ok, now we can initialize the rest of the tty devices and can count - * on memory 
allocations, interrupts etc.. - */ -void __init tty_init(void) -{ - /* - * dev_tty_driver and dev_console_driver are actually magic - * devices which get redirected at open time. Nevertheless, - * we register them so that register_chrdev is called - * appropriately. - */ - memset(&dev_tty_driver, 0, sizeof(struct tty_driver)); - dev_tty_driver.magic = TTY_DRIVER_MAGIC; - dev_tty_driver.driver_name = "/dev/tty"; - dev_tty_driver.name = dev_tty_driver.driver_name + 5; - dev_tty_driver.name_base = 0; - dev_tty_driver.major = TTYAUX_MAJOR; - dev_tty_driver.minor_start = 0; - dev_tty_driver.num = 1; - dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_tty_driver.subtype = SYSTEM_TYPE_TTY; - - if (tty_register_driver(&dev_tty_driver)) - panic("Couldn't register /dev/tty driver\n"); - - dev_syscons_driver = dev_tty_driver; - dev_syscons_driver.driver_name = "/dev/console"; - dev_syscons_driver.name = dev_syscons_driver.driver_name + 5; - dev_syscons_driver.major = TTYAUX_MAJOR; - dev_syscons_driver.minor_start = 1; - dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS; - - if (tty_register_driver(&dev_syscons_driver)) - panic("Couldn't register /dev/console driver\n"); - - /* console calls tty_register_driver() before kmalloc() works. - * Thus, we can't devfs_register() then. Do so now, instead. - */ -#ifdef CONFIG_VT - con_init_devfs(); -#endif - -#ifdef CONFIG_UNIX98_PTYS - dev_ptmx_driver = dev_tty_driver; - dev_ptmx_driver.driver_name = "/dev/ptmx"; - dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5; - dev_ptmx_driver.major= MAJOR(PTMX_DEV); - dev_ptmx_driver.minor_start = MINOR(PTMX_DEV); - dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX; - - if (tty_register_driver(&dev_ptmx_driver)) - panic("Couldn't register /dev/ptmx driver\n"); -#endif - -#ifdef CONFIG_VT - dev_console_driver = dev_tty_driver; - dev_console_driver.driver_name = "/dev/vc/0"; - dev_console_driver.name = dev_console_driver.driver_name + 5; - dev_console_driver.major = TTY_MAJOR; - dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE; - - if (tty_register_driver(&dev_console_driver)) - panic("Couldn't register /dev/tty0 driver\n"); - - kbd_init(); -#endif - -#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE - if (ia64_platform_is("sn2")) { - sn_sal_serial_console_init(); - return; /* only one console right now for SN2 */ - } -#endif -#ifdef CONFIG_ESPSERIAL /* init ESP before rs, so rs doesn't see the port */ - espserial_init(); -#endif -#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) - vme_scc_init(); -#endif -#ifdef CONFIG_SERIAL_TX3912 - tx3912_rs_init(); -#endif -#ifdef CONFIG_ROCKETPORT - rp_init(); -#endif -#ifdef CONFIG_SERIAL167 - serial167_init(); -#endif -#ifdef CONFIG_CYCLADES - cy_init(); -#endif -#ifdef CONFIG_STALLION - stl_init(); -#endif -#ifdef CONFIG_ISTALLION - stli_init(); -#endif -#ifdef CONFIG_DIGI - pcxe_init(); -#endif -#ifdef CONFIG_DIGIEPCA - pc_init(); -#endif -#ifdef CONFIG_SPECIALIX - specialix_init(); -#endif -#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2)) - rs_8xx_init(); -#endif /* CONFIG_8xx */ - pty_init(); -#ifdef CONFIG_MOXA_SMARTIO - mxser_init(); -#endif -#ifdef CONFIG_MOXA_INTELLIO - moxa_init(); -#endif -#ifdef CONFIG_VT - vcs_init(); -#endif -#ifdef CONFIG_TN3270 - tub3270_init(); -#endif -#ifdef CONFIG_TN3215 - tty3215_init(); -#endif -#ifdef CONFIG_HWC - hwc_tty_init(); -#endif -#ifdef CONFIG_A2232 
- a2232board_init(); -#endif -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile --- a/linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,97 +0,0 @@ -# -# drivers/scsi/aic7xxx/Makefile -# -# Makefile for the Linux aic7xxx SCSI driver. -# - -O_TARGET := aic7xxx_drv.o - -list-multi := aic7xxx.o aic79xx.o - -obj-$(CONFIG_SCSI_AIC7XXX) += aic7xxx.o -ifeq ($(CONFIG_PCI),y) -obj-$(CONFIG_SCSI_AIC79XX) += aic79xx.o -endif - -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/scsi -Werror -#EXTRA_CFLAGS += -g - -# Platform Specific Files -obj-aic7xxx = aic7xxx_osm.o aic7xxx_proc.o - -# Core Files -obj-aic7xxx += aic7xxx_core.o aic7xxx_93cx6.o -ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y) -obj-aic7xxx += aic7xxx_reg_print.o -endif - -#EISA Specific Files -AIC7XXX_EISA_ARCH = $(filter i386 alpha xen,$(ARCH)) -ifneq ($(AIC7XXX_EISA_ARCH),) -obj-aic7xxx += aic7770.o -# Platform Specific EISA Files -obj-aic7xxx += aic7770_osm.o -endif - -#PCI Specific Files -ifeq ($(CONFIG_PCI),y) -obj-aic7xxx += aic7xxx_pci.o -# Platform Specific PCI Files -obj-aic7xxx += aic7xxx_osm_pci.o -endif - -# Platform Specific U320 Files -obj-aic79xx = aic79xx_osm.o aic79xx_proc.o aic79xx_osm_pci.o -# Core Files -obj-aic79xx += aic79xx_core.o aic79xx_pci.o -ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y) -obj-aic79xx += aic79xx_reg_print.o -endif - -# Override our module desitnation -MOD_DESTDIR = $(shell cd .. && $(CONFIG_SHELL) $(TOPDIR)/scripts/pathdown.sh) - -include $(TOPDIR)/Rules.make - -aic7xxx_core.o: aic7xxx_seq.h -$(obj-aic7xxx): aic7xxx_reg.h -aic7xxx.o: aic7xxx_seq.h aic7xxx_reg.h $(obj-aic7xxx) - $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic7xxx) - -aic79xx_core.o: aic79xx_seq.h -$(obj-aic79xx): aic79xx_reg.h -aic79xx.o: aic79xx_seq.h aic79xx_reg.h $(obj-aic79xx) - $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic79xx) - -ifeq ($(CONFIG_AIC7XXX_BUILD_FIRMWARE),y) -aic7xxx_gen = aic7xxx_seq.h aic7xxx_reg.h -ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y) -aic7xxx_gen += aic7xxx_reg_print.c -aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h \ - -p aic7xxx_reg_print.c -i aic7xxx_osm.h \ - -o aic7xxx_seq.h aic7xxx.seq -else -aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h \ - -o aic7xxx_seq.h aic7xxx.seq -endif -$(aic7xxx_gen): aic7xxx.seq aic7xxx.reg aicasm/aicasm - $(aic7xxx_asm_cmd) -endif - -ifeq ($(CONFIG_AIC79XX_BUILD_FIRMWARE),y) -aic79xx_gen = aic79xx_seq.h aic79xx_reg.h -ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y) -aic79xx_gen += aic79xx_reg_print.c -aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \ - -p aic79xx_reg_print.c -i aic79xx_osm.h \ - -o aic79xx_seq.h aic79xx.seq -else -aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \ - -o aic79xx_seq.h aic79xx.seq -endif -$(aic79xx_gen): aic79xx.seq aic79xx.reg aicasm/aicasm - $(aic79xx_asm_cmd) -endif - -aicasm/aicasm: aicasm/*.[chyl] - $(MAKE) -C aicasm diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/bugs.h --- a/linux-2.4-xen-sparse/include/asm-xen/bugs.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,53 +0,0 @@ -/* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * <rreilova@xxxxxxxxxxxx> - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). 
- * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/msr.h> - - -static void __init check_fpu(void) -{ - boot_cpu_data.fdiv_bug = 0; -} - -static void __init check_hlt(void) -{ - boot_cpu_data.hlt_works_ok = 1; -} - -static void __init check_bugs(void) -{ - extern void __init boot_init_fpu(void); - - identify_cpu(&boot_cpu_data); - boot_init_fpu(); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_fpu(); - check_hlt(); - system_utsname.machine[1] = '0' + - (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/desc.h --- a/linux-2.4-xen-sparse/include/asm-xen/desc.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,37 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#include <asm/ldt.h> - -#ifndef __ASSEMBLY__ - -struct desc_struct { - unsigned long a,b; -}; - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); -}; - -extern struct desc_struct default_ldt[]; - -static inline void clear_LDT(void) -{ - xen_set_ldt(0, 0); -} - -static inline void load_LDT(mm_context_t *pc) -{ - void *segments = pc->ldt; - int count = pc->size; - - if ( count == 0 ) - segments = NULL; - - xen_set_ldt((unsigned long)segments, count); -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ARCH_DESC_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/fixmap.h --- a/linux-2.4-xen-sparse/include/asm-xen/fixmap.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,107 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <asm-xen/gnttab.h> -#ifdef CONFIG_HIGHMEM -#include <linux/threads.h> -#include <asm/kmap_types.h> -#endif - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. 
- */
-
-enum fixed_addresses {
-#ifdef CONFIG_HIGHMEM
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
- FIX_BLKRING_BASE,
- FIX_NETRING0_BASE,
- FIX_NETRING1_BASE,
- FIX_NETRING2_BASE,
- FIX_NETRING3_BASE,
- FIX_SHARED_INFO,
- FIX_GNTTAB_BEGIN,
- FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
-#ifdef CONFIG_VGA_CONSOLE
-#define NR_FIX_BTMAPS 32 /* 128KB for the Dom0 VGA Console A0000-C0000 */
-#else
-#define NR_FIX_BTMAPS 1 /* in case anyone wants it in future... */
-#endif
- FIX_BTMAP_END,
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
- /* our bt_ioremap is permanent, unlike other architectures */
-
- __end_of_permanent_fixed_addresses,
- __end_of_fixed_addresses = __end_of_permanent_fixed_addresses
-};
-
-extern void __set_fixmap (enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
-extern void clear_fixmap(enum fixed_addresses idx);
-
-/*
- * used by vmalloc.c.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
- */
-#define FIXADDR_TOP (HYPERVISOR_VIRT_START - 2*PAGE_SIZE)
-#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
-
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without translation, we catch the bug with a NULL-dereference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(unsigned int idx)
-{
- return __fix_to_virt(idx);
-}
-
-#endif
diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/highmem.h
--- a/linux-2.4-xen-sparse/include/asm-xen/highmem.h Sat Oct 8 17:37:45 2005
+++ /dev/null Sat Oct 8 20:28:24 2005
@@ -1,132 +0,0 @@
-/*
- * highmem.h: virtual kernel memory mappings for high memory
- *
- * Used in CONFIG_HIGHMEM systems for memory pages which
- * are not addressable by direct kernel virtual addresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- * Gerhard.Wichert@xxxxxxxxxxxxxx
- *
- *
- * Redesigned the x86 32-bit VM architecture to deal with
- * up to 16 Terabyte physical memory. With current x86 CPUs
- * we now support up to 64 Gigabytes physical RAM.
- *
- * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
- */
-
-#ifndef _ASM_HIGHMEM_H
-#define _ASM_HIGHMEM_H
-
-#ifdef __KERNEL__
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <asm/kmap_types.h>
-#include <asm/pgtable.h>
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define HIGHMEM_DEBUG 1
-#else
-#define HIGHMEM_DEBUG 0
-#endif
-
-/* declarations for highmem.c */
-extern unsigned long highstart_pfn, highend_pfn;
-
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
-extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void) __init;
-
-/*
- * Right now we initialize only a single pte table. It can be extended
- * easily, subsequent pte tables have to be allocated in one physical
- * chunk of RAM.
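[Illustrative aside, not part of the patch: a worked consequence of the fixmap.h definitions removed just above. Fixmap indices allocate pages downward from FIXADDR_TOP, so adjacent enum entries differ by exactly one page; demo_fixmap_layout is an invented name and BUG() is the page.h macro from this tree.

#include <asm/fixmap.h>

static void demo_fixmap_layout(void)
{
	unsigned long hi = fix_to_virt(FIX_SHARED_INFO);
	unsigned long lo = fix_to_virt(FIX_SHARED_INFO + 1);

	if (hi - lo != PAGE_SIZE)	/* higher index => lower address */
		BUG();
}

End of aside.]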
- */ -#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23)) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) -#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) - -extern void * FASTCALL(kmap_high(struct page *page, int nonblocking)); -extern void FASTCALL(kunmap_high(struct page *page)); - -#define kmap(page) __kmap(page, 0) -#define kmap_nonblock(page) __kmap(page, 1) - -static inline void *__kmap(struct page *page, int nonblocking) -{ - if (in_interrupt()) - out_of_line_bug(); - if (page < highmem_start_page) - return page_address(page); - return kmap_high(page, nonblocking); -} - -static inline void kunmap(struct page *page) -{ - if (in_interrupt()) - out_of_line_bug(); - if (page < highmem_start_page) - return; - kunmap_high(page); -} - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic(struct page *page, enum km_type type) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - if (page < highmem_start_page) - return page_address(page); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#if HIGHMEM_DEBUG - if (!pte_none(*(kmap_pte-idx))) - out_of_line_bug(); -#endif - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); - __flush_tlb_one(vaddr); - - return (void*) vaddr; -} - -static inline void kunmap_atomic(void *kvaddr, enum km_type type) -{ -#if HIGHMEM_DEBUG - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - if (vaddr < FIXADDR_START) // FIXME - return; - - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) - out_of_line_bug(); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(kmap_pte-idx); - __flush_tlb_one(vaddr); -#endif -} - -#endif /* __KERNEL__ */ - -#endif /* _ASM_HIGHMEM_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/hw_irq.h --- a/linux-2.4-xen-sparse/include/asm-xen/hw_irq.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,61 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - */ - -#include <linux/config.h> -#include <linux/smp.h> -#include <asm/atomic.h> -#include <asm/irq.h> - -#define SYSCALL_VECTOR 0x80 - -extern int irq_vector[NR_IRQS]; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -extern char _stext, _etext; - -extern unsigned long prof_cpu_mask; -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -/* - * x86 profiling function, SMP safe. We might want to do this in - * assembly totally? - */ -static inline void x86_do_profile (unsigned long eip) -{ - if (!prof_buffer) - return; - - /* - * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. - * (default is all CPUs.) - */ - if (!((1<<smp_processor_id()) & prof_cpu_mask)) - return; - - eip -= (unsigned long) &_stext; - eip >>= prof_shift; - /* - * Don't ignore out-of-bounds EIP values silently, - * put them into the last histogram slot, so if - * present, they will show up as a sharp peak. 
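[Illustrative aside, not part of the patch: typical use of the kmap_atomic()/kunmap_atomic() pair declared in this hunk. A sketch assuming a valid highmem struct page and the KM_USER0 slot from asm/kmap_types.h; demo_clear_highpage is an invented name that mirrors 2.4's clear_highpage().

#include <linux/highmem.h>
#include <linux/string.h>

static void demo_clear_highpage(struct page *page)
{
	void *va = kmap_atomic(page, KM_USER0);	/* per-CPU fixmap slot */

	memset(va, 0, PAGE_SIZE);
	kunmap_atomic(va, KM_USER0);		/* no sleeping in between */
}

End of aside.]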
- */ - if (eip > prof_len-1) - eip = prof_len-1; - atomic_inc((atomic_t *)&prof_buffer[eip]); -} - -static inline void hw_resend_irq(struct hw_interrupt_type *h, - unsigned int i) -{} - -#endif /* _ASM_HW_IRQ_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/io.h --- a/linux-2.4-xen-sparse/include/asm-xen/io.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,457 +0,0 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H - -#include <linux/config.h> - -/* - * This file contains the definitions for the x86 IO instructions - * inb/inw/inl/outb/outw/outl and the "string versions" of the same - * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" - * versions of the single-IO instructions (inb_p/inw_p/..). - * - * This file is not meant to be obfuscating: it's just complicated - * to (a) handle it all in a way that makes gcc able to optimize it - * as well as possible and (b) trying to avoid writing the same thing - * over and over again with slight variations and possibly making a - * mistake somewhere. - */ - -/* - * Thanks to James van Artsdalen for a better timing-fix than - * the two short jumps: using outb's to a nonexistent port seems - * to guarantee better timings even on fast machines. - * - * On the other hand, I'd like to be sure of a non-existent port: - * I feel a bit unsafe about using 0x80 (should be safe, though) - * - * Linus - */ - - /* - * Bit simplified and optimized by Jan Hubicka - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. - * - * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, - * isa_read[wl] and isa_write[wl] fixed - * - Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxxxxx> - */ - -#define IO_SPACE_LIMIT 0xffff - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ - -#ifdef __KERNEL__ - -#include <linux/vmalloc.h> - -/* - * Temporary debugging check to catch old code using - * unmapped ISA addresses. Will be removed in 2.4. - */ -#if CONFIG_DEBUG_IOVIRT - extern void *__io_virt_debug(unsigned long x, const char *file, int line); - extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); - #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) -//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) -#else - #define __io_virt(x) ((void *)(x)) -//#define __io_phys(x) __pa(x) -#endif - -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void * address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. 
In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void * phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * We define page_to_phys 'incorrectly' because it is used when merging blkdev - * requests, and the correct thing to do there is to use machine addresses. - */ -#define page_to_phys(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) - -extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ - -static inline void * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In paticular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - */ - -static inline void * ioremap_nocache (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, _PAGE_PCD); -} - -extern void iounmap(void *addr); - -/* - * bt_ioremap() and bt_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void *bt_ioremap(unsigned long offset, unsigned long size); -extern void bt_iounmap(void *addr, unsigned long size); - -#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) -#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) -#define page_to_bus(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) -#define bus_to_phys(_x) machine_to_phys(_x) -#define bus_to_page(_x) (mem_map + (bus_to_phys(_x) >> PAGE_SHIFT)) - -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. 
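[Illustrative aside, not part of the patch: the usual pairing for the MMIO accessors defined in this header is ioremap(), then readl()/writel() on the returned cookie, then iounmap(). A sketch only; the bus address, length and register offsets below are invented.

#include <asm/io.h>

static void demo_poke_mmio(void)
{
	void *regs = ioremap(0xfebf0000UL, 0x100);	/* made-up BAR */

	if (regs == NULL)
		return;
	(void) readl(regs);		/* hypothetical ID register */
	writel(1, regs + 4);		/* hypothetical enable bit */
	iounmap(regs);
}

End of aside.]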
- */ - -#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) -#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) -#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) -#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) -#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) -#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) -#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) - -/* - * ISA space is 'always mapped' on a typical x86 system, no need to - * explicitly ioremap() it. The fact that the ISA IO space is mapped - * to PAGE_OFFSET is pure coincidence - it does not mean ISA values - * are physical addresses. The following constant pointer can be - * used as the IO-area pointer (it can be iounmapped as well, so the - * analogy with PCI is quite large): - */ -#define __ISA_IO_base ((char *)(PAGE_OFFSET)) - -#define isa_readb(a) readb(__ISA_IO_base + (a)) -#define isa_readw(a) readw(__ISA_IO_base + (a)) -#define isa_readl(a) readl(__ISA_IO_base + (a)) -#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) -#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) -#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) -#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) -#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) - - -/* - * Again, i386 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) -#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) - -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/** - * isa_check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the ISA mmio address io_addr. - * Returns 1 on a match. - * - * This function is deprecated. New drivers should use ioremap and - * check_signature. - */ - - -static inline int isa_check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. 
Accidentally out of order processors (PPro errata #51) - */ - -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) - -static inline void flush_write_buffers(void) -{ - __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); -} - -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - -#else - -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() - -#endif - -#endif /* __KERNEL__ */ - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#elif defined(__UNSAFE_IO__) -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#else -#define __SLOW_DOWN_IO "\n1: outb %%al,$0x80\n" \ - "2:\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,2b\n" \ - ".previous" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - -#ifdef CONFIG_MULTIQUAD -extern void *xquad_portio; /* Where the IO area was mapped */ -#endif /* CONFIG_MULTIQUAD */ - -/* - * Talk about misusing macros.. - */ -#define __OUT1(s,x) \ -static inline void out##s(unsigned x value, unsigned short port) { - -#ifdef __UNSAFE_IO__ -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" -#else -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("1: out" #s " %" s1 "0,%" s2 "1\n" \ - "2:\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,2b\n" \ - ".previous" -#endif - -#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE) -#define __OUTQ(s,ss,x) /* Do the equivalent of the portio op on quads */ \ -static inline void out##ss(unsigned x value, unsigned short port) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - out##ss##_local(value, port); \ -} \ -static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ -} - -#define __INQ(s,ss) /* Do the equivalent of the portio op on quads */ \ -static inline RETURN_TYPE in##ss(unsigned short port) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - return in##ss##_local(port); \ -} \ -static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ - else\ - return 0;\ -} -#endif /* CONFIG_MULTIQUAD && !STANDALONE */ - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} -#else -/* Make the default portio routines operate on quad 0 */ -#define __OUT(s,s1,x) \ -__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ -__OUTQ(s,s,x) \ -__OUTQ(s,s##_p,x) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __IN1(s) \ -static inline RETURN_TYPE in##s(unsigned short 
port) { RETURN_TYPE _v; - -#ifdef __UNSAFE_IO__ -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" -#else -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("1: in" #s " %" s2 "1,%" s1 "0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov" #s " $~0,%" s1 "0\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" -#endif - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } -#else -/* Make the default portio routines operate on quad 0 */ -#define __IN(s,s1,i...) \ -__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__INQ(s,s) \ -__INQ(s,s##_p) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __INS(s) \ -static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/irq.h --- a/linux-2.4-xen-sparse/include/asm-xen/irq.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,65 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx> - */ - -#include <linux/config.h> -#include <asm/hypervisor.h> -#include <asm/ptrace.h> - -/* - * The flat IRQ space is divided into two regions: - * 1. A one-to-one mapping of real physical IRQs. This space is only used - * if we have physical device-access privilege. This region is at the - * start of the IRQ space so that existing device drivers do not need - * to be modified to translate physical IRQ numbers into our IRQ space. - * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These - * are bound using the provided bind/unbind functions. - */ - -#define PIRQ_BASE 0 -#define NR_PIRQS 128 - -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 - -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ -extern int bind_virq_to_irq(int virq); -extern void unbind_virq_from_irq(int virq); -extern int bind_evtchn_to_irq(int evtchn); -extern void unbind_evtchn_from_irq(int evtchn); - -static __inline__ int irq_cannonicalize(int irq) -{ - return (irq == 2) ? 
9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - - -#define CPU_MASK_NONE 0 - -/* XXX SMH: no-op for compat w/ 2.6 shared files */ -#define irq_ctx_init(cpu) do { ; } while (0) - -#endif /* _ASM_IRQ_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/keyboard.h --- a/linux-2.4-xen-sparse/include/asm-xen/keyboard.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,74 +0,0 @@ -/* - * linux/include/asm-i386/keyboard.h - * - * Created 3 Nov 1996 by Geert Uytterhoeven - */ - -/* - * This file contains the i386 architecture specific keyboard definitions - */ - -#ifndef _I386_KEYBOARD_H -#define _I386_KEYBOARD_H - -#ifdef __KERNEL__ - -#include <linux/kernel.h> -#include <linux/ioport.h> -#include <linux/kd.h> -#include <linux/pm.h> -#include <asm/io.h> - -#define KEYBOARD_IRQ 1 -#define DISABLE_KBD_DURING_INTERRUPTS 0 - -extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); -extern int pckbd_getkeycode(unsigned int scancode); -extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, - char raw_mode); -extern char pckbd_unexpected_up(unsigned char keycode); -extern void pckbd_leds(unsigned char leds); -extern void pckbd_init_hw(void); -extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); -extern pm_callback pm_kbd_request_override; -extern unsigned char pckbd_sysrq_xlate[128]; - -#define kbd_setkeycode pckbd_setkeycode -#define kbd_getkeycode pckbd_getkeycode -#define kbd_translate pckbd_translate -#define kbd_unexpected_up pckbd_unexpected_up -#define kbd_leds pckbd_leds -#define kbd_init_hw pckbd_init_hw -#define kbd_sysrq_xlate pckbd_sysrq_xlate - -#define SYSRQ_KEY 0x54 - -#define kbd_controller_present() (xen_start_info.flags & SIF_INITDOMAIN) - -/* resource allocation */ -#define kbd_request_region() -#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ - "keyboard", NULL) - -/* How to access the keyboard macros on this platform. */ -#define kbd_read_input() inb(KBD_DATA_REG) -#define kbd_read_status() inb(KBD_STATUS_REG) -#define kbd_write_output(val) outb(val, KBD_DATA_REG) -#define kbd_write_command(val) outb(val, KBD_CNTL_REG) - -/* Some stoneage hardware needs delays after some operations. */ -#define kbd_pause() do { } while(0) - -/* - * Machine specific bits for the PS/2 driver - */ - -#define AUX_IRQ 12 - -#define aux_request_irq(hand, dev_id) \ - request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) - -#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) - -#endif /* __KERNEL__ */ -#endif /* _I386_KEYBOARD_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/mmu_context.h --- a/linux-2.4-xen-sparse/include/asm-xen/mmu_context.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,59 +0,0 @@ -#ifndef __I386_MMU_CONTEXT_H -#define __I386_MMU_CONTEXT_H - -#include <linux/config.h> -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/pgalloc.h> - -/* - * hooks to add arch specific data into the mm struct. - * Note that destroy_context is called even if init_new_context - * fails. 
- */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); - -#ifdef CONFIG_SMP - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ - if(cpu_tlbstate[cpu].state == TLBSTATE_OK) - cpu_tlbstate[cpu].state = TLBSTATE_LAZY; -} -#else -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ -} -#endif - -extern pgd_t *cur_pgd; - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) -{ - struct mmuext_op _op[2], *op = _op; - if (prev != next) { - /* stop flush ipis for the previous mm */ - clear_bit(cpu, &prev->cpu_vm_mask); - /* Re-load page tables */ - cur_pgd = next->pgd; - op->cmd = MMUEXT_NEW_BASEPTR; - op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); - op++; - /* load_LDT, if either the previous or next thread - * has a non-default LDT. - */ - if (next->context.size+prev->context.size) { - op->cmd = MMUEXT_SET_LDT; - op->linear_addr = (unsigned long)next->context.ldt; - op->nr_ents = next->context.size; - op++; - } - BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); - } -} - -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL,smp_processor_id()) - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/module.h --- a/linux-2.4-xen-sparse/include/asm-xen/module.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,14 +0,0 @@ -#ifndef _ASM_I386_MODULE_H -#define _ASM_I386_MODULE_H -/* - * This file contains the i386 architecture specific module code. - */ - -extern int xen_module_init(struct module *mod); - -#define module_map(x) vmalloc(x) -#define module_unmap(x) vfree(x) -#define module_arch_init(x) xen_module_init(x) -#define arch_init_modules(x) do { } while (0) - -#endif /* _ASM_I386_MODULE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/page.h --- a/linux-2.4-xen-sparse/include/asm-xen/page.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,178 +0,0 @@ -#ifndef _I386_PAGE_H -#define _I386_PAGE_H - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#include <linux/config.h> -#include <linux/string.h> -#include <asm/types.h> -#include <asm-xen/xen-public/xen.h> - -#ifdef CONFIG_XEN_SCRUB_PAGES -#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) -#else -#define scrub_pages(_p,_n) ((void)0) -#endif - -#ifdef CONFIG_X86_USE_3DNOW - -#include <asm/mmx.h> - -#define clear_page(page) mmx_clear_page((void *)(page)) -#define copy_page(to,from) mmx_copy_page(to,from) - -#else - -/* - * On older X86 processors its not a win to use MMX here it seems. - * Maybe the K6-III ? 
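[Illustrative aside, not part of the patch: switch_mm() above batches up to two mmuext_op requests into one HYPERVISOR_mmuext_op() hypercall, halving the number of Xen transitions on a context switch. A single-op sketch of the same idiom, using only names from the quoted code; demo_new_baseptr is invented.

#include <asm/hypervisor.h>
#include <asm/page.h>

static void demo_new_baseptr(pgd_t *pgd)
{
	struct mmuext_op op;

	op.cmd = MMUEXT_NEW_BASEPTR;
	op.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
		BUG();
}

End of aside.]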
- */ - -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) - -#endif - -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) - -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -extern unsigned int *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) -#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) -static inline unsigned long phys_to_machine(unsigned long phys) -{ - unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); - machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} -static inline unsigned long machine_to_phys(unsigned long machine) -{ - unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); - phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); - return phys; -} - -/* - * These are used to make use of C type-checking.. - */ -#if CONFIG_X86_PAE -typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; -typedef struct { unsigned long long pgd; } pgd_t; -#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) -#else -typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pgd; } pgd_t; -static inline unsigned long pte_val(pte_t x) -{ - unsigned long ret = x.pte_low; - if ( (ret & 1) ) ret = machine_to_phys(ret); - return ret; -} -#define pte_val_ma(x) ((x).pte_low) -#endif -#define PTE_MASK PAGE_MASK - -typedef struct { unsigned long pgprot; } pgprot_t; - -static inline unsigned long pmd_val(pmd_t x) -{ - unsigned long ret = x.pmd; - if ( ret ) ret = machine_to_phys(ret) | 1; - return ret; -} -#define pmd_val_ma(x) ((x).pmd) -#define pgd_val(x) ({ BUG(); (unsigned long)0; }) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); }) -#define __pte_ma(x) ((pte_t) { (x) } ) -#define __pmd(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); }) -#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#endif /* !__ASSEMBLY__ */ - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* - * This handles the memory map.. We could make this a config - * option, but too many people screw it up, and too few need - * it. - * - * A __PAGE_OFFSET of 0xC0000000 means that the kernel has - * a virtual address space of one gigabyte, which limits the - * amount of physical memory you can use to about 950MB. - * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ - -#define __PAGE_OFFSET (0xC0000000) - -#ifndef __ASSEMBLY__ - -/* - * Tell the user there is some problem. Beep too, so we can - * see^H^H^Hhear bugs in early bootup as well! - * The offending file and line are encoded after the "officially - * undefined" opcode for parsing in the trap handler. 
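[Illustrative aside, not part of the patch: the machine/physical conversion macros above translate only the frame number and preserve the offset within the page, so the two conversions round-trip. A sketch asserting both properties; demo_addr_roundtrip is an invented name and BUG() is the macro defined later in this same header.

#include <asm/page.h>

static void demo_addr_roundtrip(unsigned long phys)
{
	unsigned long mach = phys_to_machine(phys);

	/* frame number is remapped, page offset is preserved */
	if ((mach & ~PAGE_MASK) != (phys & ~PAGE_MASK))
		BUG();
	if (machine_to_phys(mach) != phys)
		BUG();
}

End of aside.]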
- */ - -#if 1 /* Set to zero for a slightly smaller kernel */ -#define BUG() \ - __asm__ __volatile__( "ud2\n" \ - "\t.word %c0\n" \ - "\t.long %c1\n" \ - : : "i" (__LINE__), "i" (__FILE__)) -#else -#define BUG() __asm__ __volatile__("ud2\n") -#endif - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - -/* Pure 2^n version of get_order */ -static __inline__ int get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - -#endif /* __ASSEMBLY__ */ - -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) -#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -/* VIRT <-> MACHINE conversion */ -#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) -#define machine_to_virt(_m) (__va(machine_to_phys(_m))) - -#endif /* __KERNEL__ */ - -#endif /* _I386_PAGE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/pci.h --- a/linux-2.4-xen-sparse/include/asm-xen/pci.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,283 +0,0 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H - -#include <linux/config.h> - -#ifdef __KERNEL__ - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif -#define pcibios_scan_all_fns() 0 - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -void pcibios_config_init(void); -struct pci_bus * pcibios_scan_root(int bus); -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); - -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - -/* Dynamic DMA mapping stuff. - * i386 has everything mapped statically. - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <asm/scatterlist.h> -#include <linux/string.h> -#include <asm/io.h> - -struct pci_dev; - -/* The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices, - * NULL for PCI-like buses (ISA, EISA). - * Returns non-NULL cpu-view pointer to the buffer if successful and - * sets *dma_addrp to the pci side dma address as well, else *dma_addrp - * is undefined. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. 
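[Illustrative aside, not part of the patch: the pci_alloc_consistent() contract described above pairs with pci_free_consistent() called with the identical size and both addresses. A sketch with an invented name and an arbitrary 4 KiB ring size; the CPU uses the returned pointer while the device is handed *dma.

#include <linux/pci.h>
#include <linux/string.h>

static void *demo_alloc_ring(struct pci_dev *pdev, dma_addr_t *dma)
{
	void *ring = pci_alloc_consistent(pdev, 4096, dma);

	if (ring != NULL)
		memset(ring, 0, 4096);	/* CPU view; device uses *dma */
	return ring;
	/* teardown: pci_free_consistent(pdev, 4096, ring, *dma); */
}

End of aside.]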
- *
- * References to the memory and mappings associated with cpu_addr/dma_addr
- * past this call are illegal.
- */
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-/* Map a single buffer of the indicated size for DMA in streaming mode.
- * The 32-bit bus address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
- */
-static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
- return virt_to_bus(ptr);
-}
-
-/* Unmap a single streaming mode DMA translation. The dma_addr and size
- * must match what was provided for in a previous pci_map_single call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-/*
- * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. Identical
- * to pci_map_single, but takes a struct page instead of a virtual address
- */
-static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
- unsigned long offset, size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-
- return page_to_bus(page) + offset;
-}
-
-static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-/* pci_unmap_{page,single} is a nop so... */
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-
- /*
- * temporary 2.4 hack
- */
- for (i = 0; i < nents; i++ ) {
- if (sg[i].address && sg[i].page)
- out_of_line_bug();
- else if (!sg[i].address && !sg[i].page)
- out_of_line_bug();
-
- if (sg[i].address)
- sg[i].dma_address = virt_to_bus(sg[i].address);
- else
- sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
- }
-
- flush_write_buffers();
- return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
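[Illustrative aside, not part of the patch: the streaming-DMA pattern for the single-buffer API above is map, let the device transfer, then unmap with the same address, size and direction. All names except the pci_* calls are invented.

#include <linux/pci.h>

static void demo_tx_one(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);

	/* ... program the device with 'handle', wait for completion ... */
	pci_unmap_single(pdev, handle, len, PCI_DMA_TODEVICE);
}

End of aside.]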
- */ -static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -static inline void pci_dma_sync_single(struct pci_dev *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. - */ -static inline void pci_dma_sync_sg(struct pci_dev *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if(mask < 0x00ffffff) - return 0; - - return 1; -} - -/* This is always fine. */ -#define pci_dac_dma_supported(pci_dev, mask) (1) - -static __inline__ dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_bus(page) + - (dma64_addr_t) offset); -} - -static __inline__ struct page * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return bus_to_page(dma_addr); -} - -static __inline__ unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static __inline__ void -pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -/* Return the index of the PCI controller for device. 
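
The mask test in pci_dma_supported() above reflects that restrictive masks are served from GFP_DMA, the 16 MiB ISA zone, so nothing tighter than 24 bits can be honoured. The same predicate, runnable standalone:

    #include <stdio.h>

    /* The check from the deleted pci_dma_supported(): refuse any mask
     * narrower than the 24-bit ISA DMA limit, accept everything else. */
    static int pci_dma_supported(unsigned long long mask)
    {
        return mask >= 0x00ffffff;
    }

    int main(void)
    {
        printf("24-bit ISA mask ok? %d\n", pci_dma_supported(0x00ffffffULL)); /* 1 */
        printf("20-bit mask ok?     %d\n", pci_dma_supported(0x000fffffULL)); /* 0 */
        printf("32-bit mask ok?     %d\n", pci_dma_supported(0xffffffffULL)); /* 1 */
        return 0;
    }
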
*/ -static inline int pci_controller_num(struct pci_dev *dev) -{ - return 0; -} - -#define HAVE_PCI_MMAP -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); - -#endif /* __KERNEL__ */ - -#endif /* __i386_PCI_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/pgalloc.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgalloc.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,280 +0,0 @@ -#ifndef _I386_PGALLOC_H -#define _I386_PGALLOC_H - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/fixmap.h> -#include <asm/hypervisor.h> -#include <linux/threads.h> - -/* - * Quick lists are aligned so that least significant bits of array pointer - * are all zero when list is empty, and all one when list is full. - */ -#define QUICKLIST_ENTRIES 256 -#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) -#define QUICKLIST_FULL(_l) QUICKLIST_EMPTY((_l)+1) -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - -#define pmd_populate(mm, pmd, pte) \ - do { \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ - } while ( 0 ) - -/* - * Allocate and free page tables. - */ - -#if defined (CONFIG_X86_PAE) - -#error "no PAE support as yet" - -/* - * We can't include <linux/slab.h> here, thus these uglinesses. - */ -struct kmem_cache_s; - -extern struct kmem_cache_s *pae_pgd_cachep; -extern void *kmem_cache_alloc(struct kmem_cache_s *, int); -extern void kmem_cache_free(struct kmem_cache_s *, void *); - - -static inline pgd_t *get_pgd_slow(void) -{ - int i; - pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); - - if (pgd) { - for (i = 0; i < USER_PTRS_PER_PGD; i++) { - unsigned long pmd = __get_free_page(GFP_KERNEL); - if (!pmd) - goto out_oom; - clear_page(pmd); - set_pgd(pgd + i, __pgd(1 + __pa(pmd))); - } - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return pgd; -out_oom: - for (i--; i >= 0; i--) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); - return NULL; -} - -#else - -static inline pgd_t *get_pgd_slow(void) -{ - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); - - if (pgd) { - memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - __make_page_readonly(pgd); - xen_pgd_pin(__pa(pgd)); - } - return pgd; -} - -#endif /* CONFIG_X86_PAE */ - -static inline pgd_t *get_pgd_fast(void) -{ - unsigned long ret; - - if ( !QUICKLIST_EMPTY(pgd_quicklist) ) { - ret = *(--pgd_quicklist); - pgtable_cache_size--; - - } else - ret = (unsigned long)get_pgd_slow(); - return (pgd_t *)ret; -} - -static inline void free_pgd_slow(pgd_t *pgd) -{ -#if defined(CONFIG_X86_PAE) -#error - int i; - - for (i = 0; i < USER_PTRS_PER_PGD; i++) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); -#else - xen_pgd_unpin(__pa(pgd)); - __make_page_writable(pgd); - free_page((unsigned long)pgd); -#endif -} - -static inline void free_pgd_fast(pgd_t *pgd) -{ - if ( !QUICKLIST_FULL(pgd_quicklist) ) { - *(pgd_quicklist++) = (unsigned long)pgd; - pgtable_cache_size++; - } else - free_pgd_slow(pgd); -} - -static inline pte_t 
*pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - { - clear_page(pte); - __make_page_readonly(pte); - xen_pte_pin(__pa(pte)); - } - return pte; - -} - -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, - unsigned long address) -{ - unsigned long ret = 0; - if ( !QUICKLIST_EMPTY(pte_quicklist) ) { - ret = *(--pte_quicklist); - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -static __inline__ void pte_free_slow(pte_t *pte) -{ - xen_pte_unpin(__pa(pte)); - __make_page_writable(pte); - free_page((unsigned long)pte); -} - -static inline void pte_free_fast(pte_t *pte) -{ - if ( !QUICKLIST_FULL(pte_quicklist) ) { - *(pte_quicklist++) = (unsigned long)pte; - pgtable_cache_size++; - } else - pte_free_slow(pte); -} - -#define pte_free(pte) pte_free_fast(pte) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the pmds as part of the pgd.) - */ - -#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free_slow(x) do { } while (0) -#define pmd_free_fast(x) do { } while (0) -#define pmd_free(x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() - -extern int do_check_pgt_cache(int, int); - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(mm, start, end) flushes a range of pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb_all() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) xen_tlb_flush(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) xen_invlpg(addr); -} - -static inline void flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - if (mm == current->active_mm) xen_tlb_flush(); -} - -#else -#error no kernel SMP support yet... -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) -{ - flush_tlb_mm(mm); -} - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -struct tlb_state -{ - struct mm_struct *active_mm; - int state; -} ____cacheline_aligned; -extern struct tlb_state cpu_tlbstate[NR_CPUS]; - -#endif /* CONFIG_SMP */ - -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - -/* - * NB. The 'domid' field should be zero if mapping I/O space (non RAM). 
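
The quicklists at the top of pgalloc.h above encode fill level purely in pointer alignment: a 1 KiB-aligned array of 256 four-byte slots means the cursor's low ten bits are all zero when empty. A user-space model, using uint32_t to keep the 4-byte slot size of the 32-bit original; note that FULL fires one slot early, so the usable capacity is 255 entries:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define QUICKLIST_ENTRIES 256
    #define QUICKLIST_BYTES   (QUICKLIST_ENTRIES * 4)   /* 4-byte slots */
    #define QUICKLIST_EMPTY(l) (!((uintptr_t)(l) & (QUICKLIST_BYTES - 1)))
    #define QUICKLIST_FULL(l)  QUICKLIST_EMPTY((l) + 1)

    int main(void)
    {
        /* The kernel kept this cursor in cpuinfo_x86; here it is just a
         * pointer into a 1 KiB-aligned, 1 KiB array, so the cursor's low
         * bits alone encode how full the list is. */
        uint32_t *base = aligned_alloc(QUICKLIST_BYTES, QUICKLIST_BYTES);
        uint32_t *cursor = base;

        if (!base)
            return 1;
        printf("empty at start? %d\n", QUICKLIST_EMPTY(cursor));       /* 1 */
        while (!QUICKLIST_FULL(cursor))
            *(cursor++) = 0xdead0000u;  /* push, as free_pgd_fast() did */
        printf("pushed %ld entries before FULL\n", (long)(cursor - base)); /* 255 */
        while (!QUICKLIST_EMPTY(cursor))
            (void)*(--cursor);          /* pop, as get_pgd_fast() did */
        printf("empty again? %d\n", QUICKLIST_EMPTY(cursor));          /* 1 */
        free(base);
        return 0;
    }
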
- * Otherwise it identifies the owner of the memory that is being mapped. - */ -extern int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid); - -extern int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v); - - - -#endif /* _I386_PGALLOC_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,97 +0,0 @@ -#ifndef _I386_PGTABLE_2LEVEL_H -#define _I386_PGTABLE_2LEVEL_H - -/* - * traditional i386 two-level paging structure: - */ - -#define PGDIR_SHIFT 22 -#define PTRS_PER_PGD 1024 - -/* - * the i386 is two-level, so we don't really have any - * PMD directory physically. - */ -#define PMD_SHIFT 22 -#define PTRS_PER_PMD 1 - -#define PTRS_PER_PTE 1024 - -#define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) -#define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -#define pgd_clear(xp) do { } while (0) - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) -#define set_pte_atomic(pteptr, pteval) (*(pteptr) = pteval) - -/* - * (pmds are folded into pgds so this doesnt get actually called, - * but the define is needed for a generic inline function.) - */ -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) -#define set_pgd(pgdptr, pgdval) ((void)0) - -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) - -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} - -#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0)) -#define pte_same(a, b) ((a).pte_low == (b).pte_low) - -/* - * We detect special mappings in one of two ways: - * 1. If the MFN is an I/O page then Xen will set the m2p entry - * to be outside our maximum possible pseudophys range. - * 2. If the MFN belongs to a different domain then we will certainly - * not have MFN in our p2m table. Conversely, if the page is ours, - * then we'll have p2m(m2p(MFN))==MFN. - * If we detect a special mapping then it doesn't have a 'struct page'. - * We force !VALID_PAGE() by returning an out-of-range pointer. - * - * NB. These checks require that, for any MFN that is not in our reservation, - * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if - * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. - * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. - * - * NB2. When deliberately mapping foreign pages into the p2m table, you *must* - * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. 
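
With PGDIR_SHIFT = 22 and 1024 entries per table, a 32-bit virtual address decomposes into a 10-bit pgd index, a 10-bit pte index and a 12-bit page offset. A small sketch of the split (the sample address is arbitrary):

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PGDIR_SHIFT  22
    #define PTRS_PER_PGD 1024
    #define PTRS_PER_PTE 1024

    int main(void)
    {
        unsigned long va = 0xc0123456UL;

        /* prints: pgd index 768, pte index 291, offset 0x456 */
        printf("pgd index %lu\n", (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1));
        printf("pte index %lu\n", (va >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
        printf("offset    %#lx\n", va & ((1UL << PAGE_SHIFT) - 1));
        return 0;
    }
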
In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. - */ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) -#define pte_page(_pte) \ -({ \ - unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ - unsigned long pfn = mfn_to_pfn(mfn); \ - if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \ - pfn = max_mapnr; /* specia: force !VALID_PAGE() */ \ - &mem_map[pfn]; \ -}) - -#define pte_none(x) (!(x).pte_low) -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) - -#endif /* _I386_PGTABLE_2LEVEL_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/pgtable.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgtable.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,371 +0,0 @@ -#ifndef _I386_PGTABLE_H -#define _I386_PGTABLE_H - -#include <linux/config.h> - -/* - * The Linux memory management assumes a three-level page table setup. On - * the i386, we use that, but "fold" the mid level into the top-level page - * table, so that we physically have the same two-level page table as the - * i386 mmu expects. - * - * This file contains the functions and defines necessary to modify and use - * the i386 page table tree. - */ -#ifndef __ASSEMBLY__ -#include <asm/processor.h> -#include <asm/hypervisor.h> -#include <linux/threads.h> -#include <asm/fixmap.h> - -#ifndef _I386_BITOPS_H -#include <asm/bitops.h> -#endif - -#define swapper_pg_dir 0 -extern void paging_init(void); - -/* Caches aren't brain-dead on the intel. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_range(mm, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) -#define flush_dcache_page(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) -#define flush_icache_page(vma,pg) do { } while (0) -#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) - -extern unsigned long pgkern_mask; - -#define __flush_tlb() xen_tlb_flush() -#define __flush_tlb_global() __flush_tlb() -#define __flush_tlb_all() __flush_tlb_global() -#define __flush_tlb_one(addr) xen_invlpg(addr) -#define __flush_tlb_single(addr) xen_invlpg(addr) - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - -#endif /* !__ASSEMBLY__ */ - -/* - * The Linux x86 paging architecture is 'compile-time dual-mode', it - * implements both the traditional 2-level x86 page tables and the - * newer 3-level PAE-mode page tables. 
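
The ownership test that comment describes, namely that a frame is ours only if p2m(m2p(MFN)) == MFN, can be tried out with two toy arrays standing in for the real p2m and machine-to-physical tables (the sizes and frame numbers below are invented):

    #include <stdio.h>

    #define MAX_PFN 8
    #define FOREIGN_FRAME(m) ((m) | (1UL << (sizeof(unsigned long) * 8 - 1)))

    static unsigned long p2m[MAX_PFN];   /* our pseudo-phys -> machine */
    static unsigned long m2p[16];        /* global machine -> pseudo-phys */

    static int page_is_ours(unsigned long mfn)
    {
        unsigned long pfn = m2p[mfn];
        return pfn < MAX_PFN && p2m[pfn] == mfn;
    }

    int main(void)
    {
        /* Machine frames 3..10 belong to this domain. */
        for (unsigned long pfn = 0; pfn < MAX_PFN; pfn++) {
            p2m[pfn] = pfn + 3;
            m2p[pfn + 3] = pfn;
        }
        m2p[1] = 5;   /* foreign frame 1: its m2p slot names another domain's pfn */

        printf("mfn 4 ours? %d\n", page_is_ours(4));   /* 1: p2m[m2p[4]] == 4 */
        printf("mfn 1 ours? %d\n", page_is_ours(1));   /* 0: p2m[5] == 8, not 1 */
        printf("FOREIGN_FRAME(1) = %#lx\n", FOREIGN_FRAME(1UL)); /* high bit set */
        return 0;
    }
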
- */ -#ifndef __ASSEMBLY__ -#if CONFIG_X86_PAE -# include <asm/pgtable-3level.h> - -/* - * Need to initialise the X86 PAE caches - */ -extern void pgtable_cache_init(void); - -#else -# include <asm/pgtable-2level.h> - -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -#endif -#endif - -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_PGD_NR 0 - -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - -#define TWOLEVEL_PGDIR_SHIFT 22 -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) - - -#ifndef __ASSEMBLY__ -/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */ -#define VMALLOC_OFFSET (4*1024*1024) -extern void * high_memory; -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#if CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -#else -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -#endif - -#define _PAGE_BIT_PRESENT 0 -#define _PAGE_BIT_RW 1 -#define _PAGE_BIT_USER 2 -#define _PAGE_BIT_PWT 3 -#define _PAGE_BIT_PCD 4 -#define _PAGE_BIT_ACCESSED 5 -#define _PAGE_BIT_DIRTY 6 -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ - -#define _PAGE_PROTNONE 0x080 /* If not present */ - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) - -#define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) -#define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) - -#if 0 -#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) -#else -#define MAKE_GLOBAL(x) __pgprot(x) -#endif - -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) - -/* - * The i386 can't do page protection for execute, and considers that - * the same are read. Also, write permissions imply read permissions. - * This is the closest we can get.. 
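
The _PAGE_* masks above combine by plain OR into the table-entry templates; _PAGE_TABLE comes out as 0x067 and _KERNPG_TABLE as 0x063:

    #include <stdio.h>

    #define _PAGE_PRESENT  0x001
    #define _PAGE_RW       0x002
    #define _PAGE_USER     0x004
    #define _PAGE_ACCESSED 0x020
    #define _PAGE_DIRTY    0x040

    int main(void)
    {
        unsigned long user_table = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                                   _PAGE_ACCESSED | _PAGE_DIRTY;
        unsigned long kern_table = _PAGE_PRESENT | _PAGE_RW |
                                   _PAGE_ACCESSED | _PAGE_DIRTY;
        printf("_PAGE_TABLE   = %#05lx\n", user_table);   /* 0x067 */
        printf("_KERNPG_TABLE = %#05lx\n", kern_table);   /* 0x063 */
        return 0;
    }
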
- */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED - -#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) - -#define pmd_none(x) (!pmd_val(x)) -/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. - can temporarily clear it. */ -#define pmd_present(x) (pmd_val(x)) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) - - -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } - -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } -static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } -static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } - -static inline int ptep_test_and_clear_dirty(pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -static inline int ptep_test_and_clear_young(pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); -} - -static inline void ptep_set_wrprotect(pte_t *ptep) -{ - if (pte_write(*ptep)) - clear_bit(_PAGE_BIT_RW, &ptep->pte_low); -} - -static inline void ptep_mkdirty(pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. 
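
ptep_test_and_clear_dirty() above must read and clear the bit in a single atomic step, so that a racing hardware update of the pte cannot be lost. A user-space model, with a GCC atomic builtin standing in for the kernel's test_and_clear_bit():

    #include <stdio.h>

    #define _PAGE_BIT_DIRTY 6
    #define _PAGE_DIRTY     0x040

    /* Model of test_and_clear_bit(): atomically clear bit 'nr' and
     * report whether it was set beforehand. */
    static int test_and_clear_bit(int nr, unsigned long *word)
    {
        unsigned long mask = 1UL << nr;
        return (__atomic_fetch_and(word, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
    }

    int main(void)
    {
        unsigned long pte_low = _PAGE_DIRTY | 0x1;   /* dirty + present */

        printf("was dirty: %d\n", test_and_clear_bit(_PAGE_BIT_DIRTY, &pte_low));
        printf("now dirty: %d\n", !!(pte_low & _PAGE_DIRTY));
        return 0;
    }
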
- */ - -#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) - -/* This takes a physical page address that is used by the remapping functions */ -#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); - return pte; -} - -#define page_pte(page) page_pte_prot(page, __pgprot(0)) - -#define pmd_page(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) - -#define __pgd_offset(address) pgd_index(address) - -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -#define __pmd_offset(address) \ - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) - -/* Find an entry in the third-level page table.. */ -#define __pte_offset(address) \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ - __pte_offset(address)) - -/* - * The i386 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information. - */ -#define update_mmu_cache(vma,address,pte) do { } while (0) - -/* Encode and de-code a swap entry */ -#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) -#define SWP_OFFSET(x) ((x).val >> 8) -#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) -#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) -#define swp_entry_to_pte(x) ((pte_t) { (x).val }) - -struct page; -int change_page_attr(struct page *, int, pgprot_t prot); - -static inline void __make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_wrprotect(*pte)); -} - -static inline void __make_page_writable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_mkwrite(*pte)); -} - -static inline void make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_wrprotect(*pte)); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_readonly(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_page_writable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_mkwrite(*pte)); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_writable(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_pages_readonly(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_readonly(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline void make_pages_writable(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_writable(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline unsigned long arbitrary_virt_to_machine(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = 
pte_offset(pmd, (unsigned long)va); - unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; - return pa | ((unsigned long)va & (PAGE_SIZE-1)); -} - -#endif /* !__ASSEMBLY__ */ - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define PageSkip(page) (0) -#define kern_addr_valid(addr) (1) - -#define io_remap_page_range remap_page_range - -#endif /* _I386_PGTABLE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/processor.h --- a/linux-2.4-xen-sparse/include/asm-xen/processor.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,483 +0,0 @@ -/* - * include/asm-i386/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H - -#include <asm/vm86.h> -#include <asm/math_emu.h> -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/sigcontext.h> -#include <asm/cpufeature.h> -#include <linux/cache.h> -#include <linux/config.h> -#include <linux/threads.h> - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int fdiv_bug; - int f00f_bug; - int coma_bug; - unsigned long loops_per_jiffy; - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NSC 8 -#define X86_VENDOR_SIS 9 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct tss_struct init_tss[NR_CPUS]; - -#ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data -#endif - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define 
X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* - * Generic CPUID function - */ -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -#define load_cr3(pgdir) \ - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))); - -extern unsigned long mmu_cr4_features; - -#include <asm/hypervisor.h> - -static inline void set_in_cr4 (unsigned long mask) -{ - BUG(); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - BUG(); -} - -/* - * Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* - * Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -/* - * Bus types (default is ISA, but people can check others with these..) - */ -#ifdef CONFIG_EISA -extern int EISA_bus; -#else -#define EISA_bus (0) -#endif -extern int MCA_bus; - -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; - -/* - * User space process size: 3GB (default). - */ -#define TASK_SIZE (PAGE_OFFSET) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. 
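
The cpuid() wrapper above works unchanged from user space on an x86 build; leaf 0 returns the highest supported leaf in EAX and the vendor string spread across EBX, EDX and ECX:

    #include <stdio.h>
    #include <string.h>

    static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
    {
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (op));
    }

    int main(void)
    {
        int eax, ebx, ecx, edx;
        char vendor[13];

        cpuid(0, &eax, &ebx, &ecx, &edx);
        /* Leaf 0: vendor string lives in EBX, EDX, ECX, in that order. */
        memcpy(vendor + 0, &ebx, 4);
        memcpy(vendor + 4, &edx, 4);
        memcpy(vendor + 8, &ecx, 4);
        vendor[12] = '\0';
        printf("max leaf %d, vendor \"%s\"\n", eax, vendor);
        return 0;
    }
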
- */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) - -/* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. - */ -#define IO_BITMAP_SIZE 32 -#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - long status; /* software status information */ -}; - -struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long reserved; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; -} __attribute__ ((aligned (16))); - -struct i387_soft_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - unsigned char ftop, changed, lookahead, no_update, rm, alimit; - struct info *info; - unsigned long entry_eip; -}; - -union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct tss_struct { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, bitmap; - unsigned long io_bitmap[IO_BITMAP_SIZE+1]; - /* - * pads the TSS to be cacheline-aligned (size is 0x100) - */ - unsigned long __cacheline_filler[5]; -}; - -struct thread_struct { - unsigned long esp0; - unsigned long eip; - unsigned long esp; - unsigned long fs; - unsigned long gs; - unsigned int io_pl; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* fault info */ - unsigned long cr2, trap_no, error_code; -/* floating point info */ - union i387_union i387; -/* virtual 86 mode info */ - struct vm86_struct * vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags, v86mask, saved_esp0; -}; - -#define INIT_THREAD { sizeof(init_stack) + (long) &init_stack, \ - 0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 } - -#define INIT_TSS { \ - 0,0, /* back_link, __blh */ \ - sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ - __KERNEL_DS, 0, /* ss0 */ \ - 0,0,0,0,0,0, /* stack1, stack2 */ \ - 0, /* cr3 */ \ - 0,0, /* eip,eflags */ \ - 0,0,0,0, /* eax,ecx,edx,ebx */ \ - 0,0,0,0, /* esp,ebp,esi,edi */ \ - 0,0,0,0,0,0, /* es,cs,ss */ \ - 0,0,0,0,0,0, /* ds,fs,gs */ \ - 0,0, /* ldt */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ - {~0, } /* ioperm */ \ -} - -#define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ - set_fs(USER_DS); \ - regs->xds = __USER_DS; \ - regs->xes = __USER_DS; \ - regs->xss = __USER_DS; \ - regs->xcs = __USER_CS; \ - regs->eip = new_eip; \ - regs->esp = new_esp; \ -} while 
(0) - -/* Forward declaration, a strange C thing */ -struct task_struct; -struct mm_struct; - -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); -/* - * create a kernel thread without removing it from tasklists - */ -extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -/* Copy and release all segment info associated with a VM - * Unusable due to lack of error handling, use {init_new,destroy}_context - * instead. - */ -static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { } -static inline void release_segments(struct mm_struct * mm) { } - -/* - * Return saved PC of a blocked thread. - */ -static inline unsigned long thread_saved_pc(struct thread_struct *t) -{ - return ((unsigned long *)t->esp)[3]; -} - -unsigned long get_wchan(struct task_struct *p); -#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) -#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) -#define free_task_struct(p) free_pages((unsigned long) (p), 1) -#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - -#define init_task (init_task_union.task) -#define init_stack (init_task_union.stack) - -struct microcode { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int reserved[5]; - unsigned int bits[500]; -}; - -/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ -#define MICROCODE_IOCFREE _IO('6',0) - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop" ::: "memory"); -} - -#define cpu_relax() rep_nop() - -/* Prefetch instructions for Pentium III and AMD Athlon */ -#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4) - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif - -#endif /* __ASM_I386_PROCESSOR_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/queues.h --- a/linux-2.4-xen-sparse/include/asm-xen/queues.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,20 +0,0 @@ - -/* Work-queue emulation over task queues. Pretty simple. 
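
rep_nop() above emits the PAUSE instruction, a hint to the CPU that the code is spinning. A trivial busy-wait sketch using it on an x86 build (the flag flip inside the loop is a stand-in for another CPU's store):

    #include <stdio.h>

    static inline void cpu_relax(void)
    {
        __asm__ __volatile__("rep;nop" ::: "memory");
    }

    int main(void)
    {
        volatile int ready = 0;        /* normally set by another CPU */
        int spins = 0;

        while (!ready && spins < 1000000) {
            cpu_relax();               /* hint: spinning, ease off the pipeline */
            if (++spins == 500000)
                ready = 1;             /* stand-in for the remote store */
        }
        printf("left the wait loop after %d spins\n", spins);
        return 0;
    }
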
*/ - -#ifndef __QUEUES_H__ -#define __QUEUES_H__ - -#include <linux/version.h> -#include <linux/list.h> -#include <linux/tqueue.h> - -#define DECLARE_TQUEUE(_name, _fn, _arg) \ - struct tq_struct _name = { LIST_HEAD_INIT((_name).list), 0, _fn, _arg } -#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg) - -#define work_struct tq_struct -#define INIT_WORK(_work, _fn, _arg) INIT_TQUEUE(_work, _fn, _arg) - -#define schedule_work(_w) schedule_task(_w) - -#endif /* __QUEUES_H__ */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/segment.h --- a/linux-2.4-xen-sparse/include/asm-xen/segment.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,15 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -#ifndef __ASSEMBLY__ -#include <linux/types.h> -#endif -#include <asm-xen/xen-public/xen.h> - -#define __KERNEL_CS FLAT_RING1_CS -#define __KERNEL_DS FLAT_RING1_DS - -#define __USER_CS FLAT_RING3_CS -#define __USER_DS FLAT_RING3_DS - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/smp.h --- a/linux-2.4-xen-sparse/include/asm-xen/smp.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,102 +0,0 @@ -#ifndef __ASM_SMP_H -#define __ASM_SMP_H - -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifndef __ASSEMBLY__ -#include <linux/config.h> -#include <linux/threads.h> -#include <linux/ptrace.h> -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include <asm/bitops.h> -#include <asm/mpspec.h> -#ifdef CONFIG_X86_IO_APIC -#include <asm/io_apic.h> -#endif -#include <asm/apic.h> -#endif -#endif - -#ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ - -/* - * Private routines/data - */ - -extern void smp_alloc_memory(void); -extern unsigned long phys_cpu_present_map; -extern unsigned long cpu_online_map; -extern volatile unsigned long smp_invalidate_needed; -extern int pic_mode; -extern int smp_num_siblings; -extern int cpu_sibling_map[]; - -extern void smp_flush_tlb(void); -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -extern void fastcall smp_send_reschedule(int cpu); -extern void smp_invalidate_rcv(void); /* Process an NMI */ -extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ -static inline int cpu_logical_map(int cpu) -{ - return cpu; -} -static inline int cpu_number_map(int cpu) -{ - return cpu; -} - -/* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. - */ -#define MAX_APICID 256 -extern volatile int cpu_to_physical_apicid[NR_CPUS]; -extern volatile int physical_apicid_to_cpu[MAX_APICID]; -extern volatile int cpu_to_logical_apicid[NR_CPUS]; -extern volatile int logical_apicid_to_cpu[MAX_APICID]; - -/* - * General functions that each host system must provide. - */ - -extern void smp_boot_cpus(void); -extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ - -/* - * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. - */ - -#define smp_processor_id() (current->processor) - -#endif /* !__ASSEMBLY__ */ - -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. 
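
queues.h above back-ports the 2.6 work-queue API onto 2.4 task queues using nothing but macros. The same shim pattern in miniature, with a bare function-pointer record standing in for struct tq_struct and a direct call standing in for schedule_task():

    #include <stdio.h>

    struct tq_struct {
        void (*routine)(void *);
        void *data;
    };

    /* Newer API spelled in terms of the older one, as queues.h does. */
    #define DECLARE_TQUEUE(_name, _fn, _arg) struct tq_struct _name = { _fn, _arg }
    #define DECLARE_WORK(_name, _fn, _arg)   DECLARE_TQUEUE(_name, _fn, _arg)
    #define schedule_work(_w)                ((_w)->routine((_w)->data))

    static char msg[] = "emulated work queue";
    static void greet(void *arg) { printf("hello from %s\n", (char *)arg); }

    static DECLARE_WORK(my_work, greet, msg);

    int main(void)
    {
        schedule_work(&my_work);    /* in 2.4 this became schedule_task() */
        return 0;
    }
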
Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. - */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - -#endif -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/system.h --- a/linux-2.4-xen-sparse/include/asm-xen/system.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,424 +0,0 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <asm/synch_bitops.h> -#include <asm/segment.h> -#include <asm/hypervisor.h> -#include <asm/evtchn.h> - -#ifdef __KERNEL__ - -struct task_struct; -extern void FASTCALL(__switch_to(struct task_struct *prev, - struct task_struct *next)); - -#define prepare_to_switch() \ -do { \ - struct thread_struct *__t = ¤t->thread; \ - __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) ); \ - __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) ); \ -} while (0) -#define switch_to(prev,next,last) do { \ - asm volatile("pushl %%esi\n\t" \ - "pushl %%edi\n\t" \ - "pushl %%ebp\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %3,%%esp\n\t" /* restore ESP */ \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %4\n\t" /* restore EIP */ \ - "jmp __switch_to\n" \ - "1:\t" \ - "popl %%ebp\n\t" \ - "popl %%edi\n\t" \ - "popl %%esi\n\t" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ - "=b" (last) \ - :"m" (next->thread.esp),"m" (next->thread.eip), \ - "a" (prev), "d" (next), \ - "b" (prev)); \ -} while (0) - -#define _set_base(addr,base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while(0) - -#define _set_limit(addr,limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while(0) - -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) - -static inline unsigned long _get_base(char * addr) -{ - unsigned long __base; - __asm__("movb %3,%%dh\n\t" - "movb %2,%%dl\n\t" - "shll $16,%%edx\n\t" - "movw %1,%%dx" - :"=&d" (__base) - :"m" (*((addr)+2)), - "m" (*((addr)+4)), - "m" (*((addr)+7))); - return __base; -} - -#define get_base(ldt) _get_base( ((char *)&(ldt)) ) - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg,value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "pushl $0\n\t" \ - "popl %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" \ - : :"m" (*(unsigned int *)&(value))) - -/* NB. 'clts' is done for us by Xen during virtual trap. 
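
_set_base() and _get_base() above scatter a 32-bit segment base across descriptor bytes 2, 3, 4 and 7, which is simply the x86 descriptor layout. The same packing in portable C, round-tripped to check it:

    #include <stdint.h>
    #include <stdio.h>

    static void set_base(uint8_t *desc, uint32_t base)
    {
        desc[2] = base & 0xff;
        desc[3] = (base >> 8) & 0xff;
        desc[4] = (base >> 16) & 0xff;
        desc[7] = (base >> 24) & 0xff;
    }

    static uint32_t get_base(const uint8_t *desc)
    {
        return desc[2] | (desc[3] << 8) | (desc[4] << 16) |
               ((uint32_t)desc[7] << 24);
    }

    int main(void)
    {
        uint8_t desc[8] = { 0 };

        set_base(desc, 0xdeadbeef);
        printf("base back out: %#x\n", get_base(desc));   /* 0xdeadbeef */
        return 0;
    }
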
*/ -#define clts() ((void)0) -#define stts() (HYPERVISOR_fpu_taskswitch(1)) - -#endif /* __KERNEL__ */ - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - * - * Taken from 2.6 for Xen. - */ -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfl %1,%0" - :"=r" (word) - :"rm" (word)); - return word; -} - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) - -#define tas(ptr) (xchg((ptr),1)) - -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((struct __xchg_dummy *)(x)) - - -/* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. (in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) - * - * chmxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. - */ -static inline void __set_64bit (unsigned long long * ptr, - unsigned int low, unsigned int high) -{ - __asm__ __volatile__ ( - "\n1:\t" - "movl (%0), %%eax\n\t" - "movl 4(%0), %%edx\n\t" - "lock cmpxchg8b (%0)\n\t" - "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) - : "ax","dx","memory"); -} - -static inline void __set_64bit_constant (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); -} -#define ll_low(x) *(((unsigned int*)&(x))+0) -#define ll_high(x) *(((unsigned int*)&(x))+1) - -static inline void __set_64bit_var (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,ll_low(value), ll_high(value)); -} - -#define set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit_constant(ptr, value) : \ - __set_64bit_var(ptr, value) ) - -#define _set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ - __set_64bit(ptr, ll_low(value), ll_high(value)) ) - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. 
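
The contract stated in that comment, compare OLD with MEM, store NEW only on a match, and return the initial value, is exactly what the GCC builtin below provides, so the semantics can be exercised without inline assembly:

    #include <stdio.h>

    static unsigned long cmpxchg(unsigned long *ptr, unsigned long old,
                                 unsigned long new)
    {
        return __sync_val_compare_and_swap(ptr, old, new);
    }

    int main(void)
    {
        unsigned long v = 5;
        unsigned long prev;

        prev = cmpxchg(&v, 5, 7);   /* success: returns 5, v becomes 7 */
        printf("prev=%lu v=%lu\n", prev, v);
        prev = cmpxchg(&v, 5, 9);   /* failure: v is 7, nothing stored */
        printf("prev=%lu v=%lu\n", prev, v);
        return 0;
    }
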
- */ - -#ifdef CONFIG_X86_CMPXCHG -#define __HAVE_ARCH_CMPXCHG 1 - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__("lock cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__("lock cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__("lock cmpxchgl %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - -#else -/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ -#endif - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() - -#ifdef CONFIG_X86_OOSTORE -#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#else -#define wmb() __asm__ __volatile__ ("": : :"memory") -#endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define safe_halt() ((void)0) - -/* - * The use of 'barrier' in the following reflects their use as local-lock - * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following - * critical operations are executed. All critical operatiosn must complete - * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also - * includes these barriers, for example. 
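
The ordering these barriers enforce shows up most clearly in __sti(): unmask first, then re-check for events that arrived while masked. A toy single-threaded model of that unmask-then-check protocol; struct vcpu_data and force_evtchn_callback() here are simplified stand-ins:

    #include <stdio.h>

    struct vcpu_data {
        int evtchn_upcall_mask;
        int evtchn_upcall_pending;
    };

    static void force_evtchn_callback(struct vcpu_data *v)
    {
        v->evtchn_upcall_pending = 0;
        printf("delivering event that arrived while masked\n");
    }

    static void local_irq_enable(struct vcpu_data *v)
    {
        v->evtchn_upcall_mask = 0;      /* unmask first ... */
        /* barrier() sits here in the real code: the unmask must be
         * visible before the pending check */
        if (v->evtchn_upcall_pending)   /* ... then check */
            force_evtchn_callback(v);
    }

    int main(void)
    {
        struct vcpu_data v = { 1, 0 };  /* masked, nothing pending */

        v.evtchn_upcall_pending = 1;    /* event arrives while masked */
        local_irq_enable(&v);           /* caught on the way out */
        return 0;
    }
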
- */ - -#define __cli() \ -do { \ - HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define __save_flags(x) \ -do { \ - (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ -} while (0) - -#define __restore_flags(x) \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - if ( (_shared->vcpu_data[0].evtchn_upcall_mask = x) == 0 ) { \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - } \ -} while (0) - -#define __save_and_cli(x) \ -do { \ - (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ - HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __save_and_sti(x) \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \ - _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_set(x) __save_and_sti(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - - -#ifdef CONFIG_SMP -#error no SMP -extern void __global_cli(void); -extern void __global_sti(void); -extern unsigned long __global_save_flags(void); -extern void __global_restore_flags(unsigned long); -#define cli() __global_cli() -#define sti() __global_sti() -#define save_flags(x) ((x)=__global_save_flags()) -#define restore_flags(x) __global_restore_flags(x) -#define save_and_cli(x) do { save_flags(x); cli(); } while(0); -#define save_and_sti(x) do { save_flags(x); sti(); } while(0); - -#else - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) -#define save_and_sti(x) __save_and_sti(x) - -#endif - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -extern unsigned long dmi_broken; -extern int is_sony_vaio_laptop; - -#define BROKEN_ACPI_Sx 0x0001 -#define BROKEN_INIT_AFTER_S1 0x0002 -#define BROKEN_PNP_BIOS 0x0004 - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/vga.h --- a/linux-2.4-xen-sparse/include/asm-xen/vga.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,42 +0,0 @@ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares <mj@xxxxxx> - */ - -#ifndef _LINUX_ASM_VGA_H_ -#define _LINUX_ASM_VGA_H_ - -#include <asm/io.h> - -extern unsigned char *vgacon_mmap; - -static unsigned long VGA_MAP_MEM(unsigned long x) -{ - if( vgacon_mmap == NULL ) - { - /* This is our first time in this function. This whole thing - is a rather grim hack. We know we're going to get asked - to map a 32KB region between 0xb0000 and 0xb8000 because - that's what VGAs are. We used the boot time permanent - fixed map region, and map it to machine pages. - */ - if( x != 0xb8000 ) - panic("Argghh! 
VGA Console is weird. 1:%08lx\n",x); - - vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 ); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - else - { - if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */ - panic("Argghh! VGA Console is weird. 2:%08lx\n",x); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - return 0; -} - -static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); } -static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); } - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/asm-xen/xor.h --- a/linux-2.4-xen-sparse/include/asm-xen/xor.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,879 +0,0 @@ -/* - * include/asm-i386/xor.h - * - * Optimized RAID-5 checksumming functions for MMX and SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * High-speed RAID5 checksumming functions utilizing MMX instructions. - * Copyright (C) 1998 Ingo Molnar. - */ - -#define FPU_SAVE \ - do { \ - if (!(current->flags & PF_USEDFPU)) \ - clts(); \ - __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ - } while (0) - -#define FPU_RESTORE \ - do { \ - __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ - if (!(current->flags & PF_USEDFPU)) \ - stts(); \ - } while (0) - -#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" -#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" -#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" -#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" -#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" -#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" - - -static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - ST(i,0) \ - XO1(i+1,1) \ - ST(i+1,1) \ - XO1(i+2,2) \ - ST(i+2,2) \ - XO1(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - ST(i,0) \ - XO2(i+1,1) \ - ST(i+1,1) \ - XO2(i+2,2) \ - ST(i+2,2) \ - XO2(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, 
unsigned long *p4) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - ST(i,0) \ - XO3(i+1,1) \ - ST(i+1,1) \ - XO3(i+2,2) \ - ST(i+2,2) \ - XO3(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " addl $128, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory"); - - FPU_RESTORE; -} - - -static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - /* need to save/restore p4/p5 manually otherwise gcc's 10 argument - limit gets exceeded (+ counts as two arguments) */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - XO4(i,0) \ - ST(i,0) \ - XO4(i+1,1) \ - ST(i+1,1) \ - XO4(i+2,2) \ - ST(i+2,2) \ - XO4(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " addl $128, %4 ;\n" - " addl $128, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - FPU_RESTORE; -} - -#undef LD -#undef XO1 -#undef XO2 -#undef XO3 -#undef XO4 -#undef ST -#undef BLOCK - -static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 8(%2), %%mm1 ;\n" - " movq 24(%1), %%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " pxor 16(%2), %%mm2 ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq 40(%1), %%mm5 ;\n" - " movq %%mm3, 24(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " movq 24(%1), 
%%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq %%mm3, 24(%1) ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : - : "memory" ); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor (%4), %%mm0 ;\n" - " movq 24(%1), %%mm3 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " pxor 8(%4), %%mm1 ;\n" - " movq %%mm0, (%1) ;\n" - " movq 32(%1), %%mm4 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " pxor 16(%4), %%mm2 ;\n" - " movq %%mm1, 8(%1) ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 32(%2), %%mm4 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 24(%4), %%mm3 ;\n" - " movq %%mm3, 24(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq 48(%1), %%mm6 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " addl $64, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " pxor (%4), %%mm0 ;\n" - " pxor 8(%4), %%mm1 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " movq 24(%1), %%mm3 ;\n" - " pxor (%5), %%mm0 ;\n" - " pxor 8(%5), %%mm1 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 16(%4), %%mm2 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " pxor 16(%5), %%mm2 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 24(%4), %%mm3 ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 24(%5), %%mm3 ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq 
%%mm3, 24(%1) ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 32(%5), %%mm4 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " pxor 40(%5), %%mm5 ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%5), %%mm6 ;\n" - " pxor 56(%5), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " addl $64, %4 ;\n" - " addl $64, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+g" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - FPU_RESTORE; -} - -static struct xor_block_template xor_block_pII_mmx = { - name: "pII_mmx", - do_2: xor_pII_mmx_2, - do_3: xor_pII_mmx_3, - do_4: xor_pII_mmx_4, - do_5: xor_pII_mmx_5, -}; - -static struct xor_block_template xor_block_p5_mmx = { - name: "p5_mmx", - do_2: xor_p5_mmx_2, - do_3: xor_p5_mmx_3, - do_4: xor_p5_mmx_4, - do_5: xor_p5_mmx_5, -}; - -#undef FPU_SAVE -#undef FPU_RESTORE - -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -#define XMMS_SAVE \ - if (!(current->flags & PF_USEDFPU)) \ - clts(); \ - __asm__ __volatile__ ( \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory") - -#define XMMS_RESTORE \ - __asm__ __volatile__ ( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - if (!(current->flags & PF_USEDFPU)) \ - stts() - -#define ALIGN16 __attribute__((aligned(16))) - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" -#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" -#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" -#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" -#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" -#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" -#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" -#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - PF1(i) \ - PF1(i+2) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF0(i+4) \ - PF0(i+6) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - XMMS_RESTORE; -} - -static void -xor_sse_3(unsigned long bytes, 
unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r"(p2), "+r"(p3) - : - : "memory" ); - - XMMS_RESTORE; -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - PF3(i) \ - PF3(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory" ); - - XMMS_RESTORE; -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - PF3(i) \ - PF3(i+2) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - PF4(i) \ - PF4(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - XO4(i,0) \ - XO4(i+1,1) \ - XO4(i+2,2) \ - XO4(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " addl $256, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - XMMS_RESTORE; -} - -static struct xor_block_template xor_block_pIII_sse = { - name: "pIII_sse", - do_2: xor_sse_2, - do_3: xor_sse_3, - do_4: xor_sse_4, - do_5: xor_sse_5, -}; - -/* Also try the generic routines. 
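-   (xor_speed() in the generic code times each registered template at
-   boot and normally keeps the fastest one; XOR_SELECT_TEMPLATE below
-   deliberately overrides that choice whenever SSE is present.)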
*/ -#include <asm-generic/xor.h> - -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - if (cpu_has_xmm) \ - xor_speed(&xor_block_pIII_sse); \ - if (md_cpu_has_mmx()) { \ - xor_speed(&xor_block_pII_mmx); \ - xor_speed(&xor_block_p5_mmx); \ - } \ - } while (0) - -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/blk.h --- a/linux-2.4-xen-sparse/include/linux/blk.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,409 +0,0 @@ -#ifndef _BLK_H -#define _BLK_H - -#include <linux/blkdev.h> -#include <linux/locks.h> -#include <linux/config.h> -#include <linux/spinlock.h> - -/* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* - * Initialization functions. - */ -extern int isp16_init(void); -extern int cdu31a_init(void); -extern int acsi_init(void); -extern int mcd_init(void); -extern int mcdx_init(void); -extern int sbpcd_init(void); -extern int aztcd_init(void); -extern int sony535_init(void); -extern int gscd_init(void); -extern int cm206_init(void); -extern int optcd_init(void); -extern int sjcd_init(void); -extern int cdi_init(void); -extern int hd_init(void); -extern int ide_init(void); -extern int xd_init(void); -extern int mfm_init(void); -extern int loop_init(void); -extern int md_init(void); -extern int ap_init(void); -extern int ddv_init(void); -extern int z2_init(void); -extern int swim3_init(void); -extern int swimiop_init(void); -extern int amiga_floppy_init(void); -extern int atari_floppy_init(void); -extern int ez_init(void); -extern int bpcd_init(void); -extern int ps2esdi_init(void); -extern int jsfd_init(void); -extern int viodasd_init(void); -extern int viocd_init(void); - -#if defined(CONFIG_ARCH_S390) -extern int dasd_init(void); -extern int xpram_init(void); -extern int tapeblock_init(void); -#endif /* CONFIG_ARCH_S390 */ - -#if defined(CONFIG_XEN) -extern int xlblk_init(void); -#endif /* CONFIG_XEN */ - -extern void set_device_ro(kdev_t dev,int flag); -void add_blkdev_randomness(int major); - -extern int floppy_init(void); -extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ -extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ -extern int rd_image_start; /* starting block # of image */ - -#ifdef CONFIG_BLK_DEV_INITRD - -#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ - -extern unsigned long initrd_start,initrd_end; -extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ -void initrd_init(void); - -#endif - - -/* - * end_request() and friends. Must be called with the request queue spinlock - * acquired. All functions called within end_request() _must_be_ atomic. - * - * Several drivers define their own end_request and call - * end_that_request_first() and end_that_request_last() - * for parts of the original function. This prevents - * code duplication in drivers. 
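- *
- * A minimal driver-private variant follows the same pattern as the
- * generic end_request() at the bottom of this file (a sketch only):
- *
- *	if (end_that_request_first(req, uptodate, DEVICE_NAME))
- *		return;
- *	blkdev_dequeue_request(req);
- *	end_that_request_last(req);
- *
- * The early return handles a partially-completed request that still
- * has buffers outstanding.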
- */ - -static inline void blkdev_dequeue_request(struct request * req) -{ - list_del(&req->queue); -} - -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); - -#if defined(MAJOR_NR) || defined(IDE_DRIVER) - -#undef DEVICE_ON -#undef DEVICE_OFF - -/* - * Add entries as needed. - */ - -#ifdef IDE_DRIVER - -#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) -#define DEVICE_NAME "ide" - -#elif (MAJOR_NR == RAMDISK_MAJOR) - -/* ram disk */ -#define DEVICE_NAME "ramdisk" -#define DEVICE_NR(device) (MINOR(device)) -#define DEVICE_NO_RANDOM - -#elif (MAJOR_NR == Z2RAM_MAJOR) - -/* Zorro II Ram */ -#define DEVICE_NAME "Z2RAM" -#define DEVICE_REQUEST do_z2_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == FLOPPY_MAJOR) - -static void floppy_off(unsigned int nr); - -#define DEVICE_NAME "floppy" -#define DEVICE_INTR do_floppy -#define DEVICE_REQUEST do_fd_request -#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 )) -#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device)) - -#elif (MAJOR_NR == HD_MAJOR) - -/* Hard disk: timeout is 6 seconds. */ -#define DEVICE_NAME "hard disk" -#define DEVICE_INTR do_hd -#define TIMEOUT_VALUE (6*HZ) -#define DEVICE_REQUEST do_hd_request -#define DEVICE_NR(device) (MINOR(device)>>6) - -#elif (SCSI_DISK_MAJOR(MAJOR_NR)) - -#define DEVICE_NAME "scsidisk" -#define TIMEOUT_VALUE (2*HZ) -#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4)) - -/* Kludge to use the same number for both char and block major numbers */ -#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER) - -#define DEVICE_NAME "Multiple devices driver" -#define DEVICE_REQUEST do_md_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == SCSI_TAPE_MAJOR) - -#define DEVICE_NAME "scsitape" -#define DEVICE_INTR do_st -#define DEVICE_NR(device) (MINOR(device) & 0x7f) - -#elif (MAJOR_NR == OSST_MAJOR) - -#define DEVICE_NAME "onstream" -#define DEVICE_INTR do_osst -#define DEVICE_NR(device) (MINOR(device) & 0x7f) -#define DEVICE_ON(device) -#define DEVICE_OFF(device) - -#elif (MAJOR_NR == SCSI_CDROM_MAJOR) - -#define DEVICE_NAME "CD-ROM" -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == XT_DISK_MAJOR) - -#define DEVICE_NAME "xt disk" -#define DEVICE_REQUEST do_xd_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == PS2ESDI_MAJOR) - -#define DEVICE_NAME "PS/2 ESDI" -#define DEVICE_REQUEST do_ps2esdi_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == CDU31A_CDROM_MAJOR) - -#define DEVICE_NAME "CDU31A" -#define DEVICE_REQUEST do_cdu31a_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE)) - -#define DEVICE_NAME "ACSI" -#define DEVICE_INTR do_acsi -#define DEVICE_REQUEST do_acsi_request -#define DEVICE_NR(device) (MINOR(device) >> 4) - -#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR) - -#define DEVICE_NAME "Mitsumi CD-ROM" -/* #define DEVICE_INTR do_mcd */ -#define DEVICE_REQUEST do_mcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR) - -#define DEVICE_NAME "Mitsumi CD-ROM" -/* #define DEVICE_INTR do_mcdx */ -#define DEVICE_REQUEST do_mcdx_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #1" -#define DEVICE_REQUEST do_sbpcd_request 
-#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #2" -#define DEVICE_REQUEST do_sbpcd2_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #3" -#define DEVICE_REQUEST do_sbpcd3_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #4" -#define DEVICE_REQUEST do_sbpcd4_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == AZTECH_CDROM_MAJOR) - -#define DEVICE_NAME "Aztech CD-ROM" -#define DEVICE_REQUEST do_aztcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == CDU535_CDROM_MAJOR) - -#define DEVICE_NAME "SONY-CDU535" -#define DEVICE_INTR do_cdu535 -#define DEVICE_REQUEST do_cdu535_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR) - -#define DEVICE_NAME "Goldstar R420" -#define DEVICE_REQUEST do_gscd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == CM206_CDROM_MAJOR) -#define DEVICE_NAME "Philips/LMS CD-ROM cm206" -#define DEVICE_REQUEST do_cm206_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == OPTICS_CDROM_MAJOR) - -#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM" -#define DEVICE_REQUEST do_optcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == SANYO_CDROM_MAJOR) - -#define DEVICE_NAME "Sanyo H94A CD-ROM" -#define DEVICE_REQUEST do_sjcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == APBLOCK_MAJOR) - -#define DEVICE_NAME "apblock" -#define DEVICE_REQUEST ap_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == DDV_MAJOR) - -#define DEVICE_NAME "ddv" -#define DEVICE_REQUEST ddv_request -#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS) - -#elif (MAJOR_NR == MFM_ACORN_MAJOR) - -#define DEVICE_NAME "mfm disk" -#define DEVICE_INTR do_mfm -#define DEVICE_REQUEST do_mfm_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == NBD_MAJOR) - -#define DEVICE_NAME "nbd" -#define DEVICE_REQUEST do_nbd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MDISK_MAJOR) - -#define DEVICE_NAME "mdisk" -#define DEVICE_REQUEST mdisk_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == DASD_MAJOR) - -#define DEVICE_NAME "dasd" -#define DEVICE_REQUEST do_dasd_request -#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) - -#elif (MAJOR_NR == I2O_MAJOR) - -#define DEVICE_NAME "I2O block" -#define DEVICE_REQUEST i2ob_request -#define DEVICE_NR(device) (MINOR(device)>>4) - -#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) - -#define DEVICE_NAME "ida" -#define TIMEOUT_VALUE (25*HZ) -#define DEVICE_REQUEST do_ida_request -#define DEVICE_NR(device) (MINOR(device) >> 4) - -#endif /* MAJOR_NR == whatever */ - -/* provide DEVICE_xxx defaults, if not explicitly defined - * above in the MAJOR_NR==xxx if-elif tree */ -#ifndef DEVICE_ON -#define DEVICE_ON(device) do {} while (0) -#endif -#ifndef DEVICE_OFF -#define DEVICE_OFF(device) do {} while (0) -#endif - -#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR) -#if !defined(IDE_DRIVER) - -#ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) -#endif -#ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) -#endif - -#ifndef DEVICE_NAME 
-#define DEVICE_NAME "unknown" -#endif - -#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev) - -#ifdef DEVICE_INTR -static void (*DEVICE_INTR)(void) = NULL; -#endif - -#define SET_INTR(x) (DEVICE_INTR = (x)) - -#ifdef DEVICE_REQUEST -static void (DEVICE_REQUEST)(request_queue_t *); -#endif - -#ifdef DEVICE_INTR -#define CLEAR_INTR SET_INTR(NULL) -#else -#define CLEAR_INTR -#endif - -#define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ - CLEAR_INTR; \ - return; \ - } \ - if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ - panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } - -#endif /* !defined(IDE_DRIVER) */ - - -#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */ - -#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) - -static inline void end_request(int uptodate) { - struct request *req = CURRENT; - - if (end_that_request_first(req, uptodate, DEVICE_NAME)) - return; - -#ifndef DEVICE_NO_RANDOM - add_blkdev_randomness(MAJOR(req->rq_dev)); -#endif - DEVICE_OFF(req->rq_dev); - blkdev_dequeue_request(req); - end_that_request_last(req); -} - -#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ -#endif /* LOCAL_END_REQUEST */ - -#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */ -#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */ - -#endif /* _BLK_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/highmem.h --- a/linux-2.4-xen-sparse/include/linux/highmem.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,137 +0,0 @@ -#ifndef _LINUX_HIGHMEM_H -#define _LINUX_HIGHMEM_H - -#include <linux/config.h> -#include <asm/pgalloc.h> - -#ifdef CONFIG_HIGHMEM - -extern struct page *highmem_start_page; - -#include <asm/highmem.h> - -/* declarations for linux/mm/highmem.c */ -unsigned int nr_free_highpages(void); -void kmap_flush_unused(void); - -extern struct buffer_head *create_bounce(int rw, struct buffer_head * bh_orig); - -static inline char *bh_kmap(struct buffer_head *bh) -{ - return kmap(bh->b_page) + bh_offset(bh); -} - -static inline void bh_kunmap(struct buffer_head *bh) -{ - kunmap(bh->b_page); -} - -/* - * remember to add offset! and never ever reenable interrupts between a - * bh_kmap_irq and bh_kunmap_irq!! 
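- *
- * Typical usage (a sketch; bh and dst are illustrative):
- *
- *	unsigned long flags;
- *	char *data = bh_kmap_irq(bh, &flags);
- *	memcpy(dst, data, bh->b_size);
- *	bh_kunmap_irq(data, &flags);
- *
- * with strictly no sti()/restore_flags() between the map and unmap.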
- */ -static inline char *bh_kmap_irq(struct buffer_head *bh, unsigned long *flags) -{ - unsigned long addr; - - __save_flags(*flags); - - /* - * could be low - */ - if (!PageHighMem(bh->b_page)) - return bh->b_data; - - /* - * it's a highmem page - */ - __cli(); - addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); - - if (addr & ~PAGE_MASK) - BUG(); - - return (char *) addr + bh_offset(bh); -} - -static inline void bh_kunmap_irq(char *buffer, unsigned long *flags) -{ - unsigned long ptr = (unsigned long) buffer & PAGE_MASK; - - kunmap_atomic((void *) ptr, KM_BH_IRQ); - __restore_flags(*flags); -} - -#else /* CONFIG_HIGHMEM */ - -static inline unsigned int nr_free_highpages(void) { return 0; } -static inline void kmap_flush_unused(void) { } - -static inline void *kmap(struct page *page) { return page_address(page); } - -#define kunmap(page) do { } while (0) - -#define kmap_atomic(page,idx) kmap(page) -#define kunmap_atomic(page,idx) kunmap(page) - -#define bh_kmap(bh) ((bh)->b_data) -#define bh_kunmap(bh) do { } while (0) -#define kmap_nonblock(page) kmap(page) -#define bh_kmap_irq(bh, flags) ((bh)->b_data) -#define bh_kunmap_irq(bh, flags) do { *(flags) = 0; } while (0) - -#endif /* CONFIG_HIGHMEM */ - -/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ -static inline void clear_user_highpage(struct page *page, unsigned long vaddr) -{ - void *addr = kmap_atomic(page, KM_USER0); - clear_user_page(addr, vaddr); - kunmap_atomic(addr, KM_USER0); -} - -static inline void clear_highpage(struct page *page) -{ - clear_page(kmap(page)); - kunmap(page); -} - -/* - * Same but also flushes aliased cache contents to RAM. - */ -static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) -{ - char *kaddr; - - if (offset + size > PAGE_SIZE) - out_of_line_bug(); - kaddr = kmap(page); - memset(kaddr + offset, 0, size); - flush_dcache_page(page); - flush_page_to_ram(page); - kunmap(page); -} - -static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr) -{ - char *vfrom, *vto; - - vfrom = kmap_atomic(from, KM_USER0); - vto = kmap_atomic(to, KM_USER1); - copy_user_page(vto, vfrom, vaddr); - kunmap_atomic(vfrom, KM_USER0); - kunmap_atomic(vto, KM_USER1); -} - -static inline void copy_highpage(struct page *to, struct page *from) -{ - char *vfrom, *vto; - - vfrom = kmap_atomic(from, KM_USER0); - vto = kmap_atomic(to, KM_USER1); - copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); - kunmap_atomic(vto, KM_USER1); -} - -#endif /* _LINUX_HIGHMEM_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/irq.h --- a/linux-2.4-xen-sparse/include/linux/irq.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,80 +0,0 @@ -#ifndef __irq_h -#define __irq_h - -/* - * Please do not include this file in generic code. There is currently - * no requirement for any architecture to implement anything held - * within this file. - * - * Thanks. --rmk - */ - -#include <linux/config.h> - -#if !defined(CONFIG_ARCH_S390) - -#include <linux/cache.h> -#include <linux/spinlock.h> - -#include <asm/irq.h> -#include <asm/ptrace.h> - -/* - * IRQ line status. - */ -#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */ -#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! 
*/ -#define IRQ_PENDING 4 /* IRQ pending - replay on enable */ -#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ -#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */ -#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ -#define IRQ_LEVEL 64 /* IRQ level triggered */ -#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */ -#define IRQ_PER_CPU 256 /* IRQ is per CPU */ - -/* - * Interrupt controller descriptor. This is all we need - * to describe about the low-level hardware. - */ -struct hw_interrupt_type { - const char * typename; - unsigned int (*startup)(unsigned int irq); - void (*shutdown)(unsigned int irq); - void (*enable)(unsigned int irq); - void (*disable)(unsigned int irq); - void (*ack)(unsigned int irq); - void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, unsigned long mask); -}; - -typedef struct hw_interrupt_type hw_irq_controller; - -/* - * This is the "IRQ descriptor", which contains various information - * about the irq, including what kind of hardware handling it has, - * whether it is disabled etc etc. - * - * Pad this out to 32 bytes for cache and indexing reasons. - */ -typedef struct { - unsigned int status; /* IRQ status */ - hw_irq_controller *handler; - struct irqaction *action; /* IRQ action list */ - unsigned int depth; /* nested irq disables */ - spinlock_t lock; -} ____cacheline_aligned irq_desc_t; - -extern irq_desc_t irq_desc [NR_IRQS]; - -#include <asm/hw_irq.h> /* the arch dependent stuff */ - -extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); -extern int setup_irq(unsigned int , struct irqaction * ); -extern int teardown_irq(unsigned int , struct irqaction * ); - -extern hw_irq_controller no_irq_type; /* needed in every arch ? */ -extern void no_action(int cpl, void *dev_id, struct pt_regs *regs); - -#endif - -#endif /* __irq_h */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/mm.h --- a/linux-2.4-xen-sparse/include/linux/mm.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,727 +0,0 @@ -#ifndef _LINUX_MM_H -#define _LINUX_MM_H - -#include <linux/sched.h> -#include <linux/errno.h> - -#ifdef __KERNEL__ - -#include <linux/config.h> -#include <linux/string.h> -#include <linux/list.h> -#include <linux/mmzone.h> -#include <linux/swap.h> -#include <linux/rbtree.h> - -extern unsigned long max_mapnr; -extern unsigned long num_physpages; -extern unsigned long num_mappedpages; -extern void * high_memory; -extern int page_cluster; -/* The inactive_clean lists are per zone. */ -extern struct list_head active_list; -extern struct list_head inactive_list; - -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/atomic.h> - -/* - * Linux kernel virtual memory manager primitives. - * The idea being to have a "virtual" mm in the same way - * we have a virtual fs - giving a cleaner interface to the - * mm details, and allowing different kinds of memory mappings - * (from shared memory to executable loading to arbitrary - * mmap() functions). - */ - -/* - * This struct defines a memory VMM memory area. There is one of these - * per VM-area/task. A VM area is any part of the process virtual memory - * space that has a special rule for the page-fault handlers (ie a shared - * library, the executable area etc). - */ -struct vm_area_struct { - struct mm_struct * vm_mm; /* The address space we belong to. */ - unsigned long vm_start; /* Our start address within vm_mm. 
*/ - unsigned long vm_end; /* The first byte after our end address - within vm_mm. */ - - /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; - - pgprot_t vm_page_prot; /* Access permissions of this VMA. */ - unsigned long vm_flags; /* Flags, listed below. */ - - rb_node_t vm_rb; - - /* - * For areas with an address space and backing store, - * one of the address_space->i_mmap{,shared} lists, - * for shm areas, the list of attaches, otherwise unused. - */ - struct vm_area_struct *vm_next_share; - struct vm_area_struct **vm_pprev_share; - - /* Function pointers to deal with this struct. */ - struct vm_operations_struct * vm_ops; - - /* Information about our backing store: */ - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ - struct file * vm_file; /* File we map to (can be NULL). */ - unsigned long vm_raend; /* XXX: put full readahead info here. */ - void * vm_private_data; /* was vm_pte (shared mem) */ -}; - -/* - * vm_flags.. - */ -#define VM_READ 0x00000001 /* currently active flags */ -#define VM_WRITE 0x00000002 -#define VM_EXEC 0x00000004 -#define VM_SHARED 0x00000008 - -#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ -#define VM_MAYWRITE 0x00000020 -#define VM_MAYEXEC 0x00000040 -#define VM_MAYSHARE 0x00000080 - -#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ -#define VM_GROWSUP 0x00000200 -#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ -#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ - -#define VM_EXECUTABLE 0x00001000 -#define VM_LOCKED 0x00002000 -#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ - - /* Used by sys_madvise() */ -#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ -#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ - -#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ -#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ - -#ifndef VM_STACK_FLAGS -#define VM_STACK_FLAGS 0x00000177 -#endif - -#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) -#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK -#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) -#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) -#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) - -/* read ahead limits */ -extern int vm_min_readahead; -extern int vm_max_readahead; - -/* - * mapping from the currently active vm_flags protection bits (the - * low four bits) to a page protection mask.. - */ -extern pgprot_t protection_map[16]; - - -/* - * These are the virtual MM functions - opening of an area, closing and - * unmapping it (needed to keep files on disk up-to-date etc), pointer - * to the functions called when a no-page or a wp-page exception occurs. - */ -struct vm_operations_struct { - void (*open)(struct vm_area_struct * area); - void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); -}; - -/* - * Each physical page in the system has a struct page associated with - * it to keep track of whatever it is we are using the page for at the - * moment. Note that we have no way to track which tasks are using - * a page. - * - * Try to keep the most commonly accessed fields in single cache lines - * here (16 bytes or greater). 
This ordering should be particularly - * beneficial on 32-bit processors. - * - * The first line is data used in page cache lookup, the second line - * is used for linear searches (eg. clock algorithm scans). - * - * TODO: make this structure smaller, it could be as small as 32 bytes. - */ -typedef struct page { - struct list_head list; /* ->mapping has some page lists. */ - struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ - struct page *next_hash; /* Next page sharing our hash bucket in - the pagecache hash table. */ - atomic_t count; /* Usage count, see below. */ - unsigned long flags; /* atomic flags, some possibly - updated asynchronously */ - struct list_head lru; /* Pageout list, eg. active_list; - protected by pagemap_lru_lock !! */ - struct page **pprev_hash; /* Complement to *next_hash. */ - struct buffer_head * buffers; /* Buffer maps us to a disk block. */ - - /* - * On machines where all RAM is mapped into kernel address space, - * we can simply calculate the virtual address. On machines with - * highmem some memory is mapped into kernel virtual memory - * dynamically, so we need a place to store that address. - * Note that this field could be 16 bits on x86 ... ;) - * - * Architectures with slow multiplication can define - * WANT_PAGE_VIRTUAL in asm/page.h - */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - void *virtual; /* Kernel virtual address (NULL if - not kmapped, ie. highmem) */ -#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */ -} mem_map_t; - -/* - * Methods to modify the page usage count. - * - * What counts for a page usage: - * - cache mapping (page->mapping) - * - disk mapping (page->buffers) - * - page mapped in a task's page tables, each mapping - * is counted separately - * - * Also, many kernel routines increase the page count before a critical - * routine so they can be sure the page doesn't go away from under them. - */ -#define get_page(p) atomic_inc(&(p)->count) -#define put_page(p) __free_page(p) -#define put_page_testzero(p) atomic_dec_and_test(&(p)->count) -#define page_count(p) atomic_read(&(p)->count) -#define set_page_count(p,v) atomic_set(&(p)->count, v) - -static inline struct page *nth_page(struct page *page, int n) -{ - return page + n; -} - -/* - * Various page->flags bits: - * - * PG_reserved is set for special pages, which can never be swapped - * out. Some of them might not even exist (eg empty_bad_page)... - * - * Multiple processes may "see" the same page. E.g. for untouched - * mappings of /dev/null, all processes see the same page full of - * zeroes, and text pages of executables and shared libraries have - * only one copy in memory, at most, normally. - * - * For the non-reserved pages, page->count denotes a reference count. - * page->count == 0 means the page is free. - * page->count == 1 means the page is used for exactly one purpose - * (e.g. a private data page of one process). - * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page->count is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. - * - * The other pages (we may call them "process pages") are completely - * managed by the Linux memory manager: I/O, buffers, swapping etc. - * The following discussion applies only to them. - * - * A page may belong to an inode's memory mapping. 
In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. - * - * A page may have buffers allocated to it. In this case, - * page->buffers is a circular list of these buffer heads. Else, - * page->buffers == NULL. - * - * For pages belonging to inodes, the page->count is the number of - * attaches, plus 1 if buffers are allocated to the page, plus one - * for the page cache itself. - * - * All pages belonging to an inode are in these doubly linked lists: - * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; - * using the page->list list_head. These fields are also used for - * freelist managemet (when page->count==0). - * - * There is also a hash table mapping (mapping,index) to the page - * in memory if present. The lists for this hash table use the fields - * page->next_hash and page->pprev_hash. - * - * All process pages can do I/O: - * - inode pages may need to be read from disk, - * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. - * During disk I/O, PG_locked is used. This bit is set before I/O - * and reset when I/O completes. page_waitqueue(page) is a wait queue of all - * tasks waiting for the I/O on this page to complete. - * PG_uptodate tells whether the page's contents is valid. - * When a read completes, the page becomes uptodate, unless a disk I/O - * error happened. - * - * For choosing which pages to swap out, inode pages carry a - * PG_referenced bit, which is set any time the system accesses - * that page through the (mapping,index) hash table. This referenced - * bit, together with the referenced bit in the page tables, is used - * to manipulate page->age and move the page across the active, - * inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the - * active, inactive_dirty and inactive_clean lists are protected by - * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit! - * - * PG_skip is used on sparc/sparc64 architectures to "skip" certain - * parts of the address space. - * - * PG_error is set to indicate that an I/O error occurred on this page. - * - * PG_arch_1 is an architecture specific page state bit. The generic - * code guarantees that this bit is cleared for a page when it first - * is entered into the page cache. - * - * PG_highmem pages are not permanently mapped into the kernel virtual - * address space, they need to be kmapped separately for doing IO on - * the pages. The struct page (these bits with information) are always - * mapped into kernel address space... - */ -#define PG_locked 0 /* Page is locked. Don't touch. */ -#define PG_error 1 -#define PG_referenced 2 -#define PG_uptodate 3 -#define PG_dirty 4 -#define PG_unused 5 -#define PG_lru 6 -#define PG_active 7 -#define PG_slab 8 -#define PG_skip 10 -#define PG_highmem 11 -#define PG_checked 12 /* kill me in 2.5.<early>. */ -#define PG_arch_1 13 -#define PG_reserved 14 -#define PG_launder 15 /* written out by VM pressure.. */ -#define PG_fs_1 16 /* Filesystem specific */ -#define PG_foreign 21 /* Page belongs to foreign allocator */ - -#ifndef arch_set_page_uptodate -#define arch_set_page_uptodate(page) -#endif - -/* Make it prettier to test the above... 
*/ -#define UnlockPage(page) unlock_page(page) -#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) -#ifndef SetPageUptodate -#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) -#endif -#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) -#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) -#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) -#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) -#define PageLocked(page) test_bit(PG_locked, &(page)->flags) -#define LockPage(page) set_bit(PG_locked, &(page)->flags) -#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) -#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) -#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) -#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) -#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags) - -/* A foreign page uses a custom destructor rather than the buddy allocator. */ -#ifdef CONFIG_FOREIGN_PAGES -#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) -#define SetPageForeign(page, dtor) do { \ - set_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = (void *)dtor; \ -} while (0) -#define ClearPageForeign(page) do { \ - clear_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = NULL; \ -} while (0) -#define PageForeignDestructor(page) \ - ( (void (*) (struct page *)) (page)->mapping ) -#else -#define PageForeign(page) 0 -#define PageForeignDestructor(page) void -#endif - -/* - * The zone field is never updated after free_area_init_core() - * sets it, so none of the operations on it need to be atomic. - */ -#define NODE_SHIFT 4 -#define ZONE_SHIFT (BITS_PER_LONG - 8) - -struct zone_struct; -extern struct zone_struct *zone_table[]; - -static inline zone_t *page_zone(struct page *page) -{ - return zone_table[page->flags >> ZONE_SHIFT]; -} - -static inline void set_page_zone(struct page *page, unsigned long zone_num) -{ - page->flags &= ~(~0UL << ZONE_SHIFT); - page->flags |= zone_num << ZONE_SHIFT; -} - -/* - * In order to avoid #ifdefs within C code itself, we define - * set_page_address to a noop for non-highmem machines, where - * the field isn't useful. - * The same is true for page_address() in arch-dependent code. - */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) - -#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ -#define set_page_address(page, address) do { } while(0) -#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -/* - * Permanent address of a page. Obviously must never be - * called on a highmem page. 
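- * (highmem pages have no permanent kernel mapping - map them
- * temporarily with kmap()/kmap_atomic() from <linux/highmem.h>)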
- */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - -#define page_address(page) ((page)->virtual) - -#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -#define page_address(page) \ - __va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT) \ - + page_zone(page)->zone_start_paddr) - -#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -extern void FASTCALL(set_page_dirty(struct page *)); - -/* - * The first mb is necessary to safely close the critical section opened by the - * TryLockPage(), the second mb is necessary to enforce ordering between - * the clear_bit and the read of the waitqueue (to avoid SMP races with a - * parallel wait_on_page). - */ -#define PageError(page) test_bit(PG_error, &(page)->flags) -#define SetPageError(page) set_bit(PG_error, &(page)->flags) -#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) -#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) -#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) -#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) -#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) -#define PageSlab(page) test_bit(PG_slab, &(page)->flags) -#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) -#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) -#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) - -#define PageActive(page) test_bit(PG_active, &(page)->flags) -#define SetPageActive(page) set_bit(PG_active, &(page)->flags) -#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) - -#define PageLRU(page) test_bit(PG_lru, &(page)->flags) -#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) -#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) - -#ifdef CONFIG_HIGHMEM -#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) -#else -#define PageHighMem(page) 0 /* needed to optimize away at compile time */ -#endif - -#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) - -/* - * Error return values for the *_nopage functions - */ -#define NOPAGE_SIGBUS (NULL) -#define NOPAGE_OOM ((struct page *) (-1)) - -/* The array of struct pages */ -extern mem_map_t * mem_map; - -/* - * There is only one page-allocator function, and two main namespaces to - * it. The alloc_page*() variants return 'struct page *' and as such - * can allocate highmem pages, the *get*page*() variants return - * virtual kernel addresses to the allocated page(s). - */ -extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); -extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); -extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); - -static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - /* - * Gets optimized away by the compiler. 
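- * (for a compile-time-constant order the MAX_ORDER check below
- * folds away, so the common alloc_page() case costs nothing extra)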
- */ - if (order >= MAX_ORDER) - return NULL; - return _alloc_pages(gfp_mask, order); -} - -#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) - -extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); - -#define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask),0) - -#define __get_dma_pages(gfp_mask, order) \ - __get_free_pages((gfp_mask) | GFP_DMA,(order)) - -/* - * The old interface name will be removed in 2.5: - */ -#define get_free_page get_zeroed_page - -/* - * There is only one 'core' page-freeing function. - */ -extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); -extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); - -#define __free_page(page) __free_pages((page), 0) -#define free_page(addr) free_pages((addr),0) - -extern void show_free_areas(void); -extern void show_free_areas_node(pg_data_t *pgdat); - -extern void clear_page_tables(struct mm_struct *, unsigned long, int); - -extern int fail_writepage(struct page *); -struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); -struct file *shmem_file_setup(char * name, loff_t size); -extern void shmem_lock(struct file * file, int lock); -extern int shmem_zero_setup(struct vm_area_struct *); - -extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); -extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); -extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); -extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); - -extern int vmtruncate(struct inode * inode, loff_t offset); -extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); -extern int make_pages_present(unsigned long addr, unsigned long end); -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); -extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len); -extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len); -extern int ptrace_attach(struct task_struct *tsk); -extern int ptrace_detach(struct task_struct *, unsigned int); -extern void ptrace_disable(struct task_struct *); -extern int ptrace_check_attach(struct task_struct *task, int kill); - -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -/* - * On a two-level page table, this ends up being trivial. Thus the - * inlining and the symmetry break with pte_alloc() that does all - * of this out-of-line. 
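- * (on two-level architectures the pmd is folded into the pgd, so
- * pgd_none() is constant false and this collapses to pmd_offset())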
- */ -static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - if (pgd_none(*pgd)) - return __pmd_alloc(mm, pgd, address); - return pmd_offset(pgd, address); -} - -extern int pgt_cache_water[2]; -extern int check_pgt_cache(void); - -extern void free_area_init(unsigned long * zones_size); -extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, - unsigned long * zones_size, unsigned long zone_start_paddr, - unsigned long *zholes_size); -extern void mem_init(void); -extern void show_mem(void); -extern void si_meminfo(struct sysinfo * val); -extern void swapin_readahead(swp_entry_t); - -extern struct address_space swapper_space; -#define PageSwapCache(page) ((page)->mapping == &swapper_space) - -static inline int is_page_cache_freeable(struct page * page) -{ - return page_count(page) - !!page->buffers == 1; -} - -extern int FASTCALL(can_share_swap_page(struct page *)); -extern int FASTCALL(remove_exclusive_swap_page(struct page *)); - -extern void __free_pte(pte_t); - -/* mmap.c */ -extern void lock_vma_mappings(struct vm_area_struct *); -extern void unlock_vma_mappings(struct vm_area_struct *); -extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void build_mmap_rb(struct mm_struct *); -extern void exit_mmap(struct mm_struct *); - -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long pgoff); - -static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -out: - return ret; -} - -extern int do_munmap(struct mm_struct *, unsigned long, size_t); - -extern unsigned long do_brk(unsigned long, unsigned long); - -static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev) -{ - prev->vm_next = vma->vm_next; - rb_erase(&vma->vm_rb, &mm->mm_rb); - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; -} - -static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags) -{ - if (!vma->vm_file && vma->vm_flags == vm_flags) - return 1; - else - return 0; -} - -struct zone_t; -/* filemap.c */ -extern void remove_inode_page(struct page *); -extern unsigned long page_unuse(struct page *); -extern void truncate_inode_pages(struct address_space *, loff_t); - -/* generic vm_area_ops exported for stackable file systems */ -extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned int); -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int); - -/* - * GFP bitmasks.. - */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 - -/* Action modifiers - doesn't change the zoning */ -#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ -#define __GFP_HIGH 0x20 /* Should access emergency pools? */ -#define __GFP_IO 0x40 /* Can start low memory physical IO? */ -#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? 
*/ -#define __GFP_FS 0x100 /* Can call down to low-level FS? */ - -#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) -#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO) -#define GFP_ATOMIC (__GFP_HIGH) -#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM) -#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) - -/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some - platforms, used as appropriate on others */ - -#define GFP_DMA __GFP_DMA - -static inline unsigned int pf_gfp_mask(unsigned int gfp_mask) -{ - /* avoid all memory balancing I/O methods if this task cannot block on I/O */ - if (current->flags & PF_NOIO) - gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS); - - return gfp_mask; -} - -/* vma is the first one with address < vma->vm_end, - * and even address < vma->vm_start. Have to extend vma. */ -static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) -{ - unsigned long grow; - - /* - * vma->vm_start/vm_end cannot change under us because the caller - * is required to hold the mmap_sem in read mode. We need the - * page_table_lock lock to serialize against concurrent expand_stacks. - */ - address &= PAGE_MASK; - spin_lock(&vma->vm_mm->page_table_lock); - - /* already expanded while we were spinning? */ - if (vma->vm_start <= address) { - spin_unlock(&vma->vm_mm->page_table_lock); - return 0; - } - - grow = (vma->vm_start - address) >> PAGE_SHIFT; - if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); - return -ENOMEM; - } - - if ((vma->vm_flags & VM_LOCKED) && - ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_MEMLOCK].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); - return -ENOMEM; - } - - - vma->vm_start = address; - vma->vm_pgoff -= grow; - vma->vm_mm->total_vm += grow; - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; - spin_unlock(&vma->vm_mm->page_table_lock); - return 0; -} - -/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); -extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, - struct vm_area_struct **pprev); - -/* Look up the first VMA which intersects the interval start_addr..end_addr-1, - NULL if none. Assume start_addr < end_addr. 
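-
-   A typical caller (an illustrative sketch, mmap_sem held) rejects a
-   range that is already mapped:
-
-	if (find_vma_intersection(mm, addr, addr + len))
-		return -EINVAL;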
*/ -static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) -{ - struct vm_area_struct * vma = find_vma(mm,start_addr); - - if (vma && end_addr <= vma->vm_start) - vma = NULL; - return vma; -} - -extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); - -extern struct page * vmalloc_to_page(void *addr); - -#endif /* __KERNEL__ */ - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/sched.h --- a/linux-2.4-xen-sparse/include/linux/sched.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,971 +0,0 @@ -#ifndef _LINUX_SCHED_H -#define _LINUX_SCHED_H - -#include <asm/param.h> /* for HZ */ - -extern unsigned long event; - -#include <linux/config.h> -#include <linux/binfmts.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/times.h> -#include <linux/timex.h> -#include <linux/rbtree.h> - -#include <asm/system.h> -#include <asm/semaphore.h> -#include <asm/page.h> -#include <asm/ptrace.h> -#include <asm/mmu.h> - -#include <linux/smp.h> -#include <linux/tty.h> -#include <linux/sem.h> -#include <linux/signal.h> -#include <linux/securebits.h> -#include <linux/fs_struct.h> - -struct exec_domain; - -/* - * cloning flags: - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. 
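/*
 * Self-contained sketch of the fixed-point scheme described above; the
 * constants mirror the FSHIFT/FIXED_1/EXP_1 values defined just below.
 * Feeding a constant sample of "two runnable tasks" shows the average
 * decaying toward 2.00.
 */
#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)		/* 1.0 as fixed-point */
#define EXP_1	1884			/* 1/exp(5sec/1min) */

#define CALC_LOAD(load, exp, n) \
	load *= exp; \
	load += n*(FIXED_1 - exp); \
	load >>= FSHIFT;

int main(void)
{
	unsigned long avg = 0, sample = 2 * FIXED_1;
	int tick;

	/* CALC_LOAD is a multi-statement macro, so brace the loop body */
	for (tick = 0; tick < 24; tick++) {	/* 24 x 5s = two minutes */
		CALC_LOAD(avg, EXP_1, sample);
	}

	/* prints "1.72": two minutes covers ~86% of the climb to 2.00 */
	printf("%lu.%02lu\n", avg >> FSHIFT,
	       (avg & (FIXED_1 - 1)) * 100 / FIXED_1);
	return 0;
}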
- */ -extern unsigned long avenrun[]; /* Load averages */ - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ -#define LOAD_FREQ (5*HZ) /* 5 sec intervals */ -#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ -#define EXP_5 2014 /* 1/exp(5sec/5min) */ -#define EXP_15 2037 /* 1/exp(5sec/15min) */ - -#define CALC_LOAD(load,exp,n) \ - load *= exp; \ - load += n*(FIXED_1-exp); \ - load >>= FSHIFT; - -#define CT_TO_SECS(x) ((x) / HZ) -#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) - -extern int nr_running, nr_threads; -extern int last_pid; - -#include <linux/fs.h> -#include <linux/time.h> -#include <linux/param.h> -#include <linux/resource.h> -#ifdef __KERNEL__ -#include <linux/timer.h> -#endif - -#include <asm/processor.h> - -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define TASK_ZOMBIE 4 -#define TASK_STOPPED 8 - -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) - -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - set_mb(current->state, (state_value)) - -/* - * Scheduling policies - */ -#define SCHED_OTHER 0 -#define SCHED_FIFO 1 -#define SCHED_RR 2 - -/* - * This is an additional bit set when we want to - * yield the CPU for one re-schedule.. - */ -#define SCHED_YIELD 0x10 - -struct sched_param { - int sched_priority; -}; - -struct completion; - -#ifdef __KERNEL__ - -#include <linux/spinlock.h> - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t runqueue_lock; -extern spinlock_t mmlist_lock; - -extern void sched_init(void); -extern void init_idle(void); -extern void show_state(void); -extern void cpu_init (void); -extern void trap_init(void); -extern void update_process_times(int user); -#ifdef CONFIG_NO_IDLE_HZ -extern void update_process_times_us(int user, int system); -#endif -extern void update_one_process(struct task_struct *p, unsigned long user, - unsigned long system, int cpu); - -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long FASTCALL(schedule_timeout(signed long timeout)); -asmlinkage void schedule(void); - -extern int schedule_task(struct tq_struct *task); -extern void flush_scheduled_tasks(void); -extern int start_context_thread(void); -extern int current_is_keventd(void); - -#if CONFIG_SMP -extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask); -#else -# define set_cpus_allowed(p, new_mask) do { } while (0) -#endif - -/* - * The default fd array needs to be at least BITS_PER_LONG, - * as this is the granularity returned by copy_fdset(). - */ -#define NR_OPEN_DEFAULT BITS_PER_LONG - -struct namespace; -/* - * Open file table structure - */ -struct files_struct { - atomic_t count; - rwlock_t file_lock; /* Protects all the below members. 
Nests inside tsk->alloc_lock */ - int max_fds; - int max_fdset; - int next_fd; - struct file ** fd; /* current fd array */ - fd_set *close_on_exec; - fd_set *open_fds; - fd_set close_on_exec_init; - fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; -}; - -#define INIT_FILES \ -{ \ - count: ATOMIC_INIT(1), \ - file_lock: RW_LOCK_UNLOCKED, \ - max_fds: NR_OPEN_DEFAULT, \ - max_fdset: __FD_SETSIZE, \ - next_fd: 0, \ - fd: &init_files.fd_array[0], \ - close_on_exec: &init_files.close_on_exec_init, \ - open_fds: &init_files.open_fds_init, \ - close_on_exec_init: { { 0, } }, \ - open_fds_init: { { 0, } }, \ - fd_array: { NULL, } \ -} - -/* Maximum number of active map areas.. This is a random (large) number */ -#define DEFAULT_MAX_MAP_COUNT (65536) - -extern int max_map_count; - -struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ - rb_root_t mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ - pgd_t * pgd; - atomic_t mm_users; /* How many users with user space? */ - atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - int map_count; /* number of VMAs */ - struct rw_semaphore mmap_sem; - spinlock_t page_table_lock; /* Protects task page tables and mm->rss */ - - struct list_head mmlist; /* List of all active mm's. These are globally strung - * together off init_mm.mmlist, and are protected - * by mmlist_lock - */ - - unsigned long start_code, end_code, start_data, end_data; - unsigned long start_brk, brk, start_stack; - unsigned long arg_start, arg_end, env_start, env_end; - unsigned long rss, total_vm, locked_vm; - unsigned long def_flags; - unsigned long cpu_vm_mask; - unsigned long swap_address; - - unsigned dumpable:1; - - /* Architecture-specific MM context */ - mm_context_t context; -}; - -extern int mmlist_nr; - -#define INIT_MM(name) \ -{ \ - mm_rb: RB_ROOT, \ - pgd: swapper_pg_dir, \ - mm_users: ATOMIC_INIT(2), \ - mm_count: ATOMIC_INIT(1), \ - mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ - page_table_lock: SPIN_LOCK_UNLOCKED, \ - mmlist: LIST_HEAD_INIT(name.mmlist), \ -} - -struct signal_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; -}; - - -#define INIT_SIGNALS { \ - count: ATOMIC_INIT(1), \ - action: { {{0,}}, }, \ - siglock: SPIN_LOCK_UNLOCKED \ -} - -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t files; /* How many open files does this user have? */ - - /* Hash table maintenance information */ - struct user_struct *next, **pprev; - uid_t uid; -}; - -#define get_current_user() ({ \ - struct user_struct *__tmp_user = current->user; \ - atomic_inc(&__tmp_user->__count); \ - __tmp_user; }) - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - -struct task_struct { - /* - * offsets of these are hardcoded elsewhere - touch with care - */ - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ - int sigpending; - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF for user-thead - 0-0xFFFFFFFF for kernel-thread - */ - struct exec_domain *exec_domain; - volatile long need_resched; - unsigned long ptrace; - - int lock_depth; /* Lock depth */ - -/* - * offset 32 begins here on 32-bit platforms. 
We keep - * all fields in a single cacheline that are needed for - * the goodness() loop in schedule(). - */ - long counter; - long nice; - unsigned long policy; - struct mm_struct *mm; - int processor; - /* - * cpus_runnable is ~0 if the process is not running on any - * CPU. It's (1 << cpu) if it's running on a CPU. This mask - * is updated under the runqueue lock. - * - * To determine whether a process might run on a CPU, this - * mask is AND-ed with cpus_allowed. - */ - unsigned long cpus_runnable, cpus_allowed; - /* - * (only the 'next' pointer fits into the cacheline, but - * that's just fine.) - */ - struct list_head run_list; - unsigned long sleep_time; - - struct task_struct *next_task, *prev_task; - struct mm_struct *active_mm; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; - -/* task state */ - struct linux_binfmt *binfmt; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - /* ??? */ - unsigned long personality; - int did_exec:1; - unsigned task_dumpable:1; - pid_t pid; - pid_t pgrp; - pid_t tty_old_pgrp; - pid_t session; - pid_t tgid; - /* boolean value for session group leader */ - int leader; - /* - * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with - * p->p_pptr->pid) - */ - struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; - struct list_head thread_group; - - /* PID hash table linkage. */ - struct task_struct *pidhash_next; - struct task_struct **pidhash_pprev; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - struct completion *vfork_done; /* for vfork() */ - unsigned long rt_priority; - unsigned long it_real_value, it_prof_value, it_virt_value; - unsigned long it_real_incr, it_prof_incr, it_virt_incr; - struct timer_list real_timer; - struct tms times; - unsigned long start_time; - long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; - int swappable:1; -/* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - int ngroups; - gid_t groups[NGROUPS]; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted; - int keep_capabilities:1; - struct user_struct *user; -/* limits */ - struct rlimit rlim[RLIM_NLIMITS]; - unsigned short used_math; - char comm[16]; -/* file system info */ - int link_count, total_link_count; - struct tty_struct *tty; /* NULL if no tty */ - unsigned int locks; /* How many file locks are being held */ -/* ipc stuff */ - struct sem_undo *semundo; - struct sem_queue *semsleeping; -/* CPU-specific state of this task */ - struct thread_struct thread; -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespace */ - struct namespace *namespace; -/* signal handlers */ - spinlock_t sigmask_lock; /* Protects signal and blocked */ - struct signal_struct *sig; - - sigset_t blocked; - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; - -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty */ - spinlock_t alloc_lock; - -/* journalling filesystem info */ - void *journal_info; -}; - -/* - * Per process flags - */ -#define PF_ALIGNWARN 0x00000001 
/* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ -#define PF_STARTING 0x00000002 /* being created */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ -#define PF_FREE_PAGES 0x00002000 /* per process page freeing */ -#define PF_NOIO 0x00004000 /* avoid generating further I/O */ -#define PF_FSTRANS 0x00008000 /* inside a filesystem transaction */ - -#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ - -/* - * Ptrace flags - */ - -#define PT_PTRACED 0x00000001 -#define PT_TRACESYS 0x00000002 -#define PT_DTRACE 0x00000004 /* delayed trace (used on m68k, i386) */ -#define PT_TRACESYSGOOD 0x00000008 -#define PT_PTRACE_CAP 0x00000010 /* ptracer can follow suid-exec */ - -#define is_dumpable(tsk) ((tsk)->task_dumpable && (tsk)->mm && (tsk)->mm->dumpable) - -/* - * Limit the stack by to some sane default: root can always - * increase this limit if needed.. 8MB seems reasonable. - */ -#define _STK_LIM (8*1024*1024) - -#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */ -#define MAX_COUNTER (20*HZ/100) -#define DEF_NICE (0) - -extern void yield(void); - -/* - * The default (Linux) execution domain. - */ -extern struct exec_domain default_exec_domain; - -/* - * INIT_TASK is used to set up the first task table, touch at - * your own risk!. Base=0, limit=0x1fffff (=2MB) - */ -#define INIT_TASK(tsk) \ -{ \ - state: 0, \ - flags: 0, \ - sigpending: 0, \ - addr_limit: KERNEL_DS, \ - exec_domain: &default_exec_domain, \ - lock_depth: -1, \ - counter: DEF_COUNTER, \ - nice: DEF_NICE, \ - policy: SCHED_OTHER, \ - mm: NULL, \ - active_mm: &init_mm, \ - cpus_runnable: ~0UL, \ - cpus_allowed: ~0UL, \ - run_list: LIST_HEAD_INIT(tsk.run_list), \ - next_task: &tsk, \ - prev_task: &tsk, \ - p_opptr: &tsk, \ - p_pptr: &tsk, \ - thread_group: LIST_HEAD_INIT(tsk.thread_group), \ - wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\ - real_timer: { \ - function: it_real_fn \ - }, \ - cap_effective: CAP_INIT_EFF_SET, \ - cap_inheritable: CAP_INIT_INH_SET, \ - cap_permitted: CAP_FULL_SET, \ - keep_capabilities: 0, \ - rlim: INIT_RLIMITS, \ - user: INIT_USER, \ - comm: "swapper", \ - thread: INIT_THREAD, \ - fs: &init_fs, \ - files: &init_files, \ - sigmask_lock: SPIN_LOCK_UNLOCKED, \ - sig: &init_signals, \ - pending: { NULL, &tsk.pending.head, {{0}}}, \ - blocked: {{0}}, \ - alloc_lock: SPIN_LOCK_UNLOCKED, \ - journal_info: NULL, \ -} - - -#ifndef INIT_TASK_SIZE -# define INIT_TASK_SIZE 2048*sizeof(long) -#endif - -union task_union { - struct task_struct task; - unsigned long stack[INIT_TASK_SIZE/sizeof(long)]; -}; - -extern union task_union init_task_union; - -extern struct mm_struct init_mm; -extern struct task_struct *init_tasks[NR_CPUS]; - -/* PID hashing. (shouldnt this be dynamic?) 
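/*
 * Worked instance (a sketch) of the hash defined just below: with
 * PIDHASH_SZ == 1024, pid_hashfn() folds a pid's high bits into the
 * bucket index before masking, e.g.
 *
 *	pid_hashfn(1234) == ((1234 >> 8) ^ 1234) & 1023
 *			 == (4 ^ 1234) & 1023
 *			 == 1238 & 1023
 *			 == 214
 *
 * so pids that agree in their low bits still tend to spread across
 * different buckets.
 */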
*/ -#define PIDHASH_SZ (4096 >> 2) -extern struct task_struct *pidhash[PIDHASH_SZ]; - -#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) - -static inline void hash_pid(struct task_struct *p) -{ - struct task_struct **htable = &pidhash[pid_hashfn(p->pid)]; - - if((p->pidhash_next = *htable) != NULL) - (*htable)->pidhash_pprev = &p->pidhash_next; - *htable = p; - p->pidhash_pprev = htable; -} - -static inline void unhash_pid(struct task_struct *p) -{ - if(p->pidhash_next) - p->pidhash_next->pidhash_pprev = p->pidhash_pprev; - *p->pidhash_pprev = p->pidhash_next; -} - -static inline struct task_struct *find_task_by_pid(int pid) -{ - struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; - - for(p = *htable; p && p->pid != pid; p = p->pidhash_next) - ; - - return p; -} - -#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL) - -static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu) -{ - tsk->processor = cpu; - tsk->cpus_runnable = 1UL << cpu; -} - -static inline void task_release_cpu(struct task_struct *tsk) -{ - tsk->cpus_runnable = ~0UL; -} - -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(uid_t); -extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); - -#include <asm/current.h> - -extern unsigned long volatile jiffies; -extern unsigned long itimer_ticks; -extern unsigned long itimer_next; -extern struct timeval xtime; -extern void do_timer(struct pt_regs *); -#ifdef CONFIG_NO_IDLE_HZ -extern void do_timer_ticks(int ticks); -#endif - -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -#define CURRENT_TIME (xtime.tv_sec) - -extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); -extern void FASTCALL(sleep_on(wait_queue_head_t *q)); -extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, - signed long timeout)); -extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q)); -extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, - signed long timeout)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); - -#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) -#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) -#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) -#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) -#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) -#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) -#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) -#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) -asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); - -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *); -extern void sig_exit(int, int, struct siginfo *); -extern int dequeue_signal(sigset_t *, siginfo_t *); -extern void block_all_signals(int (*notifier)(void 
*priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int kill_pg_info(int, struct siginfo *, pid_t); -extern int kill_sl_info(int, struct siginfo *, pid_t); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern void notify_parent(struct task_struct *, int); -extern void do_notify_parent(struct task_struct *, int); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int kill_pg(pid_t, int, int); -extern int kill_sl(pid_t, int, int); -extern int kill_proc(pid_t, int, int); -extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); -extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long); - -static inline int signal_pending(struct task_struct *p) -{ - return (p->sigpending != 0); -} - -/* - * Re-calculate pending state from the set of locally pending - * signals, globally pending signals, and blocked signals. - */ -static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) -{ - unsigned long ready; - long i; - - switch (_NSIG_WORDS) { - default: - for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) - ready |= signal->sig[i] &~ blocked->sig[i]; - break; - - case 4: ready = signal->sig[3] &~ blocked->sig[3]; - ready |= signal->sig[2] &~ blocked->sig[2]; - ready |= signal->sig[1] &~ blocked->sig[1]; - ready |= signal->sig[0] &~ blocked->sig[0]; - break; - - case 2: ready = signal->sig[1] &~ blocked->sig[1]; - ready |= signal->sig[0] &~ blocked->sig[0]; - break; - - case 1: ready = signal->sig[0] &~ blocked->sig[0]; - } - return ready != 0; -} - -/* Reevaluate whether the task has signals pending delivery. - This is required every time the blocked sigset_t changes. - All callers should have t->sigmask_lock. */ - -static inline void recalc_sigpending(struct task_struct *t) -{ - t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked); -} - -/* True if we are on the alternate signal stack. */ - -static inline int on_sig_stack(unsigned long sp) -{ - return (sp - current->sas_ss_sp < current->sas_ss_size); -} - -static inline int sas_ss_flags(unsigned long sp) -{ - return (current->sas_ss_size == 0 ? SS_DISABLE - : on_sig_stack(sp) ? SS_ONSTACK : 0); -} - -extern int request_irq(unsigned int, - void (*handler)(int, void *, struct pt_regs *), - unsigned long, const char *, void *); -extern void free_irq(unsigned int, void *); - -/* - * This has now become a routine instead of a macro, it sets a flag if - * it returns true (to do BSD-style accounting where the process is flagged - * if it uses root privs). The implication of this is that you should do - * normal permissions checks first, and check suser() last. - * - * [Dec 1997 -- Chris Evans] - * For correctness, the above considerations need to be extended to - * fsuser(). This is done, along with moving fsuser() checks to be - * last. - * - * These will be removed, but in the mean time, when the SECURE_NOROOT - * flag is set, uids don't grant privilege. - */ -static inline int suser(void) -{ - if (!issecure(SECURE_NOROOT) && current->euid == 0) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -static inline int fsuser(void) -{ - if (!issecure(SECURE_NOROOT) && current->fsuid == 0) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -/* - * capable() checks for a particular capability. 
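/*
 * Sketch of the caller pattern for the interface described here
 * (CAP_NET_ADMIN is just an arbitrary example capability):
 *
 *	if (!capable(CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * i.e. perform the ordinary permission checks first and consult the
 * capability last, mirroring the suser()/fsuser() advice above.
 */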
- * New privilege checks should use this interface, rather than suser() or - * fsuser(). See include/linux/capability.h for defined capabilities. - */ - -static inline int capable(int cap) -{ -#if 1 /* ok now */ - if (cap_raised(current->cap_effective, cap)) -#else - if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0) -#endif - { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -extern struct mm_struct * start_lazy_tlb(void); -extern void end_lazy_tlb(struct mm_struct *mm); - -/* mmdrop drops the mm and the page tables */ -extern void FASTCALL(__mmdrop(struct mm_struct *)); -static inline void mmdrop(struct mm_struct * mm) -{ - if (atomic_dec_and_test(&mm->mm_count)) - __mmdrop(mm); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(void); - -/* - * Routines for handling the fd arrays - */ -extern struct file ** alloc_fd_array(int); -extern int expand_fd_array(struct files_struct *, int nr); -extern void free_fd_array(struct file **, int); - -extern fd_set *alloc_fdset(int); -extern int expand_fdset(struct files_struct *, int nr); -extern void free_fdset(fd_set *, int); - -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); -extern void flush_thread(void); -extern void exit_thread(void); - -extern void exit_mm(struct task_struct *); -extern void exit_files(struct task_struct *); -extern void exit_sighand(struct task_struct *); - -extern void reparent_to_init(void); -extern void daemonize(void); - -extern int do_execve(char *, char **, char **, struct pt_regs *); -extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long); - -extern void set_task_comm(struct task_struct *tsk, char *from); -extern void get_task_comm(char *to, struct task_struct *tsk); - -extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); - -extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -#define __wait_event(wq, condition) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - schedule(); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event(wq, condition) \ -do { \ - if (condition) \ - break; \ - __wait_event(wq, condition); \ -} while (0) - -#define __wait_event_interruptible(wq, condition, ret) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (!signal_pending(current)) { \ - schedule(); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __wait_event_interruptible(wq, condition, __ret); \ - __ret; \ -}) - -#define REMOVE_LINKS(p) do { \ - (p)->next_task->prev_task = 
(p)->prev_task; \ - (p)->prev_task->next_task = (p)->next_task; \ - if ((p)->p_osptr) \ - (p)->p_osptr->p_ysptr = (p)->p_ysptr; \ - if ((p)->p_ysptr) \ - (p)->p_ysptr->p_osptr = (p)->p_osptr; \ - else \ - (p)->p_pptr->p_cptr = (p)->p_osptr; \ - } while (0) - -#define SET_LINKS(p) do { \ - (p)->next_task = &init_task; \ - (p)->prev_task = init_task.prev_task; \ - init_task.prev_task->next_task = (p); \ - init_task.prev_task = (p); \ - (p)->p_ysptr = NULL; \ - if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \ - (p)->p_osptr->p_ysptr = p; \ - (p)->p_pptr->p_cptr = p; \ - } while (0) - -#define for_each_task(p) \ - for (p = &init_task ; (p = p->next_task) != &init_task ; ) - -#define for_each_thread(task) \ - for (task = next_thread(current) ; task != current ; task = next_thread(task)) - -#define next_thread(p) \ - list_entry((p)->thread_group.next, struct task_struct, thread_group) - -#define thread_group_leader(p) (p->pid == p->tgid) - -static inline void del_from_runqueue(struct task_struct * p) -{ - nr_running--; - p->sleep_time = jiffies; - list_del(&p->run_list); - p->run_list.next = NULL; -} - -static inline int task_on_runqueue(struct task_struct *p) -{ - return (p->run_list.next != NULL); -} - -static inline void unhash_process(struct task_struct *p) -{ - if (task_on_runqueue(p)) - out_of_line_bug(); - write_lock_irq(&tasklist_lock); - nr_threads--; - unhash_pid(p); - REMOVE_LINKS(p); - list_del(&p->thread_group); - write_unlock_irq(&tasklist_lock); -} - -/* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - -/* write full pathname into buffer and return start of pathname */ -static inline char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - char *buf, int buflen) -{ - char *res; - struct vfsmount *rootmnt; - struct dentry *root; - read_lock(¤t->fs->lock); - rootmnt = mntget(current->fs->rootmnt); - root = dget(current->fs->root); - read_unlock(¤t->fs->lock); - spin_lock(&dcache_lock); - res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); - spin_unlock(&dcache_lock); - dput(root); - mntput(rootmnt); - return res; -} - -static inline int need_resched(void) -{ - return (unlikely(current->need_resched)); -} - -extern void __cond_resched(void); -static inline void cond_resched(void) -{ - if (need_resched()) - __cond_resched(); -} - -#endif /* __KERNEL__ */ -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/skbuff.h --- a/linux-2.4-xen-sparse/include/linux/skbuff.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1181 +0,0 @@ -/* - * Definitions for the 'struct sk_buff' memory handlers. - * - * Authors: - * Alan Cox, <gw4pts@xxxxxxxxxxxxxxx> - * Florian La Roche, <rzsfl@xxxxxxxxxxxx> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _LINUX_SKBUFF_H -#define _LINUX_SKBUFF_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/time.h> -#include <linux/cache.h> - -#include <asm/atomic.h> -#include <asm/types.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/highmem.h> - -#define HAVE_ALLOC_SKB /* For the drivers to know */ -#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ -#define SLAB_SKB /* Slabified skbuffs */ - -#define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 -#define CHECKSUM_UNNECESSARY 2 - -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) -#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) - -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * HW: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, - * not UNNECESSARY. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * HW: device is required to csum packet as seen by hard_start_xmit - * from skb->h.raw to the end and to record the checksum - * at skb->h.raw+skb->csum. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_NO_CSUM - loopback or reliable single hop media. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * - * Any questions? No questions, good. --ANK - */ - -#ifdef __i386__ -#define NET_CALLER(arg) (*(((void**)&arg)-1)) -#else -#define NET_CALLER(arg) __builtin_return_address(0) -#endif - -#ifdef CONFIG_NETFILTER -struct nf_conntrack { - atomic_t use; - void (*destroy)(struct nf_conntrack *); -}; - -struct nf_ct_info { - struct nf_conntrack *master; -}; -#endif - -struct sk_buff_head { - /* These two members must be first. */ - struct sk_buff * next; - struct sk_buff * prev; - - __u32 qlen; - spinlock_t lock; -}; - -struct sk_buff; - -#define MAX_SKB_FRAGS 6 - -typedef struct skb_frag_struct skb_frag_t; - -struct skb_frag_struct -{ - struct page *page; - __u16 page_offset; - __u16 size; -}; - -/* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -struct skb_shared_info { - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -struct sk_buff { - /* These two members must be first. 
*/ - struct sk_buff * next; /* Next buffer in list */ - struct sk_buff * prev; /* Previous buffer in list */ - - struct sk_buff_head * list; /* List we are on */ - struct sock *sk; /* Socket we are owned by */ - struct timeval stamp; /* Time we arrived */ - struct net_device *dev; /* Device we arrived on/are leaving by */ - struct net_device *real_dev; /* For support of point to point protocols - (e.g. 802.3ad) over bonding, we must save the - physical device that got the packet before - replacing skb->dev with the virtual device. */ - - /* Transport layer header */ - union - { - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *icmph; - struct igmphdr *igmph; - struct iphdr *ipiph; - struct spxhdr *spxh; - unsigned char *raw; - } h; - - /* Network layer header */ - union - { - struct iphdr *iph; - struct ipv6hdr *ipv6h; - struct arphdr *arph; - struct ipxhdr *ipxh; - unsigned char *raw; - } nh; - - /* Link layer header */ - union - { - struct ethhdr *ethernet; - unsigned char *raw; - } mac; - - struct dst_entry *dst; - - /* - * This is the control buffer. It is free to use for every - * layer. Please put your private variables there. If you - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. - */ - char cb[48]; - - unsigned int len; /* Length of actual data */ - unsigned int data_len; - unsigned int csum; /* Checksum */ - unsigned char __unused, /* Dead field, may be reused */ - cloned, /* head may be cloned (check refcnt to be sure). */ - pkt_type, /* Packet class */ - ip_summed; /* Driver fed us an IP checksum */ - __u32 priority; /* Packet queueing priority */ - atomic_t users; /* User count - see datagram.c,tcp.c */ - unsigned short protocol; /* Packet protocol from driver. */ - unsigned short security; /* Security level of packet */ - unsigned int truesize; /* Buffer size */ - - unsigned char *head; /* Head of buffer */ - unsigned char *data; /* Data head pointer */ - unsigned char *tail; /* Tail pointer */ - unsigned char *end; /* End pointer */ - - void (*destructor)(struct sk_buff *); /* Destruct function */ -#ifdef CONFIG_NETFILTER - /* Can be used for communication between hooks. 
*/ - unsigned long nfmark; - /* Cache info */ - __u32 nfcache; - /* Associated connection, if any */ - struct nf_ct_info *nfct; -#ifdef CONFIG_NETFILTER_DEBUG - unsigned int nf_debug; -#endif -#endif /*CONFIG_NETFILTER*/ - -#if defined(CONFIG_HIPPI) - union{ - __u32 ifield; - } private; -#endif - -#ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ -#endif -}; - -#ifdef __KERNEL__ -/* - * Handling routines are only of interest to the kernel - */ -#include <linux/slab.h> - -#include <asm/system.h> - -extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff * alloc_skb(unsigned int size, int priority); -extern struct sk_buff * alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority); -extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); -extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); -extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); -extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int priority); -extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); -#define dev_kfree_skb(a) kfree_skb(a) -extern void skb_over_panic(struct sk_buff *skb, int len, void *here); -extern void skb_under_panic(struct sk_buff *skb, int len, void *here); - -/* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) - -/** - * skb_queue_empty - check if a queue is empty - * @list: queue head - * - * Returns true if the queue is empty, false otherwise. - */ - -static inline int skb_queue_empty(struct sk_buff_head *list) -{ - return (list->next == (struct sk_buff *) list); -} - -/** - * skb_get - reference buffer - * @skb: buffer to reference - * - * Makes another reference to a socket buffer and returns a pointer - * to the buffer. - */ - -static inline struct sk_buff *skb_get(struct sk_buff *skb) -{ - atomic_inc(&skb->users); - return skb; -} - -/* - * If users==1, we are the only owner and are can avoid redundant - * atomic change. - */ - -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. - */ - -static inline void kfree_skb(struct sk_buff *skb) -{ - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) - return; - __kfree_skb(skb); -} - -/** - * skb_cloned - is the buffer a clone - * @skb: buffer to check - * - * Returns true if the buffer was generated with skb_clone() and is - * one of multiple shared copies of the buffer. Cloned buffers are - * shared data so must not be written to under normal circumstances. - */ - -static inline int skb_cloned(struct sk_buff *skb) -{ - return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; -} - -/** - * skb_shared - is the buffer shared - * @skb: buffer to check - * - * Returns true if more than one person has a reference to this - * buffer. - */ - -static inline int skb_shared(struct sk_buff *skb) -{ - return (atomic_read(&skb->users) != 1); -} - -/** - * skb_share_check - check if buffer is shared and if so clone it - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the buffer is shared the buffer is cloned and the old copy - * drops a reference. 
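/*
 * Sketch of the usual caller pattern for the helper documented here,
 * e.g. on a receive path running at softirq level:
 *
 *	skb = skb_share_check(skb, GFP_ATOMIC);
 *	if (skb == NULL)
 *		return;
 *
 * On allocation failure the original buffer has already been freed, so
 * the caller simply gives up on the packet.
 */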
A new clone with a single reference is returned. - * If the buffer is not shared the original buffer is returned. When - * being called from interrupt status or with spinlocks held pri must - * be GFP_ATOMIC. - * - * NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) -{ - if (skb_shared(skb)) { - struct sk_buff *nskb; - nskb = skb_clone(skb, pri); - kfree_skb(skb); - return nskb; - } - return skb; -} - - -/* - * Copy shared buffers into a new sk_buff. We effectively do COW on - * packets to handle cases where we have a local reader and forward - * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. - */ - -/** - * skb_unshare - make a copy of a shared buffer - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the socket buffer is a clone then this function creates a new - * copy of the data, drops a reference count on the old copy and returns - * the new copy with the reference count at 1. If the buffer is not a clone - * the original buffer is returned. When called with a spinlock held or - * from interrupt state @pri must be %GFP_ATOMIC - * - * %NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) -{ - struct sk_buff *nskb; - if(!skb_cloned(skb)) - return skb; - nskb=skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ - return nskb; -} - -/** - * skb_peek - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the head element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_peek_tail - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the tail element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_queue_len - get queue length - * @list_: list to measure - * - * Return the length of an &sk_buff queue. - */ - -static inline __u32 skb_queue_len(struct sk_buff_head *list_) -{ - return(list_->qlen); -} - -static inline void skb_queue_head_init(struct sk_buff_head *list) -{ - spin_lock_init(&list->lock); - list->prev = (struct sk_buff *)list; - list->next = (struct sk_buff *)list; - list->qlen = 0; -} - -/* - * Insert an sk_buff at the start of a list. - * - * The "__skb_xxxx()" functions are the non-atomic ones that - * can only be called with interrupts disabled. 
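/*
 * Sketch of that convention from the caller's side (q, skb1, skb2 are
 * illustrative): the plain name takes the queue lock itself, while the
 * __ variants let one irq-safe acquisition cover a batch of operations.
 */
skb_queue_tail(&q, skb);			/* takes q.lock internally */

spin_lock_irqsave(&q.lock, flags);		/* or batch under one lock */
__skb_queue_tail(&q, skb1);
__skb_queue_tail(&q, skb2);
spin_unlock_irqrestore(&q.lock, flags);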
- */ - -/** - * __skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - prev = (struct sk_buff *)list; - next = prev->next; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * __skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the end of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - next = (struct sk_buff *)list; - prev = next->prev; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * __skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. This function does not take any locks - * so must be used with appropriate locks held only. The head item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) -{ - struct sk_buff *next, *prev, *result; - - prev = (struct sk_buff *) list; - next = prev->next; - result = NULL; - if (next != prev) { - result = next; - next = next->next; - list->qlen--; - next->prev = prev; - prev->next = next; - result->next = NULL; - result->prev = NULL; - result->list = NULL; - } - return result; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. 
- */ - -static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/* - * Insert a packet on a list. - */ - -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff * prev, struct sk_buff *next, - struct sk_buff_head * list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; - newsk->list = list; - list->qlen++; -} - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -/* - * Place a packet after a given packet in a list. - */ - -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - __skb_insert(newsk, old, old->next, old->list); -} - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -/* - * remove sk_buff from list. _Must_ be called atomically, and with - * the list known.. - */ - -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - struct sk_buff * next, * prev; - - list->qlen--; - next = skb->next; - prev = skb->prev; - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - next->prev = prev; - prev->next = next; -} - -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. - */ - -static inline void skb_unlink(struct sk_buff *skb) -{ - struct sk_buff_head *list = skb->list; - - if(list) { - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - if(skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } -} - -/* XXX: more streamlined implementation */ - -/** - * __skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. This function does not take any locks - * so must be used with appropriate locks held only. The tail item is - * returned or %NULL if the list is empty. 
- */ - -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek_tail(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -static inline int skb_is_nonlinear(const struct sk_buff *skb) -{ - return skb->data_len; -} - -static inline unsigned int skb_headlen(const struct sk_buff *skb) -{ - return skb->len - skb->data_len; -} - -#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) -#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) -#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) - -/* - * Add data to an sk_buff - */ - -static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - return tmp; -} - -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. - */ - -static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - if(skb->tail>skb->end) { - skb_over_panic(skb, len, current_text_addr()); - } - return tmp; -} - -static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data-=len; - skb->len+=len; - return skb->data; -} - -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ - -static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data-=len; - skb->len+=len; - if(skb->data<skb->head) { - skb_under_panic(skb, len, current_text_addr()); - } - return skb->data; -} - -static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) -{ - skb->len-=len; - if (skb->len < skb->data_len) - out_of_line_bug(); - return skb->data+=len; -} - -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. 
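/*
 * Sketch of how the data-area operations above compose on a transmit
 * path (payload/payload_len are illustrative names; skb_reserve() is
 * defined a little further below):
 */
struct sk_buff *skb = alloc_skb(MAX_HEADER + payload_len, GFP_ATOMIC);
if (skb == NULL)
	return -ENOMEM;
skb_reserve(skb, MAX_HEADER);			/* keep room for headers */
memcpy(skb_put(skb, payload_len), payload, payload_len);
skb_push(skb, sizeof(struct udphdr));		/* prepend a header... */
skb_pull(skb, sizeof(struct udphdr));		/* ...or strip it again */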
- */ - -static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb->len) - return NULL; - return __skb_pull(skb,len); -} - -extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); - -static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) - return NULL; - skb->len -= len; - return skb->data += len; -} - -static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb->len) - return NULL; - return __pskb_pull(skb,len); -} - -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) -{ - if (len <= skb_headlen(skb)) - return 1; - if (len > skb->len) - return 0; - return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); -} - -/** - * skb_headroom - bytes at buffer head - * @skb: buffer to check - * - * Return the number of bytes of free space at the head of an &sk_buff. - */ - -static inline int skb_headroom(const struct sk_buff *skb) -{ - return skb->data-skb->head; -} - -/** - * skb_tailroom - bytes at buffer end - * @skb: buffer to check - * - * Return the number of bytes of free space at the tail of an sk_buff - */ - -static inline int skb_tailroom(const struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; -} - -/** - * skb_reserve - adjust headroom - * @skb: buffer to alter - * @len: bytes to move - * - * Increase the headroom of an empty &sk_buff by reducing the tail - * room. This is only allowed for an empty buffer. - */ - -static inline void skb_reserve(struct sk_buff *skb, unsigned int len) -{ - skb->data+=len; - skb->tail+=len; -} - -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); - -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - } else { - ___pskb_trim(skb, len, 0); - } -} - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - */ - -static inline void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) { - __skb_trim(skb, len); - } -} - - -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } else { - return ___pskb_trim(skb, len, 1); - } -} - -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (len < skb->len) - return __pskb_trim(skb, len); - return 0; -} - -/** - * skb_orphan - orphan a buffer - * @skb: buffer to orphan - * - * If a buffer currently has an owner then we call the owner's - * destructor function and make the @skb unowned. The buffer continues - * to exist but is no longer charged to its former owner. - */ - - -static inline void skb_orphan(struct sk_buff *skb) -{ - if (skb->destructor) - skb->destructor(skb); - skb->destructor = NULL; - skb->sk = NULL; -} - -/** - * skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. 
- */ - - -static inline void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=skb_dequeue(list))!=NULL) - kfree_skb(skb); -} - -/** - * __skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function does not take the - * list lock and the caller must hold the relevant locks to use it. - */ - - -static inline void __skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=__skb_dequeue(list))!=NULL) - kfree_skb(skb); -} - -/** - * __dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. - */ -#ifndef CONFIG_XEN -static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - int gfp_mask) -{ - struct sk_buff *skb = alloc_skb(length+16, gfp_mask); - if (skb) - skb_reserve(skb,16); - return skb; -} -#else -extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); -#endif - -/** - * dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ - -static inline struct sk_buff *dev_alloc_skb(unsigned int length) -{ - return __dev_alloc_skb(length, GFP_ATOMIC); -} - -/** - * skb_cow - copy header of skb when it is required - * @skb: buffer to cow - * @headroom: needed headroom - * - * If the skb passed lacks sufficient headroom or its data part - * is shared, data is reallocated. If reallocation fails, an error - * is returned and original skb is not changed. - * - * The result is skb with writable area skb->head...skb->tail - * and at least @headroom of space at head. - */ - -static inline int -skb_cow(struct sk_buff *skb, unsigned int headroom) -{ - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); - - if (delta < 0) - delta = 0; - - if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); - return 0; -} - -/** - * skb_padto - pad an skbuff up to a minimal size - * @skb: buffer to pad - * @len: minimal length - * - * Pads up a buffer to ensure the trailing bytes exist and are - * blanked. If the buffer already contains sufficient data it - * is untouched. Returns the buffer, which may be a replacement - * for the original, or NULL for out of memory - in which case - * the original buffer is still freed. - */ - -static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if(likely(size >= len)) - return skb; - return skb_pad(skb, len-size); -} - -/** - * skb_linearize - convert paged skb to linear one - * @skb: buffer to linarize - * @gfp: allocation mode - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. 
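/*
 * Aside: a sketch of the skb_padto() contract documented a little
 * above, as an Ethernet driver would use it to pad runt frames
 * (ETH_ZLEN is the 60-byte minimum frame length):
 */
skb = skb_padto(skb, ETH_ZLEN);
if (skb == NULL)
	return 0;	/* out of memory; the original was already freed */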
*/ -int skb_linearize(struct sk_buff *skb, int gfp); - -static inline void *kmap_skb_frag(const skb_frag_t *frag) -{ -#ifdef CONFIG_HIGHMEM - if (in_irq()) - out_of_line_bug(); - - local_bh_disable(); -#endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); -} - -static inline void kunmap_skb_frag(void *vaddr) -{ - kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); -#ifdef CONFIG_HIGHMEM - local_bh_enable(); -#endif -} - -#define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ - skb=skb->next) - - -extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); -extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); -extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); -extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); -extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); - -extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); -extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); - -extern void skb_init(void); -extern void skb_add_mtu(int mtu); - -#ifdef CONFIG_NETFILTER -static inline void -nf_conntrack_put(struct nf_ct_info *nfct) -{ - if (nfct && atomic_dec_and_test(&nfct->master->use)) - nfct->master->destroy(nfct->master); -} -static inline void -nf_conntrack_get(struct nf_ct_info *nfct) -{ - if (nfct) - atomic_inc(&nfct->master->use); -} -static inline void -nf_reset(struct sk_buff *skb) -{ - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -} -#else /* CONFIG_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) {} -#endif /* CONFIG_NETFILTER */ - -#endif /* __KERNEL__ */ -#endif /* _LINUX_SKBUFF_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/include/linux/timer.h --- a/linux-2.4-xen-sparse/include/linux/timer.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,77 +0,0 @@ -#ifndef _LINUX_TIMER_H -#define _LINUX_TIMER_H - -#include <linux/config.h> -#include <linux/list.h> - -/* - * In Linux 2.4, static timers have been removed from the kernel. - * Timers may be dynamically created and destroyed, and should be initialized - * by a call to init_timer() upon creation. - * - * The "data" field enables use of a common timeout function for several - * timeouts. You can use this field to distinguish between the different - * invocations. 
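The struct defined just below carries that "data" field as an opaque cookie, so one timeout routine can serve many timers. A userspace sketch of the convention, with made-up names (toy_timer, port_timeout); nothing here touches real kernel timers.

    #include <stdio.h>

    struct toy_timer {
        unsigned long expires;
        unsigned long data;                  /* cookie for the handler */
        void (*function)(unsigned long);
    };

    static void port_timeout(unsigned long data)
    {
        printf("timeout on port %lu\n", data);
    }

    int main(void)
    {
        struct toy_timer a = { 100, 8080, port_timeout };
        struct toy_timer b = { 200, 9090, port_timeout };

        a.function(a.data);   /* same handler ...          */
        b.function(b.data);   /* ... distinguished by data */
        return 0;
    }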
- */ -struct timer_list { - struct list_head list; - unsigned long expires; - unsigned long data; - void (*function)(unsigned long); -}; - -extern void add_timer(struct timer_list * timer); -extern int del_timer(struct timer_list * timer); -#ifdef CONFIG_NO_IDLE_HZ -extern struct timer_list *next_timer_event(void); -#endif - -#ifdef CONFIG_SMP -extern int del_timer_sync(struct timer_list * timer); -extern void sync_timers(void); -#else -#define del_timer_sync(t) del_timer(t) -#define sync_timers() do { } while (0) -#endif - -/* - * mod_timer is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) - * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a). - * If the timer is known to be not pending (ie, in the handler), mod_timer - * is less efficient than a->expires = b; add_timer(a). - */ -int mod_timer(struct timer_list *timer, unsigned long expires); - -extern void it_real_fn(unsigned long); - -static inline void init_timer(struct timer_list * timer) -{ - timer->list.next = timer->list.prev = NULL; -} - -static inline int timer_pending (const struct timer_list * timer) -{ - return timer->list.next != NULL; -} - -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you wont have to - * alter your driver code. - * - * time_after(a,b) returns true if the time a is after time b. - * - * Do this with "<0" and ">=0" to only test the sign of the result. A - * good compiler would generate better code (and a really good compiler - * wouldn't care). Gcc is currently neither. - */ -#define time_after(a,b) ((long)(b) - (long)(a) < 0) -#define time_before(a,b) time_after(b,a) - -#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) -#define time_before_eq(a,b) time_after_eq(b,a) - -#endif diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/kernel/time.c --- a/linux-2.4-xen-sparse/kernel/time.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,415 +0,0 @@ -/* - * linux/kernel/time.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file contains the interface functions for the various - * time related system calls: time, stime, gettimeofday, settimeofday, - * adjtime - */ -/* - * Modification history kernel/time.c - * - * 1993-09-02 Philip Gladstone - * Created file with time related functions from sched.c and adjtimex() - * 1993-10-08 Torsten Duwe - * adjtime interface update and CMOS clock write code - * 1995-08-13 Torsten Duwe - * kernel PLL updated to 1994-12-13 specs (rfc-1589) - * 1999-01-16 Ulrich Windl - * Introduced error checking for many cases in adjtimex(). - * Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10) - * (Even though the technical memorandum forbids it) - */ - -#include <linux/mm.h> -#include <linux/timex.h> -#include <linux/smp_lock.h> - -#include <asm/uaccess.h> - -/* - * The timezone where the local system is located. Used as a default by some - * programs who obtain this value by using gettimeofday. - */ -struct timezone sys_tz; - -/* The xtime_lock is not only serializing the xtime read/writes but it's also - serializing all accesses to the global NTP variables now. 
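The time_after() family above is wrap-safe because it compares via a signed subtraction of same-width integers instead of comparing the raw unsigned values. A standalone demonstration with an explicit 32-bit counter; two's-complement conversion is assumed, as the kernel assumes it.

    #include <assert.h>

    /* 32-bit rendering of time_after(); int matches unsigned int here */
    #define time_after32(a, b)  ((int)(b) - (int)(a) < 0)

    int main(void)
    {
        unsigned int before_wrap = 0xfffffff0u;  /* counter near wrap  */
        unsigned int after_wrap  = 0x00000010u;  /* counter has wrapped */

        /* the naive comparison gets the order backwards ... */
        assert(after_wrap < before_wrap);
        /* ... the signed-difference test gets it right */
        assert(time_after32(after_wrap, before_wrap));
        return 0;
    }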
*/ -extern rwlock_t xtime_lock; - -#if !defined(__alpha__) && !defined(__ia64__) - -/* - * sys_time() can be implemented in user-level using - * sys_gettimeofday(). Is this for backwards compatibility? If so, - * why not move it into the appropriate arch directory (for those - * architectures that need it). - * - * XXX This function is NOT 64-bit clean! - */ -asmlinkage long sys_time(int * tloc) -{ - struct timeval now; - int i; - - do_gettimeofday(&now); - i = now.tv_sec; - if (tloc) { - if (put_user(i,tloc)) - i = -EFAULT; - } - return i; -} - -#if !defined(CONFIG_XEN) - -/* - * sys_stime() can be implemented in user-level using - * sys_settimeofday(). Is this for backwards compatibility? If so, - * why not move it into the appropriate arch directory (for those - * architectures that need it). - */ - -asmlinkage long sys_stime(int * tptr) -{ - int value; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - if (get_user(value, tptr)) - return -EFAULT; - write_lock_irq(&xtime_lock); - vxtime_lock(); - xtime.tv_sec = value; - xtime.tv_usec = 0; - vxtime_unlock(); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - write_unlock_irq(&xtime_lock); - return 0; -} - -#endif - -#endif - -asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (copy_to_user(tv, &ktv, sizeof(ktv))) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -/* - * Adjust the time obtained from the CMOS to be UTC time instead of - * local time. - * - * This is ugly, but preferable to the alternatives. Otherwise we - * would either need to write a program to do it in /etc/rc (and risk - * confusion if the program gets run more than once; it would also be - * hard to make the program warp the clock precisely n hours) or - * compile in the timezone information into the kernel. Bad, bad.... - * - * - TYT, 1992-01-01 - * - * The best thing to do is to keep the CMOS clock in universal time (UTC) - * as real UNIX machines always do it. This avoids all headaches about - * daylight saving times and warping kernel clocks. - */ -inline static void warp_clock(void) -{ - write_lock_irq(&xtime_lock); - vxtime_lock(); - xtime.tv_sec += sys_tz.tz_minuteswest * 60; - vxtime_unlock(); - write_unlock_irq(&xtime_lock); -} - -/* - * In case for some reason the CMOS clock has not already been running - * in UTC, but in some local time: The first time we set the timezone, - * we will warp the clock so that it is ticking UTC time instead of - * local time. Presumably, if someone is setting the timezone then we - * are running in an environment where the programs understand about - * timezones. This should be done at boot time in the /etc/rc script, - * as soon as possible, so that the clock can be set right. Otherwise, - * various programs will get confused when the clock gets warped. - */ - -int do_sys_settimeofday(struct timeval *tv, struct timezone *tz) -{ - static int firsttime = 1; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (tz) { - /* SMP safe, global irq locking makes it work. */ - sys_tz = *tz; - if (firsttime) { - firsttime = 0; - if (!tv) - warp_clock(); - } - } - if (tv) - { - /* SMP safe, again the code in arch/foo/time.c should - * globally block out interrupts when it runs. 
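warp_clock() above nudges xtime forward by tz_minuteswest * 60 so a CMOS clock that had been keeping local time starts reading as UTC. The adjustment is a single multiply-add; a trivial standalone illustration with invented values:

    #include <stdio.h>

    int main(void)
    {
        long cmos_secs = 1000000000L;  /* hypothetical clock reading */
        int  tz_minuteswest = 300;     /* e.g. UTC-5 */

        long utc_secs = cmos_secs + tz_minuteswest * 60L;
        printf("local %ld -> utc %ld\n", cmos_secs, utc_secs);
        return 0;
    }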
- */ - do_settimeofday(tv); - } - return 0; -} - -asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz) -{ - struct timeval new_tv; - struct timezone new_tz; - - if (tv) { - if (copy_from_user(&new_tv, tv, sizeof(*tv))) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&new_tz, tz, sizeof(*tz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL); -} - -long pps_offset; /* pps time offset (us) */ -long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */ - -long pps_freq; /* frequency offset (scaled ppm) */ -long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ - -long pps_valid = PPS_VALID; /* pps signal watchdog counter */ - -int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ - -long pps_jitcnt; /* jitter limit exceeded */ -long pps_calcnt; /* calibration intervals */ -long pps_errcnt; /* calibration errors */ -long pps_stbcnt; /* stability limit exceeded */ - -/* hook for a loadable hardpps kernel module */ -void (*hardpps_ptr)(struct timeval *); - -/* adjtimex mainly allows reading (and writing, if superuser) of - * kernel time-keeping variables. used by xntpd. - */ -int do_adjtimex(struct timex *txc) -{ - long ltemp, mtemp, save_adjust; - int result; - - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - - /* Now we validate the data before disabling interrupts */ - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - /* singleshot must not be used with any other mode bits */ - if (txc->modes != ADJ_OFFSET_SINGLESHOT) - return -EINVAL; - - if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) - /* adjustment Offset limited to +- .512 seconds */ - if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) - return -EINVAL; - - /* if the quartz is off by more than 10% something is VERY wrong ! */ - if (txc->modes & ADJ_TICK) - if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) - return -EINVAL; - - write_lock_irq(&xtime_lock); - result = time_state; /* mostly `TIME_OK' */ - - /* Save for later - semantics of adjtime is to return old value */ - save_adjust = time_adjust; - -#if 0 /* STA_CLOCKERR is never set yet */ - time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ -#endif - /* If there are input parameters, then process them */ - if (txc->modes) - { - if (txc->modes & ADJ_STATUS) /* only set allowed bits */ - time_status = (txc->status & ~STA_RONLY) | - (time_status & STA_RONLY); - - if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ - if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { - result = -EINVAL; - goto leave; - } - time_freq = txc->freq - pps_freq; - } - - if (txc->modes & ADJ_MAXERROR) { - if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_maxerror = txc->maxerror; - } - - if (txc->modes & ADJ_ESTERROR) { - if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_esterror = txc->esterror; - } - - if (txc->modes & ADJ_TIMECONST) { /* p. 
24 */ - if (txc->constant < 0) { /* NTP v4 uses values > 6 */ - result = -EINVAL; - goto leave; - } - time_constant = txc->constant; - } - - if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ - if (txc->modes == ADJ_OFFSET_SINGLESHOT) { - /* adjtime() is independent from ntp_adjtime() */ - time_adjust = txc->offset; - } - else if ( time_status & (STA_PLL | STA_PPSTIME) ) { - ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == - (STA_PPSTIME | STA_PPSSIGNAL) ? - pps_offset : txc->offset; - - /* - * Scale the phase adjustment and - * clamp to the operating range. - */ - if (ltemp > MAXPHASE) - time_offset = MAXPHASE << SHIFT_UPDATE; - else if (ltemp < -MAXPHASE) - time_offset = -(MAXPHASE << SHIFT_UPDATE); - else - time_offset = ltemp << SHIFT_UPDATE; - - /* - * Select whether the frequency is to be controlled - * and in which mode (PLL or FLL). Clamp to the operating - * range. Ugly multiply/divide should be replaced someday. - */ - - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; - if (time_status & STA_FLL) { - if (mtemp >= MINSEC) { - ltemp = (time_offset / mtemp) << (SHIFT_USEC - - SHIFT_UPDATE); - if (ltemp < 0) - time_freq -= -ltemp >> SHIFT_KH; - else - time_freq += ltemp >> SHIFT_KH; - } else /* calibration interval too short (p. 12) */ - result = TIME_ERROR; - } else { /* PLL mode */ - if (mtemp < MAXSEC) { - ltemp *= mtemp; - if (ltemp < 0) - time_freq -= -ltemp >> (time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC); - else - time_freq += ltemp >> (time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC); - } else /* calibration interval too long (p. 12) */ - result = TIME_ERROR; - } - if (time_freq > time_tolerance) - time_freq = time_tolerance; - else if (time_freq < -time_tolerance) - time_freq = -time_tolerance; - } /* STA_PLL || STA_PPSTIME */ - } /* txc->modes & ADJ_OFFSET */ - if (txc->modes & ADJ_TICK) { - /* if the quartz is off by more than 10% something is - VERY wrong ! */ - if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { - result = -EINVAL; - goto leave; - } - tick = txc->tick; - } - } /* txc->modes */ -leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 - || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 - && (time_status & STA_PPSSIGNAL) == 0) - /* p. 24, (b) */ - || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) - == (STA_PPSTIME|STA_PPSJITTER)) - /* p. 24, (c) */ - || ((time_status & STA_PPSFREQ) != 0 - && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) - /* p. 
24, (d) */ - result = TIME_ERROR; - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; - else { - if (time_offset < 0) - txc->offset = -(-time_offset >> SHIFT_UPDATE); - else - txc->offset = time_offset >> SHIFT_UPDATE; - } - txc->freq = time_freq + pps_freq; - txc->maxerror = time_maxerror; - txc->esterror = time_esterror; - txc->status = time_status; - txc->constant = time_constant; - txc->precision = time_precision; - txc->tolerance = time_tolerance; - txc->tick = tick; - txc->ppsfreq = pps_freq; - txc->jitter = pps_jitter >> PPS_AVG; - txc->shift = pps_shift; - txc->stabil = pps_stabil; - txc->jitcnt = pps_jitcnt; - txc->calcnt = pps_calcnt; - txc->errcnt = pps_errcnt; - txc->stbcnt = pps_stbcnt; - write_unlock_irq(&xtime_lock); - do_gettimeofday(&txc->time); - return(result); -} - -asmlinkage long sys_adjtimex(struct timex *txc_p) -{ - struct timex txc; /* Local copy of parameter */ - int ret; - - /* Copy the user data space into the kernel copy - * structure. But bear in mind that the structures - * may change - */ - if(copy_from_user(&txc, txc_p, sizeof(struct timex))) - return -EFAULT; - ret = do_adjtimex(&txc); - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/kernel/timer.c --- a/linux-2.4-xen-sparse/kernel/timer.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,968 +0,0 @@ -/* - * linux/kernel/timer.c - * - * Kernel internal timers, kernel timekeeping, basic process system calls - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. - * - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). - * Copyright (C) 1998 Andrea Arcangeli - * 1999-03-10 Improved NTP compatibility by Ulrich Windl - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/timex.h> -#include <linux/delay.h> -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/kernel_stat.h> - -#include <asm/uaccess.h> - -/* - * Timekeeping variables - */ - -long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ - -/* The current time */ -struct timeval xtime __attribute__ ((aligned (16))); - -/* Don't completely fail for HZ > 500. */ -int tickadj = 500/HZ ? 
: 1; /* microsecs */ - -DECLARE_TASK_QUEUE(tq_timer); -DECLARE_TASK_QUEUE(tq_immediate); - -/* - * phase-lock loop variables - */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (us) */ -long time_constant = 2; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ -long time_precision = 1; /* clock precision (us) */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -long time_phase; /* phase offset (scaled us) */ -long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; - /* frequency offset (scaled ppm)*/ -long time_adj; /* tick adjust (scaled 1 / HZ) */ -long time_reftime; /* time at last adjustment (s) */ - -long time_adjust; -long time_adjust_step; - -unsigned long event; - -extern int do_setitimer(int, struct itimerval *, struct itimerval *); - -unsigned long volatile jiffies; - -unsigned int * prof_buffer; -unsigned long prof_len; -unsigned long prof_shift; - -/* - * Event timer code - */ -#define TVN_BITS 6 -#define TVR_BITS 8 -#define TVN_SIZE (1 << TVN_BITS) -#define TVR_SIZE (1 << TVR_BITS) -#define TVN_MASK (TVN_SIZE - 1) -#define TVR_MASK (TVR_SIZE - 1) - -struct timer_vec { - int index; - struct list_head vec[TVN_SIZE]; -}; - -struct timer_vec_root { - int index; - struct list_head vec[TVR_SIZE]; -}; - -static struct timer_vec tv5; -static struct timer_vec tv4; -static struct timer_vec tv3; -static struct timer_vec tv2; -static struct timer_vec_root tv1; - -static struct timer_vec * const tvecs[] = { - (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 -}; - -static struct list_head * run_timer_list_running; - -#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) - -void init_timervecs (void) -{ - int i; - - for (i = 0; i < TVN_SIZE; i++) { - INIT_LIST_HEAD(tv5.vec + i); - INIT_LIST_HEAD(tv4.vec + i); - INIT_LIST_HEAD(tv3.vec + i); - INIT_LIST_HEAD(tv2.vec + i); - } - for (i = 0; i < TVR_SIZE; i++) - INIT_LIST_HEAD(tv1.vec + i); -} - -static unsigned long timer_jiffies; - -static inline void internal_add_timer(struct timer_list *timer) -{ - /* - * must be cli-ed when calling this - */ - unsigned long expires = timer->expires; - unsigned long idx = expires - timer_jiffies; - struct list_head * vec; - - if (run_timer_list_running) - vec = run_timer_list_running; - else if (idx < TVR_SIZE) { - int i = expires & TVR_MASK; - vec = tv1.vec + i; - } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { - int i = (expires >> TVR_BITS) & TVN_MASK; - vec = tv2.vec + i; - } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - vec = tv3.vec + i; - } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - vec = tv4.vec + i; - } else if ((signed long) idx < 0) { - /* can happen if you add a timer with expires == jiffies, - * or you set a timer to go off in the past - */ - vec = tv1.vec + tv1.index; - } else if (idx <= 0xffffffffUL) { - int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = tv5.vec + i; - } else { - /* Can only get here on architectures with 64-bit jiffies */ - INIT_LIST_HEAD(&timer->list); - return; - } - /* - * Timers are FIFO! 
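internal_add_timer() above hashes a timer into one of five vectors by how soon it expires: the low TVR_BITS (8) bits of the expiry index tv1, the next TVN_BITS (6) index tv2, and so on. A standalone sketch of the same bucketing; classify is an illustrative name and only the first three levels are shown.

    #include <stdio.h>

    #define TVR_BITS 8
    #define TVN_BITS 6
    #define TVR_SIZE (1UL << TVR_BITS)
    #define TVR_MASK (TVR_SIZE - 1)
    #define TVN_MASK ((1UL << TVN_BITS) - 1)

    static void classify(unsigned long expires, unsigned long now)
    {
        unsigned long idx = expires - now;

        if (idx < TVR_SIZE)
            printf("%7lu -> tv1[%lu]\n", expires, expires & TVR_MASK);
        else if (idx < 1UL << (TVR_BITS + TVN_BITS))
            printf("%7lu -> tv2[%lu]\n", expires,
                   (expires >> TVR_BITS) & TVN_MASK);
        else
            printf("%7lu -> tv3 or beyond\n", expires);
    }

    int main(void)
    {
        classify(100, 0);       /* near: tv1 */
        classify(5000, 0);      /* mid:  tv2 */
        classify(1UL << 20, 0); /* far:  higher vectors */
        return 0;
    }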
- */ - list_add(&timer->list, vec->prev); -} - -/* Initialize both explicitly - let's try to have them in the same cache line */ -spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; - -#ifdef CONFIG_SMP -volatile struct timer_list * volatile running_timer; -#define timer_enter(t) do { running_timer = t; mb(); } while (0) -#define timer_exit() do { running_timer = NULL; } while (0) -#define timer_is_running(t) (running_timer == t) -#define timer_synchronize(t) while (timer_is_running(t)) barrier() -#else -#define timer_enter(t) do { } while (0) -#define timer_exit() do { } while (0) -#endif - -void add_timer(struct timer_list *timer) -{ - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - if (timer_pending(timer)) - goto bug; - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return; -bug: - spin_unlock_irqrestore(&timerlist_lock, flags); - printk("bug: kernel timer added twice at %p.\n", - __builtin_return_address(0)); -} - -static inline int detach_timer (struct timer_list *timer) -{ - if (!timer_pending(timer)) - return 0; - list_del(&timer->list); - return 1; -} - -int mod_timer(struct timer_list *timer, unsigned long expires) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - timer->expires = expires; - ret = detach_timer(timer); - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; -} - -int del_timer(struct timer_list * timer) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - ret = detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; -} - -#ifdef CONFIG_SMP -void sync_timers(void) -{ - spin_unlock_wait(&global_bh_lock); -} - -/* - * SMP specific function to delete periodic timer. - * Caller must disable by some means restarting the timer - * for new. Upon exit the timer is not queued and handler is not running - * on any CPU. It returns number of times, which timer was deleted - * (for reference counting). - */ - -int del_timer_sync(struct timer_list * timer) -{ - int ret = 0; - - for (;;) { - unsigned long flags; - int running; - - spin_lock_irqsave(&timerlist_lock, flags); - ret += detach_timer(timer); - timer->list.next = timer->list.prev = 0; - running = timer_is_running(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - - if (!running) - break; - - timer_synchronize(timer); - } - - return ret; -} -#endif - - -static inline void cascade_timers(struct timer_vec *tv) -{ - /* cascade all the timers from tv up one level */ - struct list_head *head, *curr, *next; - - head = tv->vec + tv->index; - curr = head->next; - /* - * We are removing _all_ timers from the list, so we don't have to - * detach them individually, just clear the list afterwards. 
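cascade_timers() here refills the lower level each time tv1.index wraps (see run_timer_list() below), so higher vectors are touched exponentially less often: tv2 once per 256 ticks, tv3 once per 256*64, and so on. A quick standalone count of those events over a synthetic tick stream; pure arithmetic, no timers involved.

    #include <stdio.h>

    int main(void)
    {
        unsigned long t, tv2_cascades = 0, tv3_cascades = 0;
        unsigned long total = 256UL * 64 * 2;    /* two tv2 wraps */

        for (t = 1; t <= total; t++)
            if (t % 256 == 0) {                  /* tv1 wrapped */
                tv2_cascades++;
                if (tv2_cascades % 64 == 0)      /* tv2 wrapped too */
                    tv3_cascades++;
            }
        printf("tv2 cascades: %lu, tv3 cascades: %lu\n",
               tv2_cascades, tv3_cascades);      /* 128 and 2 */
        return 0;
    }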
- */ - while (curr != head) { - struct timer_list *tmp; - - tmp = list_entry(curr, struct timer_list, list); - next = curr->next; - list_del(curr); // not needed - internal_add_timer(tmp); - curr = next; - } - INIT_LIST_HEAD(head); - tv->index = (tv->index + 1) & TVN_MASK; -} - -static inline void run_timer_list(void) -{ - spin_lock_irq(&timerlist_lock); - while ((long)(jiffies - timer_jiffies) >= 0) { - LIST_HEAD(queued); - struct list_head *head, *curr; - if (!tv1.index) { - int n = 1; - do { - cascade_timers(tvecs[n]); - } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); - } - run_timer_list_running = &queued; -repeat: - head = tv1.vec + tv1.index; - curr = head->next; - if (curr != head) { - struct timer_list *timer; - void (*fn)(unsigned long); - unsigned long data; - - timer = list_entry(curr, struct timer_list, list); - fn = timer->function; - data= timer->data; - - detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - timer_enter(timer); - spin_unlock_irq(&timerlist_lock); - fn(data); - spin_lock_irq(&timerlist_lock); - timer_exit(); - goto repeat; - } - run_timer_list_running = NULL; - ++timer_jiffies; - tv1.index = (tv1.index + 1) & TVR_MASK; - - curr = queued.next; - while (curr != &queued) { - struct timer_list *timer; - - timer = list_entry(curr, struct timer_list, list); - curr = curr->next; - internal_add_timer(timer); - } - } - spin_unlock_irq(&timerlist_lock); -} - -#ifdef CONFIG_NO_IDLE_HZ -/* - * Find out when the next timer event is due to happen. This - * is used on S/390 to stop all activity when all cpus are idle. - * And in XenoLinux to achieve the same. - * The timerlist_lock must be acquired before calling this function. - */ -struct timer_list *next_timer_event(void) -{ - struct timer_list *nte, *tmp; - struct list_head *lst; - int i, j; - - /* Look for the next timer event in tv1. */ - i = 0; - j = tvecs[0]->index; - do { - struct list_head *head = tvecs[0]->vec + j; - if (!list_empty(head)) { - nte = list_entry(head->next, struct timer_list, list); - goto found; - } - j = (j + 1) & TVR_MASK; - } while (j != tv1.index); - - /* No event found in tv1. Check tv2-tv5. */ - for (i = 1; i < NOOF_TVECS; i++) { - j = tvecs[i]->index; - do { - nte = NULL; - list_for_each(lst, tvecs[i]->vec + j) { - tmp = list_entry(lst, struct timer_list, list); - if (nte == NULL || - time_before(tmp->expires, nte->expires)) - nte = tmp; - } - if (nte) - goto found; - j = (j + 1) & TVN_MASK; - } while (j != tvecs[i]->index); - } - return NULL; -found: - /* Found timer event in tvecs[i]->vec[j] */ - if (j < tvecs[i]->index && i < NOOF_TVECS-1) { - /* - * The search wrapped. We need to look at the next list - * from tvecs[i+1] that would cascade into tvecs[i]. - */ - list_for_each(lst, tvecs[i+1]->vec+tvecs[i+1]->index) { - tmp = list_entry(lst, struct timer_list, list); - if (time_before(tmp->expires, nte->expires)) - nte = tmp; - } - } - return nte; -} -#endif - -spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; - -void tqueue_bh(void) -{ - run_task_queue(&tq_timer); -} - -void immediate_bh(void) -{ - run_task_queue(&tq_immediate); -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@xxxxxxxx) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. 
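The leap-second handling in second_overflow() below is a small state machine; on the insert path it sets the clock back one second at midnight UTC, so the final second is effectively replayed as 23:59:60. A standalone model of just that transition, with the states trimmed to the insert case:

    #include <stdio.h>

    enum { TIME_OK, TIME_INS, TIME_OOP };

    int main(void)
    {
        long tv_sec = 2 * 86400 - 2;  /* two seconds to midnight */
        int  state = TIME_INS;        /* a leap insert is armed */
        int  i;

        for (i = 0; i < 4; i++) {
            tv_sec++;                 /* the once-a-second tick */
            if (state == TIME_INS && tv_sec % 86400 == 0) {
                tv_sec--;             /* replay the final second */
                state = TIME_OOP;
                printf("inserting leap second\n");
            }
            printf("tv_sec = %ld\n", tv_sec);
        }
        return 0;
    }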
- * - */ -static void second_overflow(void) -{ - long ltemp; - - /* Bump the maxerror field */ - time_maxerror += time_tolerance >> SHIFT_USEC; - if ( time_maxerror > NTP_PHASE_LIMIT ) { - time_maxerror = NTP_PHASE_LIMIT; - time_status |= STA_UNSYNC; - } - - /* - * Leap second processing. If in leap-insert state at - * the end of the day, the system clock is set back one - * second; if in leap-delete state, the system clock is - * set ahead one second. The microtime() routine or - * external clock driver will insure that reported time - * is always monotonic. The ugly divides should be - * replaced. - */ - switch (time_state) { - - case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; - break; - - case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); - } - break; - - case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); - } - break; - - case TIME_OOP: - time_state = TIME_WAIT; - break; - - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - } - - /* - * Compute the phase adjustment for the next second. In - * PLL mode, the offset is reduced by a fixed factor - * times the time constant. In FLL mode the offset is - * used directly. In either mode, the maximum phase - * adjustment for each second is clamped so as to spread - * the adjustment over not more than the number of - * seconds between updates. - */ - if (time_offset < 0) { - ltemp = -time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset += ltemp; - time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } else { - ltemp = time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset -= ltemp; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } - - /* - * Compute the frequency estimate and additional phase - * adjustment due to frequency error for the next - * second. When the PPS signal is engaged, gnaw on the - * watchdog counter and update the frequency computed by - * the pll and the PPS signal. - */ - pps_valid++; - if (pps_valid == PPS_VALID) { /* PPS signal lost */ - pps_jitter = MAXTIME; - pps_stabil = MAXFREQ; - time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | - STA_PPSWANDER | STA_PPSERROR); - } - ltemp = time_freq + pps_freq; - if (ltemp < 0) - time_adj -= -ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - else - time_adj += ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - -#if HZ == 100 - /* Compensate for (HZ==100) != (1 << SHIFT_HZ). - * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) - */ - if (time_adj < 0) - time_adj -= (-time_adj >> 2) + (-time_adj >> 5); - else - time_adj += (time_adj >> 2) + (time_adj >> 5); -#endif -} - -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) -{ - if ( (time_adjust_step = time_adjust) != 0 ) { - /* We are doing an adjtime thing. - * - * Prepare time_adjust_step to be within bounds. - * Note that a positive time_adjust means we want the clock - * to run faster. 
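The clamping described below bounds each tick's slew to +/- tickadj microseconds, so a large adjtime() offset drains gradually rather than stepping the clock. A standalone model of that loop; the values are invented, and the real code folds this into the per-tick handler.

    #include <stdio.h>

    int main(void)
    {
        long time_adjust = 2500;   /* usecs still owed to adjtime() */
        long tickadj = 1000;       /* per-tick slew limit */
        int  n = 0;

        while (time_adjust != 0) {
            long step = time_adjust;

            if (step > tickadj)
                step = tickadj;
            else if (step < -tickadj)
                step = -tickadj;
            time_adjust -= step;
            printf("tick %d: slewed %ld us, %ld left\n",
                   ++n, step, time_adjust);
        }
        return 0;
    }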
- * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; - - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - } - xtime.tv_usec += tick + time_adjust_step; - /* - * Advance the phase, once it gets to one microsecond, then - * advance the tick more. - */ - time_phase += time_adj; - if (time_phase <= -FINEUSEC) { - long ltemp = -time_phase >> SHIFT_SCALE; - time_phase += ltemp << SHIFT_SCALE; - xtime.tv_usec -= ltemp; - } - else if (time_phase >= FINEUSEC) { - long ltemp = time_phase >> SHIFT_SCALE; - time_phase -= ltemp << SHIFT_SCALE; - xtime.tv_usec += ltemp; - } -} - -/* - * Using a loop looks inefficient, but "ticks" is - * usually just one (we shouldn't be losing ticks, - * we're doing this this way mainly for interrupt - * latency reasons, not because we think we'll - * have lots of lost timer ticks - */ -static void update_wall_time(unsigned long ticks) -{ - do { - ticks--; - update_wall_time_one_tick(); - } while (ticks); - - while (xtime.tv_usec >= 1000000) { - xtime.tv_usec -= 1000000; - xtime.tv_sec++; - second_overflow(); - } -} - -static inline void do_process_times(struct task_struct *p, - unsigned long user, unsigned long system) -{ - unsigned long psecs; - - psecs = (p->times.tms_utime += user); - psecs += (p->times.tms_stime += system); - if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) { - /* Send SIGXCPU every second.. */ - if (!(psecs % HZ)) - send_sig(SIGXCPU, p, 1); - /* and SIGKILL when we go over max.. */ - if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max) - send_sig(SIGKILL, p, 1); - } -} - -static inline void do_it_virt(struct task_struct * p, unsigned long ticks) -{ - unsigned long it_virt = p->it_virt_value; - - if (it_virt) { - it_virt -= ticks; - if (!it_virt) { - it_virt = p->it_virt_incr; - send_sig(SIGVTALRM, p, 1); - } - p->it_virt_value = it_virt; - } -} - -static inline void do_it_prof(struct task_struct *p) -{ - unsigned long it_prof = p->it_prof_value; - - if (it_prof) { - if (--it_prof == 0) { - it_prof = p->it_prof_incr; - send_sig(SIGPROF, p, 1); - } - p->it_prof_value = it_prof; - } -} - -void update_one_process(struct task_struct *p, unsigned long user, - unsigned long system, int cpu) -{ - p->per_cpu_utime[cpu] += user; - p->per_cpu_stime[cpu] += system; - do_process_times(p, user, system); - do_it_virt(p, user); - do_it_prof(p); -} - -/* - * Called from the timer interrupt handler to charge one tick to the current - * process. user_tick is 1 if the tick is user time, 0 for system. - */ -void update_process_times(int user_tick) -{ - struct task_struct *p = current; - int cpu = smp_processor_id(), system = user_tick ^ 1; - - update_one_process(p, user_tick, system, cpu); - if (p->pid) { - if (--p->counter <= 0) { - p->counter = 0; - /* - * SCHED_FIFO is priority preemption, so this is - * not the place to decide whether to reschedule a - * SCHED_FIFO task or not - Bhavesh Davda - */ - if (p->policy != SCHED_FIFO) { - p->need_resched = 1; - } - } - if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_tick; - else - kstat.per_cpu_user[cpu] += user_tick; - kstat.per_cpu_system[cpu] += system; - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system; -} - -/* - * Called from the timer interrupt handler to charge a couple of ticks - * to the current process. 
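do_process_times() above enforces RLIMIT_CPU by raising SIGXCPU once per second past the soft limit and SIGKILL past the hard limit. A standalone model of those thresholds; HZ and the limits are made-up values, and printf stands in for send_sig().

    #include <stdio.h>

    #define HZ 100

    int main(void)
    {
        unsigned long soft = 2, hard = 4;   /* rlim_cur / rlim_max, secs */
        unsigned long psecs;                /* accumulated cpu ticks */

        for (psecs = 1; psecs <= 5 * HZ; psecs++)
            if (psecs / HZ > soft) {
                if (!(psecs % HZ))
                    printf("%lu ticks: SIGXCPU\n", psecs);
                if (psecs / HZ > hard) {
                    printf("%lu ticks: SIGKILL\n", psecs);
                    break;
                }
            }
        return 0;
    }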
- */ -void update_process_times_us(int user_ticks, int system_ticks) -{ - struct task_struct *p = current; - int cpu = smp_processor_id(); - - update_one_process(p, user_ticks, system_ticks, cpu); - if (p->pid) { - p->counter -= user_ticks + system_ticks; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_ticks; - else - kstat.per_cpu_user[cpu] += user_ticks; - kstat.per_cpu_system[cpu] += system_ticks; - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system_ticks; -} - -/* - * Nr of active tasks - counted in fixed-point numbers - */ -static unsigned long count_active_tasks(void) -{ - struct task_struct *p; - unsigned long nr = 0; - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE))) - nr += FIXED_1; - } - read_unlock(&tasklist_lock); - return nr; -} - -/* - * Hmm.. Changed this, as the GNU make sources (load.c) seems to - * imply that avenrun[] is the standard name for this kind of thing. - * Nothing else seems to be standardized: the fractional size etc - * all seem to differ on different machines. - */ -unsigned long avenrun[3]; - -static inline void calc_load(unsigned long ticks) -{ - unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - while (count < 0) { - count += LOAD_FREQ; - active_tasks = count_active_tasks(); - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); - } -} - -/* jiffies at the most recent update of wall time */ -unsigned long wall_jiffies; - -/* - * This spinlock protect us from races in SMP while playing with xtime. -arca - */ -rwlock_t xtime_lock = RW_LOCK_UNLOCKED; - -static inline void update_times(void) -{ - unsigned long ticks; - - /* - * update_times() is run from the raw timer_bh handler so we - * just know that the irqs are locally enabled and so we don't - * need to save/restore the flags of the local CPU here. -arca - */ - write_lock_irq(&xtime_lock); - vxtime_lock(); - - ticks = jiffies - wall_jiffies; - if (ticks) { - wall_jiffies += ticks; - update_wall_time(ticks); - } - vxtime_unlock(); - write_unlock_irq(&xtime_lock); - calc_load(ticks); -} - -void timer_bh(void) -{ - update_times(); - run_timer_list(); -} - -void do_timer(struct pt_regs *regs) -{ - (*(unsigned long *)&jiffies)++; -#ifndef CONFIG_SMP - /* SMP process accounting uses the local APIC timer */ - - update_process_times(user_mode(regs)); -#endif - mark_bh(TIMER_BH); - if (TQ_ACTIVE(tq_timer)) - mark_bh(TQUEUE_BH); -} - -void do_timer_ticks(int ticks) -{ - (*(unsigned long *)&jiffies) += ticks; - mark_bh(TIMER_BH); - if (TQ_ACTIVE(tq_timer)) - mark_bh(TQUEUE_BH); -} - -#if !defined(__alpha__) && !defined(__ia64__) - -/* - * For backwards compatibility? This can be done in libc so Alpha - * and all newer ports shouldn't need it. - */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) -{ - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. 
*/ - /* And we'd better return too much than too little anyway */ - if (it_old.it_value.tv_usec) - oldalarm++; - return oldalarm; -} - -#endif - -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ - -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -asmlinkage long sys_getpid(void) -{ - return current->tgid; -} - -/* - * This is not strictly SMP safe: p_opptr could change - * from under us. However, rather than getting any lock - * we can use an optimistic algorithm: get the parent - * pid, and go back and check that the parent is still - * the same. If it has changed (which is extremely unlikely - * indeed), we just try again.. - * - * NOTE! This depends on the fact that even if we _do_ - * get an old value of "parent", we can happily dereference - * the pointer: we just can't necessarily trust the result - * until we know that the parent pointer is valid. - * - * The "mb()" macro is a memory barrier - a synchronizing - * event. It also makes sure that gcc doesn't optimize - * away the necessary memory references.. The barrier doesn't - * have to have all that strong semantics: on x86 we don't - * really require a synchronizing instruction, for example. - * The barrier is more important for code generation than - * for any real memory ordering semantics (even if there is - * a small window for a race, using the old pointer is - * harmless for a while). - */ -asmlinkage long sys_getppid(void) -{ - int pid; - struct task_struct * me = current; - struct task_struct * parent; - - parent = me->p_opptr; - for (;;) { - pid = parent->pid; -#if CONFIG_SMP -{ - struct task_struct *old = parent; - mb(); - parent = me->p_opptr; - if (old != parent) - continue; -} -#endif - break; - } - return pid; -} - -asmlinkage long sys_getuid(void) -{ - /* Only we change this so SMP safe */ - return current->uid; -} - -asmlinkage long sys_geteuid(void) -{ - /* Only we change this so SMP safe */ - return current->euid; -} - -asmlinkage long sys_getgid(void) -{ - /* Only we change this so SMP safe */ - return current->gid; -} - -asmlinkage long sys_getegid(void) -{ - /* Only we change this so SMP safe */ - return current->egid; -} - -#endif - -/* Thread ID - the internal kernel "pid" */ -asmlinkage long sys_gettid(void) -{ - return current->pid; -} - -asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) -{ - struct timespec t; - unsigned long expire; - - if(copy_from_user(&t, rqtp, sizeof(struct timespec))) - return -EFAULT; - - if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) - return -EINVAL; - - - if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && - current->policy != SCHED_OTHER) - { - /* - * Short delay requests up to 2 ms will be handled with - * high precision by a busy wait for all real-time processes. - * - * Its important on SMP not to do this holding locks. 
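The busy-wait fast path just below converts the request to whole microseconds with (ns + 999) / 1000, rounding up so the delay is never shorter than asked for. A standalone check of that rounding:

    #include <stdio.h>

    int main(void)
    {
        long ns[] = { 1, 999, 1000, 1001, 2000000 };
        int  i;

        /* prints 1, 1, 1, 2, 2000 */
        for (i = 0; i < (int)(sizeof ns / sizeof ns[0]); i++)
            printf("%7ld ns -> %ld us\n", ns[i], (ns[i] + 999) / 1000);
        return 0;
    }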
- */ - udelay((t.tv_nsec + 999) / 1000); - return 0; - } - - expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); - - current->state = TASK_INTERRUPTIBLE; - expire = schedule_timeout(expire); - - if (expire) { - if (rmtp) { - jiffies_to_timespec(expire, &t); - if (copy_to_user(rmtp, &t, sizeof(struct timespec))) - return -EFAULT; - } - return -EINTR; - } - return 0; -} - diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mkbuildtree --- a/linux-2.4-xen-sparse/mkbuildtree Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,291 +0,0 @@ -#!/bin/bash - -# mkbuildtree <build tree> -# -# Creates symbolic links in <build tree> for the sparse tree -# in the current directory. - -# Script to determine the relative path between two directories. -# Copyright (c) D. J. Hawkey Jr. 2002 -# Fixed for Xen project by K. Fraser in 2003. -abs_to_rel () -{ - local CWD SRCPATH - - if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then - SRCPATH=${1%?} - else - SRCPATH=$1 - fi - if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then - DESTPATH=${2%?} - else - DESTPATH=$2 - fi - - CWD=$PWD - [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD - [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD - [ "$CWD" != "$PWD" ] && cd $CWD - - BASEPATH=$SRCPATH - - [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return - [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return - - while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do - BASEPATH=${BASEPATH%/*} - done - - SRCPATH=${SRCPATH#$BASEPATH} - DESTPATH=${DESTPATH#$BASEPATH} - DESTPATH=${DESTPATH#?} - while [ -n "$SRCPATH" ]; do - SRCPATH=${SRCPATH%/*} - DESTPATH="../$DESTPATH" - done - - [ -z "$BASEPATH" ] && BASEPATH="/" - [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?} -} - -# relative_lndir <target_dir> -# Creates a tree of symlinks in the current working directory that mirror -# real files in <target_dir>. <target_dir> should be relative to the current -# working directory. Symlinks in <target_dir> are ignored. Source-control files -# are ignored. -relative_lndir () -{ - local SYMLINK_DIR REAL_DIR pref i j - SYMLINK_DIR=$PWD - REAL_DIR=$1 - ( - cd $REAL_DIR - for i in `find . -type d | grep -v SCCS`; do - [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i - ( - cd $i - pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` - for j in `find . -maxdepth 1 -type f -o -type l`; do - ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j - done - ) - done - ) -} - -[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; } - -# Get absolute path to the destination directory -pushd . >/dev/null -cd ${1} || { echo "cannot cd to ${1}"; exit 1; } -AD=$PWD -popd >/dev/null - -# Get absolute path to the source directory -AS=`pwd` - -# Get path to source, relative to destination -abs_to_rel ${AD} ${AS} -RS=$DESTPATH - -# Remove old copies of files and directories at the destination -for i in `find . -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done - -# We now work from the destination directory -cd ${AD} || { echo "cannot cd to ${AD}"; exit 1; } - -# Remove old symlinks -for i in `find . -type l`; do rm -f $i; done - -# Create symlinks of files and directories which exist in the sparse source -relative_lndir ${RS} -rm -f mkbuildtree - -LINUX_26=${RS}/../linux-2.6-xen-sparse -[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } - - -# Create links to the shared definitions of the Xen interfaces. 
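The shell abs_to_rel() above computes the path of one directory relative to another by stripping their common prefix and prepending one ".." per leftover source component. The same idea as a rough C sketch, under the assumptions that inputs are normalized absolute paths without trailing slashes and that out is large enough:

    #include <stdio.h>
    #include <string.h>

    static void abs_to_rel(const char *src, const char *dst, char *out)
    {
        size_t i = 0, b;
        const char *s, *d;

        while (src[i] && src[i] == dst[i])
            i++;
        /* back up to a spot where both paths have '/' or end */
        for (b = i; b > 0; b--)
            if ((src[b] == '/' || !src[b]) &&
                (dst[b] == '/' || !dst[b]))
                break;
        s = src + b + (src[b] == '/');
        d = dst + b + (dst[b] == '/');

        out[0] = '\0';
        if (*s) {
            strcat(out, "..");              /* one per src component */
            for (; *s; s++)
                if (*s == '/')
                    strcat(out, "/..");
        }
        if (*d) {
            if (out[0])
                strcat(out, "/");
            strcat(out, d);
        }
        if (!out[0])
            strcpy(out, ".");               /* same directory */
    }

    int main(void)
    {
        char buf[256];

        abs_to_rel("/home/me/linux-2.4", "/home/me/xen/sparse", buf);
        printf("%s\n", buf);                /* ../xen/sparse */
        abs_to_rel("/a/b", "/a", buf);
        printf("%s\n", buf);                /* ..            */
        return 0;
    }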
-rm -rf ${AD}/include/asm-xen/xen-public -mkdir ${AD}/include/asm-xen/xen-public -cd ${AD}/include/asm-xen/xen-public -relative_lndir ../../../${RS}/../xen/include/public - -# Create a link to the shared definitions for the control interface -cd ${AD}/include/asm-xen - -## Symlinks for files: -## - which are identical in the i386 and xen-i386 architecture-dependent -## subdirectories. -## - which are identical in the Linux 2.6 and Linux 2.4 ports. - -cd ${AD}/include/asm-xen -ln -sf ../asm-i386/a.out.h -ln -sf ../asm-i386/apicdef.h -ln -sf ../asm-i386/apic.h -ln -sf ../asm-i386/atomic.h -ln -sf ../asm-i386/bitops.h -ln -sf ../asm-i386/boot.h -ln -sf ../asm-i386/byteorder.h -ln -sf ../asm-i386/cache.h -ln -sf ../asm-i386/checksum.h -ln -sf ../asm-i386/cpufeature.h -ln -sf ../asm-i386/current.h -ln -sf ../asm-i386/debugreg.h -ln -sf ../asm-i386/delay.h -ln -sf ../asm-i386/div64.h -ln -sf ../asm-i386/dma.h -ln -sf ../asm-i386/elf.h -ln -sf ../asm-i386/errno.h -ln -sf ../asm-i386/fcntl.h -ln -sf ../asm-i386/floppy.h -ln -sf ../asm-i386/hardirq.h -ln -sf ../asm-i386/hdreg.h -ln -sf ../asm-i386/i387.h -ln -sf ../asm-i386/ide.h -ln -sf ../asm-i386/init.h -ln -sf ../asm-i386/io_apic.h -ln -sf ../asm-i386/ioctl.h -ln -sf ../asm-i386/ioctls.h -ln -sf ../asm-i386/ipcbuf.h -ln -sf ../asm-i386/ipc.h -ln -sf ../asm-i386/kmap_types.h -ln -sf ../asm-i386/ldt.h -ln -sf ../asm-i386/linux_logo.h -ln -sf ../asm-i386/locks.h -ln -sf ../asm-i386/math_emu.h -ln -sf ../asm-i386/mc146818rtc.h -ln -sf ../asm-i386/mca_dma.h -ln -sf ../asm-i386/mman.h -ln -sf ../asm-i386/mmu.h -ln -sf ../asm-i386/mmx.h -ln -sf ../asm-i386/mpspec.h -ln -sf ../asm-i386/msgbuf.h -ln -sf ../asm-i386/msr.h -ln -sf ../asm-i386/mtrr.h -ln -sf ../asm-i386/namei.h -ln -sf ../asm-i386/param.h -ln -sf ../asm-i386/parport.h -ln -sf ../asm-i386/pgtable-3level.h -ln -sf ../asm-i386/poll.h -ln -sf ../asm-i386/posix_types.h -ln -sf ../asm-i386/ptrace.h -ln -sf ../asm-i386/resource.h -ln -sf ../asm-i386/rwlock.h -ln -sf ../asm-i386/rwsem.h -ln -sf ../asm-i386/scatterlist.h -ln -sf ../asm-i386/semaphore.h -ln -sf ../asm-i386/sembuf.h -ln -sf ../asm-i386/serial.h -ln -sf ../asm-i386/setup.h -ln -sf ../asm-i386/shmbuf.h -ln -sf ../asm-i386/shmparam.h -ln -sf ../asm-i386/sigcontext.h -ln -sf ../asm-i386/siginfo.h -ln -sf ../asm-i386/signal.h -ln -sf ../asm-i386/smplock.h -ln -sf ../asm-i386/socket.h -ln -sf ../asm-i386/sockios.h -ln -sf ../asm-i386/softirq.h -ln -sf ../asm-i386/spinlock.h -ln -sf ../asm-i386/statfs.h -ln -sf ../asm-i386/stat.h -ln -sf ../asm-i386/string-486.h -ln -sf ../asm-i386/string.h -ln -sf ../asm-i386/termbits.h -ln -sf ../asm-i386/termios.h -ln -sf ../asm-i386/timex.h -ln -sf ../asm-i386/tlb.h -ln -sf ../asm-i386/types.h -ln -sf ../asm-i386/uaccess.h -ln -sf ../asm-i386/ucontext.h -ln -sf ../asm-i386/unaligned.h -ln -sf ../asm-i386/unistd.h -ln -sf ../asm-i386/user.h -ln -sf ../asm-i386/vm86.h -ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h -ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h -ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h -ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h -ln -sf ../../${LINUX_26}/include/asm-xen/hypervisor.h -ln -sf ../../${LINUX_26}/include/asm-xen/xen_proc.h -ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/synch_bitops.h -ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/hypercall.h - -mkdir -p linux-public && cd linux-public -ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/privcmd.h -ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/suspend.h - -cd 
${AD}/arch/xen/kernel -ln -sf ../../i386/kernel/i387.c -ln -sf ../../i386/kernel/init_task.c -ln -sf ../../i386/kernel/pci-i386.c -ln -sf ../../i386/kernel/pci-i386.h -ln -sf ../../i386/kernel/ptrace.c -ln -sf ../../i386/kernel/semaphore.c -ln -sf ../../i386/kernel/sys_i386.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/ctrl_if.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/evtchn.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/fixup.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c - -cd ${AD}/arch/xen/lib -ln -sf ../../i386/lib/checksum.S -ln -sf ../../i386/lib/dec_and_lock.c -ln -sf ../../i386/lib/getuser.S -ln -sf ../../i386/lib/iodebug.c -ln -sf ../../i386/lib/memcpy.c -ln -sf ../../i386/lib/mmx.c -ln -sf ../../i386/lib/old-checksum.c -ln -sf ../../i386/lib/strstr.c -ln -sf ../../i386/lib/usercopy.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/xen_proc.c - -cd ${AD}/arch/xen/mm -ln -sf ../../i386/mm/extable.c -ln -sf ../../i386/mm/pageattr.c -ln -sf ../../../${LINUX_26}/arch/xen/i386/mm/hypervisor.c - -cd ${AD}/arch/xen/drivers/balloon -ln -sf ../../../../${LINUX_26}/drivers/xen/balloon/balloon.c - -cd ${AD}/arch/xen/drivers/console -ln -sf ../../../../${LINUX_26}/drivers/xen/console/console.c - -cd ${AD}/arch/xen/drivers/dom0 -ln -sf ../../../../${LINUX_26}/drivers/xen/privcmd/privcmd.c core.c - -cd ${AD}/arch/xen/drivers/evtchn -ln -sf ../../../../${LINUX_26}/drivers/xen/evtchn/evtchn.c - -cd ${AD}/arch/xen/drivers/netif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/netfront/netfront.c main.c - -cd ${AD}/arch/xen/drivers/netif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/netback.c main.c - -cd ${AD}/arch/xen/drivers/blkif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/blkback.c main.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c - -cd ${AD}/arch/xen/drivers/blkif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkfront/blkfront.c - -cd ${AD}/arch/xen/drivers/usbif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/usbfront.c main.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/xhci.h - -cd ${AD}/arch/xen/drivers/usbif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/usbback.c main.c diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mm/highmem.c --- a/linux-2.4-xen-sparse/mm/highmem.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,461 +0,0 @@ -/* - * High memory handling common code and variables. - * - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@xxxxxxx - * Gerhard Wichert, Siemens AG, Gerhard.Wichert@xxxxxxxxxxxxxx - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * 64-bit physical space. With current x86 CPUs this - * means up to 64 Gigabytes physical RAM. - * - * Rewrote high memory support to move the page cache into - * high memory. 
Implemented permanent (schedulable) kmaps - * based on Linus' idea. - * - * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx> - */ - -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/highmem.h> -#include <linux/swap.h> -#include <linux/slab.h> - -/* - * Virtual_count is not a pure "count". - * 0 means that it is not mapped, and has not been mapped - * since a TLB flush - it is usable. - * 1 means that there are no users, but it has been mapped - * since the last TLB flush - so we can't use it. - * n means that there are (n-1) current users of it. - */ -static int pkmap_count[LAST_PKMAP]; -static unsigned int last_pkmap_nr; -static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED}; -#define kmap_lock kmap_lock_cacheline.lock - -pte_t * pkmap_page_table; - -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); - -static void flush_all_zero_pkmaps(void) -{ - int i; - - flush_cache_all(); - - for (i = 0; i < LAST_PKMAP; i++) { - struct page *page; - - /* - * zero means we don't have anything to do, - * >1 means that it is still in use. Only - * a count of 1 means that it is free but - * needs to be unmapped - */ - if (pkmap_count[i] != 1) - continue; - pkmap_count[i] = 0; - - /* sanity check */ - if (pte_none(pkmap_page_table[i])) - BUG(); - - /* - * Don't need an atomic fetch-and-clear op here; - * no-one has the page mapped, and cannot get at - * its virtual address (and hence PTE) without first - * getting the kmap_lock (which is held here). - * So no dangers, even with speculative execution. - */ - page = pte_page(pkmap_page_table[i]); - pte_clear(&pkmap_page_table[i]); - - page->virtual = NULL; - } - flush_tlb_all(); -} - -static inline unsigned long map_new_virtual(struct page *page, int nonblocking) -{ - unsigned long vaddr; - int count; - -start: - count = LAST_PKMAP; - /* Find an empty entry */ - for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { - flush_all_zero_pkmaps(); - count = LAST_PKMAP; - } - if (!pkmap_count[last_pkmap_nr]) - break; /* Found a usable entry */ - if (--count) - continue; - - if (nonblocking) - return 0; - - /* - * Sleep for somebody else to unmap their entries - */ - { - DECLARE_WAITQUEUE(wait, current); - - current->state = TASK_UNINTERRUPTIBLE; - add_wait_queue(&pkmap_map_wait, &wait); - spin_unlock(&kmap_lock); - schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); - spin_lock(&kmap_lock); - - /* Somebody else might have mapped it while we slept */ - if (page->virtual) - return (unsigned long) page->virtual; - - /* Re-start */ - goto start; - } - } - vaddr = PKMAP_ADDR(last_pkmap_nr); - set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); - - pkmap_count[last_pkmap_nr] = 1; - page->virtual = (void *) vaddr; - - return vaddr; -} - -void kmap_flush_unused(void) -{ - spin_lock(&kmap_lock); - flush_all_zero_pkmaps(); - spin_unlock(&kmap_lock); -} - -void fastcall *kmap_high(struct page *page, int nonblocking) -{ - unsigned long vaddr; - - /* - * For highmem pages, we can't trust "virtual" until - * after we have the lock. 
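The pkmap_count[] convention documented above: 0 means the slot is unmapped and TLB-clean, 1 means no users remain but the mapping is stale until a flush, n means n-1 active users. A standalone model of one slot's lifecycle; the toy_* names are illustrative only.

    #include <assert.h>

    static int count;                /* one slot of pkmap_count[] */

    static void toy_map(void)    { count = 1; }  /* cf. map_new_virtual */
    static void toy_kmap(void)   { count++;   }  /* take a user ref */
    static void toy_kunmap(void) { count--;   }  /* drop a user ref */
    static void toy_flush(void)                  /* cf. flush_all_zero_pkmaps */
    {
        if (count == 1)
            count = 0;               /* only now is the slot reusable */
    }

    int main(void)
    {
        toy_map();
        toy_kmap();                  /* first user: count becomes 2 */
        assert(count == 2);
        toy_kunmap();                /* unused, but TLB still stale */
        assert(count == 1);
        toy_flush();
        assert(count == 0);          /* free for reuse again */
        return 0;
    }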
- * - * We cannot call this from interrupts, as it may block - */ - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) { - vaddr = map_new_virtual(page, nonblocking); - if (!vaddr) - goto out; - } - pkmap_count[PKMAP_NR(vaddr)]++; - if (pkmap_count[PKMAP_NR(vaddr)] < 2) - BUG(); - out: - spin_unlock(&kmap_lock); - return (void*) vaddr; -} - -void fastcall kunmap_high(struct page *page) -{ - unsigned long vaddr; - unsigned long nr; - int need_wakeup; - - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) - BUG(); - nr = PKMAP_NR(vaddr); - - /* - * A count must never go down to zero - * without a TLB flush! - */ - need_wakeup = 0; - switch (--pkmap_count[nr]) { - case 0: - BUG(); - case 1: - /* - * Avoid an unnecessary wake_up() function call. - * The common case is pkmap_count[] == 1, but - * no waiters. - * The tasks queued in the wait-queue are guarded - * by both the lock in the wait-queue-head and by - * the kmap_lock. As the kmap_lock is held here, - * no need for the wait-queue-head's lock. Simply - * test if the queue is empty. - */ - need_wakeup = waitqueue_active(&pkmap_map_wait); - } - spin_unlock(&kmap_lock); - - /* do wake-up, if needed, race-free outside of the spin lock */ - if (need_wakeup) - wake_up(&pkmap_map_wait); -} - -#define POOL_SIZE 32 - -/* - * This lock gets no contention at all, normally. - */ -static spinlock_t emergency_lock = SPIN_LOCK_UNLOCKED; - -int nr_emergency_pages; -static LIST_HEAD(emergency_pages); - -int nr_emergency_bhs; -static LIST_HEAD(emergency_bhs); - -/* - * Simple bounce buffer support for highmem pages. - * This will be moved to the block layer in 2.5. - */ - -static inline void copy_from_high_bh (struct buffer_head *to, - struct buffer_head *from) -{ - struct page *p_from; - char *vfrom; - - p_from = from->b_page; - - vfrom = kmap_atomic(p_from, KM_USER0); - memcpy(to->b_data, vfrom + bh_offset(from), to->b_size); - kunmap_atomic(vfrom, KM_USER0); -} - -static inline void copy_to_high_bh_irq (struct buffer_head *to, - struct buffer_head *from) -{ - struct page *p_to; - char *vto; - unsigned long flags; - - p_to = to->b_page; - __save_flags(flags); - __cli(); - vto = kmap_atomic(p_to, KM_BOUNCE_READ); - memcpy(vto + bh_offset(to), from->b_data, to->b_size); - kunmap_atomic(vto, KM_BOUNCE_READ); - __restore_flags(flags); -} - -static inline void bounce_end_io (struct buffer_head *bh, int uptodate) -{ - struct page *page; - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - unsigned long flags; - - bh_orig->b_end_io(bh_orig, uptodate); - - page = bh->b_page; - - spin_lock_irqsave(&emergency_lock, flags); - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irqrestore(&emergency_lock, flags); -} - -static __init int init_emergency_pool(void) -{ - struct sysinfo i; - si_meminfo(&i); - si_swapinfo(&i); - - if (!i.totalhigh) - return 0; - - spin_lock_irq(&emergency_lock); - while (nr_emergency_pages < POOL_SIZE) { - struct page * page = 
alloc_page(GFP_ATOMIC); - if (!page) { - printk("couldn't refill highmem emergency pages"); - break; - } - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - while (nr_emergency_bhs < POOL_SIZE) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irq(&emergency_lock); - printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", - nr_emergency_pages, nr_emergency_bhs); - - return 0; -} - -__initcall(init_emergency_pool); - -static void bounce_end_io_write (struct buffer_head *bh, int uptodate) -{ - bounce_end_io(bh, uptodate); -} - -static void bounce_end_io_read (struct buffer_head *bh, int uptodate) -{ - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - - if (uptodate) - copy_to_high_bh_irq(bh_orig, bh); - bounce_end_io(bh, uptodate); -} - -struct page *alloc_bounce_page (void) -{ - struct list_head *tmp; - struct page *page; - - page = alloc_page(GFP_NOHIGHIO); - if (page) - return page; - /* - * No luck. First, kick the VM so it doesn't idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); - -repeat_alloc: - /* - * Try to allocate from the emergency pool. - */ - tmp = &emergency_pages; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - page = list_entry(tmp->next, struct page, list); - list_del(tmp->next); - nr_emergency_pages--; - } - spin_unlock_irq(&emergency_lock); - if (page) - return page; - - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); - - yield(); - goto repeat_alloc; -} - -struct buffer_head *alloc_bounce_bh (void) -{ - struct list_head *tmp; - struct buffer_head *bh; - - bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); - if (bh) - return bh; - /* - * No luck. First, kick the VM so it doesn't idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); - -repeat_alloc: - /* - * Try to allocate from the emergency pool. - */ - tmp = &emergency_bhs; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - spin_unlock_irq(&emergency_lock); - if (bh) - return bh; - - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); - - yield(); - goto repeat_alloc; -} - -struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig) -{ - struct page *page; - struct buffer_head *bh; - - if (!PageHighMem(bh_orig->b_page)) - return bh_orig; - - bh = alloc_bounce_bh(); - /* - * This is wasteful for 1k buffers, but this is a stopgap measure - * and we are being ineffective anyway. This approach simplifies - * things immensly. On boxes with more than 4GB RAM this should - * not be an issue anyway. 
- */ - page = alloc_bounce_page(); - - set_bh_page(bh, page, 0); - - bh->b_next = NULL; - bh->b_blocknr = bh_orig->b_blocknr; - bh->b_size = bh_orig->b_size; - bh->b_list = -1; - bh->b_dev = bh_orig->b_dev; - bh->b_count = bh_orig->b_count; - bh->b_rdev = bh_orig->b_rdev; - bh->b_state = bh_orig->b_state; -#ifdef HIGHMEM_DEBUG - bh->b_flushtime = jiffies; - bh->b_next_free = NULL; - bh->b_prev_free = NULL; - /* bh->b_this_page */ - bh->b_reqnext = NULL; - bh->b_pprev = NULL; -#endif - /* bh->b_page */ - if (rw == WRITE) { - bh->b_end_io = bounce_end_io_write; - copy_from_high_bh(bh, bh_orig); - } else - bh->b_end_io = bounce_end_io_read; - bh->b_private = (void *)bh_orig; - bh->b_rsector = bh_orig->b_rsector; -#ifdef HIGHMEM_DEBUG - memset(&bh->b_wait, -1, sizeof(bh->b_wait)); -#endif - - return bh; -} - diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mm/memory.c --- a/linux-2.4-xen-sparse/mm/memory.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1534 +0,0 @@ -/* - * linux/mm/memory.c - * - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - */ - -/* - * demand-loading started 01.12.91 - seems it is high on the list of - * things wanted, and it should be easy to implement. - Linus - */ - -/* - * Ok, demand-loading was easy, shared pages a little bit tricker. Shared - * pages started 02.12.91, seems to work. - Linus. - * - * Tested sharing by executing about 30 /bin/sh: under the old kernel it - * would have taken more than the 6M I have free, but it worked well as - * far as I could see. - * - * Also corrected some "invalidate()"s - I wasn't doing enough of them. - */ - -/* - * Real VM (paging to/from disk) started 18.12.91. Much more work and - * thought has to go into this. Oh, well.. - * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. - * Found it. Everything seems to work now. - * 20.12.91 - Ok, making the swap-device changeable like the root. - */ - -/* - * 05.04.94 - Multi-page memory management added for v1.1. - * Idea by Alex Bligh (alex@xxxxxxxxxxxxxxx) - * - * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG - * (Gerhard.Wichert@xxxxxxxxxxxxxx) - */ - -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/swap.h> -#include <linux/smp_lock.h> -#include <linux/swapctl.h> -#include <linux/iobuf.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/module.h> - -#include <asm/pgalloc.h> -#include <asm/uaccess.h> -#include <asm/tlb.h> - -unsigned long max_mapnr; -unsigned long num_physpages; -unsigned long num_mappedpages; -void * high_memory; -struct page *highmem_start_page; - -/* - * We special-case the C-O-W ZERO_PAGE, because it's such - * a common occurrence (no need to read the page to know - * that it's zero - better for the cache and memory subsystem). - */ -static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address) -{ - if (from == ZERO_PAGE(address)) { - clear_user_highpage(to, address); - return; - } - copy_user_highpage(to, from, address); -} - -mem_map_t * mem_map; - -/* - * Called by TLB shootdown - */ -void __free_pte(pte_t pte) -{ - struct page *page = pte_page(pte); - if ((!VALID_PAGE(page)) || PageReserved(page)) - return; - if (pte_dirty(pte)) - set_page_dirty(page); - free_page_and_swap_cache(page); -} - - -/* - * Note: this doesn't free the actual pages themselves. That - * has been handled earlier when unmapping all the memory regions. 
- */ -static inline void free_one_pmd(pmd_t * dir) -{ - pte_t * pte; - - if (pmd_none(*dir)) - return; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return; - } - pte = pte_offset(dir, 0); - pmd_clear(dir); - pte_free(pte); -} - -static inline void free_one_pgd(pgd_t * dir) -{ - int j; - pmd_t * pmd; - - if (pgd_none(*dir)) - return; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return; - } - pmd = pmd_offset(dir, 0); - pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) { - prefetchw(pmd+j+(PREFETCH_STRIDE/16)); - free_one_pmd(pmd+j); - } - pmd_free(pmd); -} - -/* Low and high watermarks for page table cache. - The system should try to have pgt_water[0] <= cache elements <= pgt_water[1] - */ -int pgt_cache_water[2] = { 25, 50 }; - -/* Returns the number of pages freed */ -int check_pgt_cache(void) -{ - return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]); -} - - -/* - * This function clears all user-level page tables of a process - this - * is needed by execve(), so that old pages aren't in the way. - */ -void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) -{ - pgd_t * page_dir = mm->pgd; - - spin_lock(&mm->page_table_lock); - page_dir += first; - do { - free_one_pgd(page_dir); - page_dir++; - } while (--nr); - spin_unlock(&mm->page_table_lock); - - /* keep the page table cache within bounds */ - check_pgt_cache(); -} - -#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t)) -#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) - -/* - * copy one vm_area from one task to the other. Assumes the page tables - * already present in the new task to be cleared in the whole range - * covered by this vma. - * - * 08Jan98 Merged into one routine from several inline routines to reduce - * variable count and make things faster. -jj - * - * dst->page_table_lock is held on entry and exit, - * but may be dropped within pmd_alloc() and pte_alloc(). 
- */ -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) -{ - pgd_t * src_pgd, * dst_pgd; - unsigned long address = vma->vm_start; - unsigned long end = vma->vm_end; - unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - - src_pgd = pgd_offset(src, address)-1; - dst_pgd = pgd_offset(dst, address)-1; - - for (;;) { - pmd_t * src_pmd, * dst_pmd; - - src_pgd++; dst_pgd++; - - /* copy_pmd_range */ - - if (pgd_none(*src_pgd)) - goto skip_copy_pmd_range; - if (pgd_bad(*src_pgd)) { - pgd_ERROR(*src_pgd); - pgd_clear(src_pgd); -skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; - if (!address || (address >= end)) - goto out; - continue; - } - - src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_alloc(dst, dst_pgd, address); - if (!dst_pmd) - goto nomem; - - do { - pte_t * src_pte, * dst_pte; - - /* copy_pte_range */ - - if (pmd_none(*src_pmd)) - goto skip_copy_pte_range; - if (pmd_bad(*src_pmd)) { - pmd_ERROR(*src_pmd); - pmd_clear(src_pmd); -skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; - if (address >= end) - goto out; - goto cont_copy_pmd_range; - } - - src_pte = pte_offset(src_pmd, address); - dst_pte = pte_alloc(dst, dst_pmd, address); - if (!dst_pte) - goto nomem; - - spin_lock(&src->page_table_lock); - do { - pte_t pte = *src_pte; - struct page *ptepage; - - /* copy_one_pte */ - - if (pte_none(pte)) - goto cont_copy_pte_range_noset; - if (!pte_present(pte)) { - swap_duplicate(pte_to_swp_entry(pte)); - goto cont_copy_pte_range; - } - ptepage = pte_page(pte); - if ((!VALID_PAGE(ptepage)) || - PageReserved(ptepage)) - goto cont_copy_pte_range; - - /* If it's a COW mapping, write protect it both in the parent and the child */ - if (cow && pte_write(pte)) { - ptep_set_wrprotect(src_pte); - pte = *src_pte; - } - - /* If it's a shared mapping, mark it clean in the child */ - if (vma->vm_flags & VM_SHARED) - pte = pte_mkclean(pte); - pte = pte_mkold(pte); - get_page(ptepage); - dst->rss++; - -cont_copy_pte_range: set_pte(dst_pte, pte); -cont_copy_pte_range_noset: address += PAGE_SIZE; - if (address >= end) - goto out_unlock; - src_pte++; - dst_pte++; - } while ((unsigned long)src_pte & PTE_TABLE_MASK); - spin_unlock(&src->page_table_lock); - -cont_copy_pmd_range: src_pmd++; - dst_pmd++; - } while ((unsigned long)src_pmd & PMD_TABLE_MASK); - } -out_unlock: - spin_unlock(&src->page_table_lock); -out: - return 0; -nomem: - return -ENOMEM; -} - -/* - * Return indicates whether a page was freed so caller can adjust rss - */ -static inline void forget_pte(pte_t page) -{ - if (!pte_none(page)) { - printk("forget_pte: old mapping existed!\n"); - BUG(); - } -} - -static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) -{ - unsigned long offset; - pte_t * ptep; - int freed = 0; - - if (pmd_none(*pmd)) - return 0; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return 0; - } - ptep = pte_offset(pmd, address); - offset = address & ~PMD_MASK; - if (offset + size > PMD_SIZE) - size = PMD_SIZE - offset; - size &= PAGE_MASK; - for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { - pte_t pte = *ptep; - if (pte_none(pte)) - continue; - if (pte_present(pte)) { - struct page *page = pte_page(pte); - if (VALID_PAGE(page) && !PageReserved(page)) - freed ++; - /* This will eventually call __free_pte on the pte. 
*/ - tlb_remove_page(tlb, ptep, address + offset); - } else { - free_swap_and_cache(pte_to_swp_entry(pte)); - pte_clear(ptep); - } - } - - return freed; -} - -static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size) -{ - pmd_t * pmd; - unsigned long end; - int freed; - - if (pgd_none(*dir)) - return 0; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return 0; - } - pmd = pmd_offset(dir, address); - end = address + size; - if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) - end = ((address + PGDIR_SIZE) & PGDIR_MASK); - freed = 0; - do { - freed += zap_pte_range(tlb, pmd, address, end - address); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); - return freed; -} - -/* - * remove user pages in a given range. - */ -void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) -{ - mmu_gather_t *tlb; - pgd_t * dir; - unsigned long start = address, end = address + size; - int freed = 0; - - dir = pgd_offset(mm, address); - - /* - * This is a long-lived spinlock. That's fine. - * There's no contention, because the page table - * lock only protects against kswapd anyway, and - * even if kswapd happened to be looking at this - * process we _want_ it to get stuck. - */ - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - flush_cache_range(mm, address, end); - tlb = tlb_gather_mmu(mm); - - do { - freed += zap_pmd_range(tlb, dir, address, end - address); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - - /* this will flush any remaining tlb entries */ - tlb_finish_mmu(tlb, start, end); - - /* - * Update rss for the mm_struct (not necessarily current->mm) - * Notice that rss is an unsigned long. - */ - if (mm->rss > freed) - mm->rss -= freed; - else - mm->rss = 0; - spin_unlock(&mm->page_table_lock); -} - -/* - * Do a quick page-table lookup for a single page. - */ -static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *ptep, pte; - - pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - goto out; - - pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - goto out; - - ptep = pte_offset(pmd, address); - if (!ptep) - goto out; - - pte = *ptep; - if (pte_present(pte)) { - if (!write || - (pte_write(pte) && pte_dirty(pte))) - return pte_page(pte); - } - -out: - return 0; -} - -/* - * Given a physical address, is there a useful struct page pointing to - * it? This may become more complex in the future if we start dealing - * with IO-aperture pages in kiobufs. - */ - -static inline struct page * get_page_map(struct page *page) -{ - if (!VALID_PAGE(page)) - return 0; - return page; -} - -/* - * Please read Documentation/cachetlb.txt before using this function, - * accessing foreign memory spaces can cause cache coherency problems. - * - * Accessing a VM_IO area is even more dangerous, therefore the function - * fails if pages is != NULL and a VM_IO area is found. - */ -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) -{ - int i; - unsigned int flags; - - /* - * Require read or write permissions. - * If 'force' is set, we only require the "MAY" flags. - */ - flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - flags &= force ? 
(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); - i = 0; - - do { - struct vm_area_struct * vma; - - vma = find_extend_vma(mm, start); - - if ( !vma || (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags) ) - return i ? : -EFAULT; - - spin_lock(&mm->page_table_lock); - do { - struct page *map; - while (!(map = follow_page(mm, start, write))) { - spin_unlock(&mm->page_table_lock); - switch (handle_mm_fault(mm, vma, start, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - if (i) return i; - return -EFAULT; - default: - if (i) return i; - return -ENOMEM; - } - spin_lock(&mm->page_table_lock); - } - if (pages) { - pages[i] = get_page_map(map); - /* FIXME: call the correct function, - * depending on the type of the found page - */ - if (!pages[i] || PageReserved(pages[i])) { - if (pages[i] != ZERO_PAGE(start)) - goto bad_page; - } else - page_cache_get(pages[i]); - } - if (vmas) - vmas[i] = vma; - i++; - start += PAGE_SIZE; - len--; - } while(len && start < vma->vm_end); - spin_unlock(&mm->page_table_lock); - } while(len); -out: - return i; - - /* - * We found an invalid page in the VMA. Release all we have - * so far and fail. - */ -bad_page: - spin_unlock(&mm->page_table_lock); - while (i--) - page_cache_release(pages[i]); - i = -EFAULT; - goto out; -} - -EXPORT_SYMBOL(get_user_pages); - -/* - * Force in an entire range of pages from the current process's user VA, - * and pin them in physical memory. - */ -#define dprintk(x...) - -int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) -{ - int pgcount, err; - struct mm_struct * mm; - - /* Make sure the iobuf is not already mapped somewhere. */ - if (iobuf->nr_pages) - return -EINVAL; - - mm = current->mm; - dprintk ("map_user_kiobuf: begin\n"); - - pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; - /* mapping 0 bytes is not permitted */ - if (!pgcount) BUG(); - err = expand_kiobuf(iobuf, pgcount); - if (err) - return err; - - iobuf->locked = 0; - iobuf->offset = va & (PAGE_SIZE-1); - iobuf->length = len; - - /* Try to fault in all of the necessary pages */ - down_read(&mm->mmap_sem); - /* rw==READ means read from disk, write into memory area */ - err = get_user_pages(current, mm, va, pgcount, - (rw==READ), 0, iobuf->maplist, NULL); - up_read(&mm->mmap_sem); - if (err < 0) { - unmap_kiobuf(iobuf); - dprintk ("map_user_kiobuf: end %d\n", err); - return err; - } - iobuf->nr_pages = err; - while (pgcount--) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(iobuf->maplist[pgcount]); - } - dprintk ("map_user_kiobuf: end OK\n"); - return 0; -} - -/* - * Mark all of the pages in a kiobuf as dirty - * - * We need to be able to deal with short reads from disk: if an IO error - * occurs, the number of bytes read into memory may be less than the - * size of the kiobuf, so we have to stop marking pages dirty once the - * requested byte count has been reached. - * - * Must be called from process context - set_page_dirty() takes VFS locks. 
- */ - -void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes) -{ - int index, offset, remaining; - struct page *page; - - index = iobuf->offset >> PAGE_SHIFT; - offset = iobuf->offset & ~PAGE_MASK; - remaining = bytes; - if (remaining > iobuf->length) - remaining = iobuf->length; - - while (remaining > 0 && index < iobuf->nr_pages) { - page = iobuf->maplist[index]; - - if (!PageReserved(page)) - set_page_dirty(page); - - remaining -= (PAGE_SIZE - offset); - offset = 0; - index++; - } -} - -/* - * Unmap all of the pages referenced by a kiobuf. We release the pages, - * and unlock them if they were locked. - */ - -void unmap_kiobuf (struct kiobuf *iobuf) -{ - int i; - struct page *map; - - for (i = 0; i < iobuf->nr_pages; i++) { - map = iobuf->maplist[i]; - if (map) { - if (iobuf->locked) - UnlockPage(map); - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(map); - } - } - - iobuf->nr_pages = 0; - iobuf->locked = 0; -} - - -/* - * Lock down all of the pages of a kiovec for IO. - * - * If any page is mapped twice in the kiovec, we return the error -EINVAL. - * - * The optional wait parameter causes the lock call to block until all - * pages can be locked if set. If wait==0, the lock operation is - * aborted if any locked pages are found and -EAGAIN is returned. - */ - -int lock_kiovec(int nr, struct kiobuf *iovec[], int wait) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - int doublepage = 0; - int repeat = 0; - - repeat: - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (iobuf->locked) - continue; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - - if (TryLockPage(page)) { - while (j--) { - struct page *tmp = *--ppage; - if (tmp) - UnlockPage(tmp); - } - goto retry; - } - } - iobuf->locked = 1; - } - - return 0; - - retry: - - /* - * We couldn't lock one of the pages. Undo the locking so far, - * wait on the page we got to, and try again. - */ - - unlock_kiovec(nr, iovec); - if (!wait) - return -EAGAIN; - - /* - * Did the release also unlock the page we got stuck on? - */ - if (!PageLocked(page)) { - /* - * If so, we may well have the page mapped twice - * in the IO address range. Bad news. Of - * course, it _might_ just be a coincidence, - * but if it happens more than once, chances - * are we have a double-mapped page. - */ - if (++doublepage >= 3) - return -EINVAL; - - /* Try again... */ - wait_on_page(page); - } - - if (++repeat < 16) - goto repeat; - return -EAGAIN; -} - -/* - * Unlock all of the pages of a kiovec after IO. 
- */ - -int unlock_kiovec(int nr, struct kiobuf *iovec[]) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (!iobuf->locked) - continue; - iobuf->locked = 0; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - UnlockPage(page); - } - } - return 0; -} - -static inline void zeromap_pte_range(pte_t * pte, unsigned long address, - unsigned long size, pgprot_t prot) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot)); - pte_t oldpage = ptep_get_and_clear(pte); - set_pte(pte, zero_pte); - forget_pte(oldpage); - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t prot) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - pte_t * pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - zeromap_pte_range(pte, address, end - address, prot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long beg = address; - unsigned long end = address + size; - struct mm_struct *mm = current->mm; - - dir = pgd_offset(mm, address); - flush_cache_range(mm, beg, end); - if (address >= end) - BUG(); - - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - error = -ENOMEM; - if (!pmd) - break; - error = zeromap_pmd_range(mm, pmd, address, end - address, prot); - if (error) - break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(mm, beg, end); - return error; -} - -/* - * maps a range of physical memory into the requested pages. the old - * mappings are removed. any references to nonexistent pages results - * in null mappings (currently treated as "copy-on-access") - */ -static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t prot) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - struct page *page; - pte_t oldpage; - oldpage = ptep_get_and_clear(pte); - - page = virt_to_page(__va(phys_addr)); - if ((!VALID_PAGE(page)) || PageReserved(page)) - set_pte(pte, mk_pte_phys(phys_addr, prot)); - forget_pte(oldpage); - address += PAGE_SIZE; - phys_addr += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t prot) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - do { - pte_t * pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - remap_pte_range(pte, address, end - address, address + phys_addr, prot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -/* Note: this is only safe if the mm semaphore is held when called. 
*/ -int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long beg = from; - unsigned long end = from + size; - struct mm_struct *mm = current->mm; - - phys_addr -= from; - dir = pgd_offset(mm, from); - flush_cache_range(mm, beg, end); - if (from >= end) - BUG(); - - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, from); - error = -ENOMEM; - if (!pmd) - break; - error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); - if (error) - break; - from = (from + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (from && (from < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(mm, beg, end); - return error; -} - -/* - * Establish a new mapping: - * - flush the old one - * - update the page tables - * - inform the TLB about the new one - * - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock - */ -static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) -{ -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) { - HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL); - } else { - set_pte(page_table, entry); - flush_tlb_page(vma, address); - } -#else - set_pte(page_table, entry); - flush_tlb_page(vma, address); -#endif - update_mmu_cache(vma, address, entry); -} - -/* - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock - */ -static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, - pte_t *page_table) -{ - flush_page_to_ram(new_page); - flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); -} - -/* - * This routine handles present pages, when users try to write - * to a shared page. It is done by copying the page to a new address - * and decrementing the shared-page counter for the old page. - * - * Goto-purists beware: the only reason for goto's here is that it results - * in better assembly code.. The "default" path will see no jumps at all. - * - * Note that this routine assumes that the protection checks have been - * done by the caller (the low-level page fault routine in most cases). - * Thus we can safely just mark it writable once we've done any necessary - * COW. - * - * We also mark the page dirty at this point even though the page will - * change only once the write actually happens. This avoids a few races, - * and potentially makes it more efficient. - * - * We hold the mm semaphore and the page_table_lock on entry and exit - * with the page_table_lock released. - */ -static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, pte_t *page_table, pte_t pte) -{ - struct page *old_page, *new_page; - - old_page = pte_page(pte); - if (!VALID_PAGE(old_page)) - goto bad_wp_page; - - if (!TryLockPage(old_page)) { - int reuse = can_share_swap_page(old_page); - unlock_page(old_page); - if (reuse) { - flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); - spin_unlock(&mm->page_table_lock); - return 1; /* Minor fault */ - } - } - - /* - * Ok, we need to copy. Oh, well.. 
- */ - page_cache_get(old_page); - spin_unlock(&mm->page_table_lock); - - new_page = alloc_page(GFP_HIGHUSER); - if (!new_page) - goto no_mem; - copy_cow_page(old_page,new_page,address); - - /* - * Re-check the pte - we dropped the lock - */ - spin_lock(&mm->page_table_lock); - if (pte_same(*page_table, pte)) { - if (PageReserved(old_page)) - ++mm->rss; - break_cow(vma, new_page, address, page_table); - if (vm_anon_lru) - lru_cache_add(new_page); - - /* Free the old page.. */ - new_page = old_page; - } - spin_unlock(&mm->page_table_lock); - page_cache_release(new_page); - page_cache_release(old_page); - return 1; /* Minor fault */ - -bad_wp_page: - spin_unlock(&mm->page_table_lock); - printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); - return -1; -no_mem: - page_cache_release(old_page); - return -1; -} - -static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff) -{ - do { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long start = mpnt->vm_start; - unsigned long end = mpnt->vm_end; - unsigned long len = end - start; - unsigned long diff; - - /* mapping wholly truncated? */ - if (mpnt->vm_pgoff >= pgoff) { - zap_page_range(mm, start, len); - continue; - } - - /* mapping wholly unaffected? */ - len = len >> PAGE_SHIFT; - diff = pgoff - mpnt->vm_pgoff; - if (diff >= len) - continue; - - /* Ok, partially affected.. */ - start += diff << PAGE_SHIFT; - len = (len - diff) << PAGE_SHIFT; - zap_page_range(mm, start, len); - } while ((mpnt = mpnt->vm_next_share) != NULL); -} - -/* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - unsigned long pgoff; - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - inode->i_size = offset; - spin_lock(&mapping->i_shared_lock); - if (!mapping->i_mmap && !mapping->i_mmap_shared) - goto out_unlock; - - pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (mapping->i_mmap != NULL) - vmtruncate_list(mapping->i_mmap, pgoff); - if (mapping->i_mmap_shared != NULL) - vmtruncate_list(mapping->i_mmap_shared, pgoff); - -out_unlock: - spin_unlock(&mapping->i_shared_lock); - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - inode->i_size = offset; - -out_truncate: - if (inode->i_op && inode->i_op->truncate) { - lock_kernel(); - inode->i_op->truncate(inode); - unlock_kernel(); - } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -/* - * Primitive swap readahead code. We simply read an aligned block of - * (1 << page_cluster) entries in the swap area. This method is chosen - * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... - */ -void swapin_readahead(swp_entry_t entry) -{ - int i, num; - struct page *new_page; - unsigned long offset; - - /* - * Get the number of handles we should do readahead io to. 
- */ - num = valid_swaphandles(entry, &offset); - for (i = 0; i < num; offset++, i++) { - /* Ok, do the async read-ahead now */ - new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset)); - if (!new_page) - break; - page_cache_release(new_page); - } - return; -} - -/* - * We hold the mm semaphore and the page_table_lock on entry and - * should release the pagetable lock on exit.. - */ -static int do_swap_page(struct mm_struct * mm, - struct vm_area_struct * vma, unsigned long address, - pte_t * page_table, pte_t orig_pte, int write_access) -{ - struct page *page; - swp_entry_t entry = pte_to_swp_entry(orig_pte); - pte_t pte; - int ret = 1; - - spin_unlock(&mm->page_table_lock); - page = lookup_swap_cache(entry); - if (!page) { - swapin_readahead(entry); - page = read_swap_cache_async(entry); - if (!page) { - /* - * Back out if somebody else faulted in this pte while - * we released the page table lock. - */ - int retval; - spin_lock(&mm->page_table_lock); - retval = pte_same(*page_table, orig_pte) ? -1 : 1; - spin_unlock(&mm->page_table_lock); - return retval; - } - - /* Had to read the page from swap area: Major fault */ - ret = 2; - } - - mark_page_accessed(page); - - lock_page(page); - - /* - * Back out if somebody else faulted in this pte while we - * released the page table lock. - */ - spin_lock(&mm->page_table_lock); - if (!pte_same(*page_table, orig_pte)) { - spin_unlock(&mm->page_table_lock); - unlock_page(page); - page_cache_release(page); - return 1; - } - - /* The page isn't present yet, go ahead with the fault. */ - - swap_free(entry); - if (vm_swap_full()) - remove_exclusive_swap_page(page); - - mm->rss++; - pte = mk_pte(page, vma->vm_page_prot); - if (write_access && can_share_swap_page(page)) - pte = pte_mkdirty(pte_mkwrite(pte)); - unlock_page(page); - - flush_page_to_ram(page); - flush_icache_page(vma, page); -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(address, pte, 0); - else - set_pte(page_table, pte); -#else - set_pte(page_table, pte); -#endif - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, pte); - spin_unlock(&mm->page_table_lock); - return ret; -} - -/* - * We are called with the MM semaphore and page_table_lock - * spinlock held to protect against concurrent faults in - * multithreaded programs. - */ -static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) -{ - pte_t entry; - - /* Read-only mapping of ZERO_PAGE. */ - entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); - - /* ..except if it's a write access */ - if (write_access) { - struct page *page; - - /* Allocate our own private page. 
*/ - spin_unlock(&mm->page_table_lock); - - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto no_mem; - clear_user_highpage(page, addr); - - spin_lock(&mm->page_table_lock); - if (!pte_none(*page_table)) { - page_cache_release(page); - spin_unlock(&mm->page_table_lock); - return 1; - } - mm->rss++; - flush_page_to_ram(page); - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - if (vm_anon_lru) - lru_cache_add(page); - mark_page_accessed(page); - } - -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(addr, entry, 0); - else - set_pte(page_table, entry); -#else - set_pte(page_table, entry); -#endif - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, entry); - spin_unlock(&mm->page_table_lock); - return 1; /* Minor fault */ - -no_mem: - return -1; -} - -/* - * do_no_page() tries to create a new page mapping. It aggressively - * tries to share with existing pages, but makes a separate copy if - * the "write_access" parameter is true in order to avoid the next - * page fault. - * - * As this is called only for pages that do not currently exist, we - * do not need to flush old virtual caches or the TLB. - * - * This is called with the MM semaphore held and the page table - * spinlock held. Exit with the spinlock released. - */ -static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, - unsigned long address, int write_access, pte_t *page_table) -{ - struct page * new_page; - pte_t entry; - - if (!vma->vm_ops || !vma->vm_ops->nopage) - return do_anonymous_page(mm, vma, page_table, write_access, address); - spin_unlock(&mm->page_table_lock); - - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); - - if (new_page == NULL) /* no page was available -- SIGBUS */ - return 0; - if (new_page == NOPAGE_OOM) - return -1; - - /* - * Should we do an early C-O-W break? - */ - if (write_access && !(vma->vm_flags & VM_SHARED)) { - struct page * page = alloc_page(GFP_HIGHUSER); - if (!page) { - page_cache_release(new_page); - return -1; - } - copy_user_highpage(page, new_page, address); - page_cache_release(new_page); - if (vm_anon_lru) - lru_cache_add(page); - new_page = page; - } - - spin_lock(&mm->page_table_lock); - /* - * This silly early PAGE_DIRTY setting removes a race - * due to the bad i386 page protection. But it's valid - * for other architectures too. - * - * Note that if write_access is true, we either now have - * an exclusive copy of the page, or this is a shared mapping, - * so we can make it writable and dirty to avoid having to - * handle that later. - */ - /* Only go through if we didn't race with anybody else... */ - if (pte_none(*page_table)) { - if (!PageReserved(new_page)) - ++mm->rss; - flush_page_to_ram(new_page); - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) - entry = pte_mkwrite(pte_mkdirty(entry)); -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(address, entry, 0); - else - set_pte(page_table, entry); -#else - set_pte(page_table, entry); -#endif - } else { - /* One of our sibling threads was faster, back out. 
*/ - page_cache_release(new_page); - spin_unlock(&mm->page_table_lock); - return 1; - } - - /* no need to invalidate: a not-present page shouldn't be cached */ - update_mmu_cache(vma, address, entry); - spin_unlock(&mm->page_table_lock); - return 2; /* Major fault */ -} - -/* - * These routines also need to handle stuff like marking pages dirty - * and/or accessed for architectures that don't do it in hardware (most - * RISC architectures). The early dirtying is also good on the i386. - * - * There is also a hook called "update_mmu_cache()" that architectures - * with external mmu caches can use to update those (ie the Sparc or - * PowerPC hashed page tables that act as extended TLBs). - * - * Note the "page_table_lock". It is to protect against kswapd removing - * pages from under us. Note that kswapd only ever _removes_ pages, never - * adds them. As such, once we have noticed that the page is not present, - * we can drop the lock early. - * - * The adding of pages is protected by the MM semaphore (which we hold), - * so we don't need to worry about a page being suddenly been added into - * our VM. - * - * We enter with the pagetable spinlock held, we are supposed to - * release it when done. - */ -static inline int handle_pte_fault(struct mm_struct *mm, - struct vm_area_struct * vma, unsigned long address, - int write_access, pte_t * pte) -{ - pte_t entry; - - entry = *pte; - if (!pte_present(entry)) { - /* - * If it truly wasn't present, we know that kswapd - * and the PTE updates will not touch it later. So - * drop the lock. - */ - if (pte_none(entry)) - return do_no_page(mm, vma, address, write_access, pte); - return do_swap_page(mm, vma, address, pte, entry, write_access); - } - - if (write_access) { - if (!pte_write(entry)) - return do_wp_page(mm, vma, address, pte, entry); - - entry = pte_mkdirty(entry); - } - entry = pte_mkyoung(entry); - establish_pte(vma, address, pte, entry); - spin_unlock(&mm->page_table_lock); - return 1; -} - -/* - * By the time we get here, we already hold the mm semaphore - */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, int write_access) -{ - pgd_t *pgd; - pmd_t *pmd; - - current->state = TASK_RUNNING; - pgd = pgd_offset(mm, address); - - /* - * We need the page table lock to synchronize with kswapd - * and the SMP-safe atomic PTE updates. - */ - spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, address); - - if (pmd) { - pte_t * pte = pte_alloc(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte); - } - spin_unlock(&mm->page_table_lock); - return -1; -} - -/* - * Allocate page middle directory. - * - * We've already handled the fast-path in-line, and we own the - * page table lock. - * - * On a two-level page table, this ends up actually being entirely - * optimized away. - */ -pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - pmd_t *new; - - /* "fast" allocation can happen without dropping the lock.. */ - new = pmd_alloc_one_fast(mm, address); - if (!new) { - spin_unlock(&mm->page_table_lock); - new = pmd_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (!pgd_none(*pgd)) { - pmd_free(new); - check_pgt_cache(); - goto out; - } - } - pgd_populate(mm, pgd, new); -out: - return pmd_offset(pgd, address); -} - -/* - * Allocate the page table directory. 
- * - * We've already handled the fast-path in-line, and we own the - * page table lock. - */ -pte_t fastcall *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) -{ - if (pmd_none(*pmd)) { - pte_t *new; - - /* "fast" allocation can happen without dropping the lock.. */ - new = pte_alloc_one_fast(mm, address); - if (!new) { - spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (!pmd_none(*pmd)) { - pte_free(new); - check_pgt_cache(); - goto out; - } - } - pmd_populate(mm, pmd, new); - } -out: - return pte_offset(pmd, address); -} - -int make_pages_present(unsigned long addr, unsigned long end) -{ - int ret, len, write; - struct vm_area_struct * vma; - - vma = find_vma(current->mm, addr); - write = (vma->vm_flags & VM_WRITE) != 0; - if (addr >= end) - BUG(); - if (end > vma->vm_end) - BUG(); - len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; - ret = get_user_pages(current, current->mm, addr, - len, write, 0, NULL, NULL); - return ret == len ? 0 : -1; -} - -struct page * vmalloc_to_page(void * vmalloc_addr) -{ - unsigned long addr = (unsigned long) vmalloc_addr; - struct page *page = NULL; - pmd_t *pmd; - pte_t *pte; - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); - if (!pmd_none(*pmd)) { - pte = pte_offset(pmd, addr); - if (pte_present(*pte)) { - page = pte_page(*pte); - } - } - } - return page; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mm/mprotect.c --- a/linux-2.4-xen-sparse/mm/mprotect.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,344 +0,0 @@ -/* - * linux/mm/mprotect.c - * - * (C) Copyright 1994 Linus Torvalds - */ -#include <linux/slab.h> -#include <linux/smp_lock.h> -#include <linux/shm.h> -#include <linux/mman.h> - -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> - -static inline void change_pte_range(pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t newprot) -{ - pte_t * pte; - unsigned long end; - - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - pte = pte_offset(pmd, address); - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - if (pte_present(*pte)) { - pte_t entry; - - /* Avoid an SMP race with hardware updated dirty/clean - * bits by wiping the pte and then setting the new pte - * into place. 
- */ - entry = ptep_get_and_clear(pte); - set_pte(pte, pte_modify(entry, newprot)); - } - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline void change_pmd_range(pgd_t * pgd, unsigned long address, - unsigned long size, pgprot_t newprot) -{ - pmd_t * pmd; - unsigned long end; - - if (pgd_none(*pgd)) - return; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - return; - } - pmd = pmd_offset(pgd, address); - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - change_pte_range(pmd, address, end - address, newprot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); -} - -static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot) -{ - pgd_t *dir; - unsigned long beg = start; - - dir = pgd_offset(current->mm, start); - flush_cache_range(current->mm, beg, end); - if (start >= end) - BUG(); - spin_lock(&current->mm->page_table_lock); - do { - change_pmd_range(dir, start, end - start, newprot); - start = (start + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (start && (start < end)); - spin_unlock(&current->mm->page_table_lock); - flush_tlb_range(current->mm, beg, end); - return; -} - -static inline int mprotect_fixup_all(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * prev = *pprev; - struct mm_struct * mm = vma->vm_mm; - - if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) && - !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = vma->vm_end; - __vma_unlink(mm, vma, prev); - spin_unlock(&mm->page_table_lock); - - kmem_cache_free(vm_area_cachep, vma); - mm->map_count--; - - return 0; - } - - spin_lock(&mm->page_table_lock); - vma->vm_flags = newflags; - vma->vm_page_prot = prot; - spin_unlock(&mm->page_table_lock); - - *pprev = vma; - - return 0; -} - -static inline int mprotect_fixup_start(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long end, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * n, * prev = *pprev; - - *pprev = vma; - - if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) && - !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&vma->vm_mm->page_table_lock); - prev->vm_end = end; - vma->vm_start = end; - spin_unlock(&vma->vm_mm->page_table_lock); - - return 0; - } - n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!n) - return -ENOMEM; - *n = *vma; - n->vm_end = end; - n->vm_flags = newflags; - n->vm_raend = 0; - n->vm_page_prot = prot; - if (n->vm_file) - get_file(n->vm_file); - if (n->vm_ops && n->vm_ops->open) - n->vm_ops->open(n); - vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT; - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_start = end; - __insert_vm_struct(current->mm, n); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - return 0; -} - -static inline int mprotect_fixup_end(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * n; - - n = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - if (!n) - return -ENOMEM; - *n = *vma; - n->vm_start = start; - n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; - n->vm_flags = newflags; - n->vm_raend = 0; - n->vm_page_prot = prot; - if (n->vm_file) - get_file(n->vm_file); - if (n->vm_ops &&
n->vm_ops->open) - n->vm_ops->open(n); - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_end = start; - __insert_vm_struct(current->mm, n); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - *pprev = n; - - return 0; -} - -static inline int mprotect_fixup_middle(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, unsigned long end, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * left, * right; - - left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!left) - return -ENOMEM; - right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!right) { - kmem_cache_free(vm_area_cachep, left); - return -ENOMEM; - } - *left = *vma; - *right = *vma; - left->vm_end = start; - right->vm_start = end; - right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; - left->vm_raend = 0; - right->vm_raend = 0; - if (vma->vm_file) - atomic_add(2,&vma->vm_file->f_count); - if (vma->vm_ops && vma->vm_ops->open) { - vma->vm_ops->open(left); - vma->vm_ops->open(right); - } - vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT; - vma->vm_raend = 0; - vma->vm_page_prot = prot; - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_start = start; - vma->vm_end = end; - vma->vm_flags = newflags; - __insert_vm_struct(current->mm, left); - __insert_vm_struct(current->mm, right); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - *pprev = right; - - return 0; -} - -static int mprotect_fixup(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, unsigned long end, unsigned int newflags) -{ - pgprot_t newprot; - int error; - - if (newflags == vma->vm_flags) { - *pprev = vma; - return 0; - } - newprot = protection_map[newflags & 0xf]; - if (start == vma->vm_start) { - if (end == vma->vm_end) - error = mprotect_fixup_all(vma, pprev, newflags, newprot); - else - error = mprotect_fixup_start(vma, pprev, end, newflags, newprot); - } else if (end == vma->vm_end) - error = mprotect_fixup_end(vma, pprev, start, newflags, newprot); - else - error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot); - - if (error) - return error; - - change_protection(start, end, newprot); - return 0; -} - -asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -{ - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * next, * prev; - int error = -EINVAL; - - if (start & ~PAGE_MASK) - return -EINVAL; - len = PAGE_ALIGN(len); - end = start + len; - if (end < start) - return -ENOMEM; - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) - return -EINVAL; - if (end == start) - return 0; - - down_write(&current->mm->mmap_sem); - - vma = find_vma_prev(current->mm, start, &prev); - error = -ENOMEM; - if (!vma || vma->vm_start > start) - goto out; - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - /* mprotect() unsupported for I/O mappings in Xenolinux. */ - error = -EINVAL; - if (vma->vm_flags & VM_IO) - goto out; -#endif - - for (nstart = start ; ; ) { - unsigned int newflags; - int last = 0; - - /* Here we know that vma->vm_start <= nstart < vma->vm_end.
- */ - - newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC)); - if ((newflags & ~(newflags >> 4)) & 0xf) { - error = -EACCES; - goto out; - } - - if (vma->vm_end > end) { - error = mprotect_fixup(vma, &prev, nstart, end, newflags); - goto out; - } - if (vma->vm_end == end) - last = 1; - - tmp = vma->vm_end; - next = vma->vm_next; - error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); - if (error) - goto out; - if (last) - break; - nstart = tmp; - vma = next; - if (!vma || vma->vm_start != nstart) { - error = -ENOMEM; - goto out; - } - } - if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) && - !prev->vm_file && !(prev->vm_flags & VM_SHARED)) { - spin_lock(&prev->vm_mm->page_table_lock); - prev->vm_end = next->vm_end; - __vma_unlink(prev->vm_mm, next, prev); - spin_unlock(&prev->vm_mm->page_table_lock); - - kmem_cache_free(vm_area_cachep, next); - prev->vm_mm->map_count--; - } -out: - up_write(&current->mm->mmap_sem); - return error; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mm/mremap.c --- a/linux-2.4-xen-sparse/mm/mremap.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,390 +0,0 @@ -/* - * linux/mm/remap.c - * - * (C) Copyright 1996 Linus Torvalds - */ - -#include <linux/slab.h> -#include <linux/smp_lock.h> -#include <linux/shm.h> -#include <linux/mman.h> -#include <linux/swap.h> - -#include <asm/uaccess.h> -#include <asm/pgalloc.h> - -extern int vm_enough_memory(long pages); - -static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) -{ - pgd_t * pgd; - pmd_t * pmd; - pte_t * pte = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) - goto end; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - goto end; - } - - pmd = pmd_offset(pgd, addr); - if (pmd_none(*pmd)) - goto end; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto end; - } - - pte = pte_offset(pmd, addr); - if (pte_none(*pte)) - pte = NULL; -end: - return pte; -} - -static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr) -{ - pmd_t * pmd; - pte_t * pte = NULL; - - pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); - if (pmd) - pte = pte_alloc(mm, pmd, addr); - return pte; -} - -static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) -{ - int error = 0; - pte_t pte; - - if (!pte_none(*src)) { - pte = ptep_get_and_clear(src); - if (!dst) { - /* No dest? We must put it back. */ - dst = src; - error++; - } - set_pte(dst, pte); - } - return error; -} - -static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr) -{ - int error = 0; - pte_t * src, * dst; - - spin_lock(&mm->page_table_lock); - src = get_one_pte(mm, old_addr); - if (src) { - dst = alloc_one_pte(mm, new_addr); - src = get_one_pte(mm, old_addr); - if (src) - error = copy_one_pte(mm, src, dst); - } - spin_unlock(&mm->page_table_lock); - return error; -} - -static int move_page_tables(struct mm_struct * mm, - unsigned long new_addr, unsigned long old_addr, unsigned long len) -{ - unsigned long offset = len; - - flush_cache_range(mm, old_addr, old_addr + len); - - /* - * This is not the clever way to do this, but we're taking the - * easy way out on the assumption that most remappings will be - * only a few pages.. This also makes error recovery easier.
- */ - while (offset) { - offset -= PAGE_SIZE; - if (move_one_page(mm, old_addr + offset, new_addr + offset)) - goto oops_we_failed; - } - flush_tlb_range(mm, old_addr, old_addr + len); - return 0; - - /* - * Ok, the move failed because we didn't have enough pages for - * the new page table tree. This is unlikely, but we have to - * take the possibility into account. In that case we just move - * all the pages back (this will work, because we still have - * the old page tables) - */ -oops_we_failed: - flush_cache_range(mm, new_addr, new_addr + len); - while ((offset += PAGE_SIZE) < len) - move_one_page(mm, new_addr + offset, old_addr + offset); - zap_page_range(mm, new_addr, len); - return -1; -} - -static inline unsigned long move_vma(struct vm_area_struct * vma, - unsigned long addr, unsigned long old_len, unsigned long new_len, - unsigned long new_addr) -{ - struct mm_struct * mm = vma->vm_mm; - struct vm_area_struct * new_vma, * next, * prev; - int allocated_vma; - - new_vma = NULL; - next = find_vma_prev(mm, new_addr, &prev); - if (next) { - if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = new_addr + new_len; - spin_unlock(&mm->page_table_lock); - new_vma = prev; - if (next != prev->vm_next) - BUG(); - if (prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = next->vm_end; - __vma_unlink(mm, next, prev); - spin_unlock(&mm->page_table_lock); - - mm->map_count--; - kmem_cache_free(vm_area_cachep, next); - } - } else if (next->vm_start == new_addr + new_len && - can_vma_merge(next, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - next->vm_start = new_addr; - spin_unlock(&mm->page_table_lock); - new_vma = next; - } - } else { - prev = find_vma(mm, new_addr-1); - if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = new_addr + new_len; - spin_unlock(&mm->page_table_lock); - new_vma = prev; - } - } - - allocated_vma = 0; - if (!new_vma) { - new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!new_vma) - goto out; - allocated_vma = 1; - } - - if (!move_page_tables(current->mm, new_addr, addr, old_len)) { - unsigned long vm_locked = vma->vm_flags & VM_LOCKED; - - if (allocated_vma) { - *new_vma = *vma; - new_vma->vm_start = new_addr; - new_vma->vm_end = new_addr+new_len; - new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT; - new_vma->vm_raend = 0; - if (new_vma->vm_file) - get_file(new_vma->vm_file); - if (new_vma->vm_ops && new_vma->vm_ops->open) - new_vma->vm_ops->open(new_vma); - insert_vm_struct(current->mm, new_vma); - } - - /* XXX: possible errors masked, mapping might remain */ - do_munmap(current->mm, addr, old_len); - - current->mm->total_vm += new_len >> PAGE_SHIFT; - if (vm_locked) { - current->mm->locked_vm += new_len >> PAGE_SHIFT; - if (new_len > old_len) - make_pages_present(new_addr + old_len, - new_addr + new_len); - } - return new_addr; - } - if (allocated_vma) - kmem_cache_free(vm_area_cachep, new_vma); - out: - return -ENOMEM; -} - -/* - * Expand (or shrink) an existing mapping, potentially moving it at the - * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) - * - * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise - * This option implies 
MREMAP_MAYMOVE. - */ -unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) -{ - struct vm_area_struct *vma; - unsigned long ret = -EINVAL; - - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) - goto out; - - if (addr & ~PAGE_MASK) - goto out; - - old_len = PAGE_ALIGN(old_len); - new_len = PAGE_ALIGN(new_len); - - if (old_len > TASK_SIZE || addr > TASK_SIZE - old_len) - goto out; - - if (addr >= TASK_SIZE) - goto out; - - /* new_addr is only valid if MREMAP_FIXED is specified */ - if (flags & MREMAP_FIXED) { - if (new_addr & ~PAGE_MASK) - goto out; - if (!(flags & MREMAP_MAYMOVE)) - goto out; - - if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) - goto out; - - if (new_addr >= TASK_SIZE) - goto out; - - /* - * Allow new_len == 0 only if new_addr == addr - * to preserve truncation in place (that was working - * safe and some app may depend on it). - */ - if (unlikely(!new_len && new_addr != addr)) - goto out; - - /* Check if the location we're moving into overlaps the - * old location at all, and fail if it does. - */ - if ((new_addr <= addr) && (new_addr+new_len) > addr) - goto out; - - if ((addr <= new_addr) && (addr+old_len) > new_addr) - goto out; - - ret = do_munmap(current->mm, new_addr, new_len); - if (ret && new_len) - goto out; - } - - /* - * Always allow a shrinking remap: that just unmaps - * the unnecessary pages.. - */ - if (old_len >= new_len) { - ret = do_munmap(current->mm, addr+new_len, old_len - new_len); - if (ret && old_len != new_len) - goto out; - ret = addr; - if (!(flags & MREMAP_FIXED) || (new_addr == addr)) - goto out; - } - - /* - * Ok, we need to grow.. or relocate. - */ - ret = -EFAULT; - vma = find_vma(current->mm, addr); - if (!vma || vma->vm_start > addr) - goto out; - /* We can't remap across vm area boundaries */ - if (old_len > vma->vm_end - addr) - goto out; - if (vma->vm_flags & VM_DONTEXPAND) { - if (new_len > old_len) - goto out; - } - if (vma->vm_flags & VM_LOCKED) { - unsigned long locked = current->mm->locked_vm << PAGE_SHIFT; - locked += new_len - old_len; - ret = -EAGAIN; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) - goto out; - } - ret = -ENOMEM; - if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) - > current->rlim[RLIMIT_AS].rlim_cur) - goto out; - /* Private writable mapping? Check memory availability.. */ - if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory((new_len - old_len) >> PAGE_SHIFT)) - goto out; - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - /* mremap() unsupported for I/O mappings in Xenolinux. */ - ret = -EINVAL; - if (vma->vm_flags & VM_IO) - goto out; -#endif - - /* old_len exactly to the end of the area.. - * And we're not relocating the area. - */ - if (old_len == vma->vm_end - addr && - !((flags & MREMAP_FIXED) && (addr != new_addr)) && - (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { - unsigned long max_addr = TASK_SIZE; - if (vma->vm_next) - max_addr = vma->vm_next->vm_start; - /* can we just expand the current mapping? 
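[Aside: the in-place test below reduces to "the gap up to the next mapping must hold the new length". A stand-alone sketch of just that check, with illustrative addresses rather than real VMA state:]

#include <stdio.h>

/* Mirrors do_mremap()'s expansion test: [addr, addr+old_len) may grow
 * to new_len only if the next mapping (or TASK_SIZE) leaves room. */
static int can_expand_in_place(unsigned long addr, unsigned long new_len,
			       unsigned long next_start)
{
	return next_start - addr >= new_len;
}

int main(void)
{
	/* mapping at 0x1000, next mapping starts at 0x5000 */
	printf("%d\n", can_expand_in_place(0x1000, 0x3000, 0x5000));	/* 1 */
	printf("%d\n", can_expand_in_place(0x1000, 0x5000, 0x5000));	/* 0 */
	return 0;
}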
*/ - if (max_addr - addr >= new_len) { - int pages = (new_len - old_len) >> PAGE_SHIFT; - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_end = addr + new_len; - spin_unlock(&vma->vm_mm->page_table_lock); - current->mm->total_vm += pages; - if (vma->vm_flags & VM_LOCKED) { - current->mm->locked_vm += pages; - make_pages_present(addr + old_len, - addr + new_len); - } - ret = addr; - goto out; - } - } - - /* - * We weren't able to just expand or shrink the area, - * we need to create a new one and move it.. - */ - ret = -ENOMEM; - if (flags & MREMAP_MAYMOVE) { - if (!(flags & MREMAP_FIXED)) { - unsigned long map_flags = 0; - if (vma->vm_flags & VM_SHARED) - map_flags |= MAP_SHARED; - - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags); - ret = new_addr; - if (new_addr & ~PAGE_MASK) - goto out; - } - ret = move_vma(vma, addr, old_len, new_len, new_addr); - } -out: - return ret; -} - -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) -{ - unsigned long ret; - - down_write(&current->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up_write(&current->mm->mmap_sem); - return ret; -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/mm/page_alloc.c --- a/linux-2.4-xen-sparse/mm/page_alloc.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,972 +0,0 @@ -/* - * linux/mm/page_alloc.c - * - * Manages the free list, the system allocates free pages here. - * Note that kmalloc() lives in slab.c - * - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * Swap reorganised 29.12.95, Stephen Tweedie - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999 - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 - * Zone balancing, Kanoj Sarcar, SGI, Jan 2000 - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/swapctl.h> -#include <linux/interrupt.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/slab.h> -#include <linux/module.h> - -int nr_swap_pages; -int nr_active_pages; -int nr_inactive_pages; -LIST_HEAD(inactive_list); -LIST_HEAD(active_list); -pg_data_t *pgdat_list; - -/* - * - * The zone_table array is used to look up the address of the - * struct zone corresponding to a given zone number (ZONE_DMA, - * ZONE_NORMAL, or ZONE_HIGHMEM).
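[Aside: zone_table is keyed by node and zone together, so entry nid * MAX_NR_ZONES + zone holds that zone's address; the same flat index is stored per page by set_page_zone() in free_area_init_core() further down. A simplified model, with zone_model standing in for zone_t:]

#include <stdio.h>

#define MAX_NR_ZONES 3
#define MAX_NR_NODES 2

struct zone_model { const char *name; };	/* stand-in for zone_t */

static struct zone_model *table[MAX_NR_ZONES * MAX_NR_NODES];

static struct zone_model *lookup_zone(int nid, int zone_num)
{
	/* same flat indexing as zone_table[nid * MAX_NR_ZONES + j] */
	return table[nid * MAX_NR_ZONES + zone_num];
}

int main(void)
{
	static struct zone_model dma = { "DMA" }, normal = { "Normal" };

	table[0 * MAX_NR_ZONES + 0] = &dma;
	table[0 * MAX_NR_ZONES + 1] = &normal;
	printf("%s\n", lookup_zone(0, 1)->name);	/* Normal */
	return 0;
}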
- */ -zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES]; -EXPORT_SYMBOL(zone_table); - -static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; -static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, }; -static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; -static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; -static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; - -int vm_gfp_debug = 0; - -static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order)); - -static spinlock_t free_pages_ok_no_irq_lock = SPIN_LOCK_UNLOCKED; -struct page * free_pages_ok_no_irq_head; - -static void do_free_pages_ok_no_irq(void * arg) -{ - struct page * page, * __page; - - spin_lock_irq(&free_pages_ok_no_irq_lock); - - page = free_pages_ok_no_irq_head; - free_pages_ok_no_irq_head = NULL; - - spin_unlock_irq(&free_pages_ok_no_irq_lock); - - while (page) { - __page = page; - page = page->next_hash; - __free_pages_ok(__page, __page->index); - } -} - -static struct tq_struct free_pages_ok_no_irq_task = { - .routine = do_free_pages_ok_no_irq, -}; - - -/* - * Temporary debugging check. - */ -#define BAD_RANGE(zone, page) \ -( \ - (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \ - || (((page) - mem_map) < (zone)->zone_start_mapnr) \ - || ((zone) != page_zone(page)) \ -) - -/* - * Freeing function for a buddy system allocator. - * Contrary to prior comments, this is *NOT* hairy, and there - * is no reason for anyone not to understand it. - * - * The concept of a buddy system is to maintain direct-mapped tables - * (containing bit values) for memory blocks of various "orders". - * The bottom level table contains the map for the smallest allocatable - * units of memory (here, pages), and each level above it describes - * pairs of units from the levels below, hence, "buddies". - * At a high level, all that happens here is marking the table entry - * at the bottom level available, and propagating the changes upward - * as necessary, plus some accounting needed to play nicely with other - * parts of the VM system. - * At each level, we keep one bit for each pair of blocks, which - * is set to 1 iff only one of the pair is allocated. So when we - * are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. - * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. - * - * -- wli - */ - -static void fastcall __free_pages_ok (struct page *page, unsigned int order) -{ - unsigned long index, page_idx, mask, flags; - free_area_t *area; - struct page *base; - zone_t *zone; - - if (PageForeign(page)) - return (PageForeignDestructor(page))(page); - - /* - * Yes, think what happens when other parts of the kernel take - * a reference to a page in order to pin it for io. 
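[Aside: a page that is still on the LRU cannot be torn down from interrupt context, so the code queues it on the spinlock-protected free_pages_ok_no_irq list defined above (reusing page->next_hash and page->index) and a scheduled task drains the list later; the enqueue is visible just below in __free_pages_ok(). A user-space sketch of the push/drain pair, locking elided:]

#include <stdio.h>

struct item { struct item *next; int order; };

static struct item *deferred_head;	/* free_pages_ok_no_irq_head */

static void push_deferred(struct item *it, int order)
{
	/* in the kernel this runs under free_pages_ok_no_irq_lock */
	it->order = order;		/* page->index = order */
	it->next = deferred_head;	/* page->next_hash = head */
	deferred_head = it;
}

static void drain_deferred(void)	/* do_free_pages_ok_no_irq() */
{
	struct item *it = deferred_head;

	deferred_head = NULL;		/* detach the whole list at once */
	while (it) {
		struct item *next = it->next;

		printf("freeing deferred item, order %d\n", it->order);
		it = next;
	}
}

int main(void)
{
	struct item a, b;

	push_deferred(&a, 0);
	push_deferred(&b, 2);
	drain_deferred();
	return 0;
}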
-ben - */ - if (PageLRU(page)) { - if (unlikely(in_interrupt())) { - unsigned long flags; - - spin_lock_irqsave(&free_pages_ok_no_irq_lock, flags); - page->next_hash = free_pages_ok_no_irq_head; - free_pages_ok_no_irq_head = page; - page->index = order; - - spin_unlock_irqrestore(&free_pages_ok_no_irq_lock, flags); - - schedule_task(&free_pages_ok_no_irq_task); - return; - } - - lru_cache_del(page); - } - - if (page->buffers) - BUG(); - if (page->mapping) - BUG(); - if (!VALID_PAGE(page)) - BUG(); - if (PageLocked(page)) - BUG(); - if (PageActive(page)) - BUG(); - ClearPageReferenced(page); - ClearPageDirty(page); - - if (current->flags & PF_FREE_PAGES) - goto local_freelist; - back_local_freelist: - - zone = page_zone(page); - - mask = (~0UL) << order; - base = zone->zone_mem_map; - page_idx = page - base; - if (page_idx & ~mask) - BUG(); - index = page_idx >> (1 + order); - - area = zone->free_area + order; - - spin_lock_irqsave(&zone->lock, flags); - - zone->free_pages -= mask; - - while (mask + (1 << (MAX_ORDER-1))) { - struct page *buddy1, *buddy2; - - if (area >= zone->free_area + MAX_ORDER) - BUG(); - if (!__test_and_change_bit(index, area->map)) - /* - * the buddy page is still allocated. - */ - break; - /* - * Move the buddy up one level. - * This code is taking advantage of the identity: - * -mask = 1+~mask - */ - buddy1 = base + (page_idx ^ -mask); - buddy2 = base + page_idx; - if (BAD_RANGE(zone,buddy1)) - BUG(); - if (BAD_RANGE(zone,buddy2)) - BUG(); - - list_del(&buddy1->list); - mask <<= 1; - area++; - index >>= 1; - page_idx &= mask; - } - list_add(&(base + page_idx)->list, &area->free_list); - - spin_unlock_irqrestore(&zone->lock, flags); - return; - - local_freelist: - if (current->nr_local_pages) - goto back_local_freelist; - if (in_interrupt()) - goto back_local_freelist; - - list_add(&page->list, &current->local_pages); - page->index = order; - current->nr_local_pages++; -} - -#define MARK_USED(index, order, area) \ - __change_bit((index) >> (1+(order)), (area)->map) - -static inline struct page * expand (zone_t *zone, struct page *page, - unsigned long index, int low, int high, free_area_t * area) -{ - unsigned long size = 1 << high; - - while (high > low) { - if (BAD_RANGE(zone,page)) - BUG(); - area--; - high--; - size >>= 1; - list_add(&(page)->list, &(area)->free_list); - MARK_USED(index, high, area); - index += size; - page += size; - } - if (BAD_RANGE(zone,page)) - BUG(); - return page; -} - -static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order)); -static struct page * fastcall rmqueue(zone_t *zone, unsigned int order) -{ - free_area_t * area = zone->free_area + order; - unsigned int curr_order = order; - struct list_head *head, *curr; - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&zone->lock, flags); - do { - head = &area->free_list; - curr = head->next; - - if (curr != head) { - unsigned int index; - - page = list_entry(curr, struct page, list); - if (BAD_RANGE(zone,page)) - BUG(); - list_del(curr); - index = page - zone->zone_mem_map; - if (curr_order != MAX_ORDER-1) - MARK_USED(index, curr_order, area); - zone->free_pages -= 1UL << order; - - page = expand(zone, page, index, order, curr_order, area); - spin_unlock_irqrestore(&zone->lock, flags); - - set_page_count(page, 1); - if (BAD_RANGE(zone,page)) - BUG(); - if (PageLRU(page)) - BUG(); - if (PageActive(page)) - BUG(); - return page; - } - curr_order++; - area++; - } while (curr_order < MAX_ORDER); - spin_unlock_irqrestore(&zone->lock, flags); - - return NULL; -} - -#ifndef
CONFIG_DISCONTIGMEM -struct page * fastcall _alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - return __alloc_pages(gfp_mask, order, - contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK)); -} -#endif - -static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *)); -static struct page * fastcall balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) -{ - struct page * page = NULL; - int __freed; - - if (in_interrupt()) - BUG(); - - current->allocation_order = order; - current->flags |= PF_MEMALLOC | PF_FREE_PAGES; - - __freed = try_to_free_pages_zone(classzone, gfp_mask); - - current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES); - - if (current->nr_local_pages) { - struct list_head * entry, * local_pages; - struct page * tmp; - int nr_pages; - - local_pages = &current->local_pages; - - if (likely(__freed)) { - /* pick from the last inserted so we're lifo */ - entry = local_pages->next; - do { - tmp = list_entry(entry, struct page, list); - if (tmp->index == order && memclass(page_zone(tmp), classzone)) { - list_del(entry); - current->nr_local_pages--; - set_page_count(tmp, 1); - page = tmp; - - if (page->buffers) - BUG(); - if (page->mapping) - BUG(); - if (!VALID_PAGE(page)) - BUG(); - if (PageLocked(page)) - BUG(); - if (PageLRU(page)) - BUG(); - if (PageActive(page)) - BUG(); - if (PageDirty(page)) - BUG(); - - break; - } - } while ((entry = entry->next) != local_pages); - } - - nr_pages = current->nr_local_pages; - /* free in reverse order so that the global order will be lifo */ - while ((entry = local_pages->prev) != local_pages) { - list_del(entry); - tmp = list_entry(entry, struct page, list); - __free_pages_ok(tmp, tmp->index); - if (!nr_pages--) - BUG(); - } - current->nr_local_pages = 0; - } - - *freed = __freed; - return page; -} - -static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order) -{ - long free = zone->free_pages - (1UL << order); - return free >= 0 ?
free : 0; -} - -/* - * This is the 'heart' of the zoned buddy allocator: - */ -struct page * fastcall __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist) -{ - zone_t **zone, * classzone; - struct page * page; - int freed, class_idx; - - zone = zonelist->zones; - classzone = *zone; - class_idx = zone_idx(classzone); - - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].low) { - page = rmqueue(z, order); - if (page) - return page; - } - } - - classzone->need_balance = 1; - mb(); - if (waitqueue_active(&kswapd_wait)) - wake_up_interruptible(&kswapd_wait); - - zone = zonelist->zones; - for (;;) { - unsigned long min; - zone_t *z = *(zone++); - if (!z) - break; - - min = z->watermarks[class_idx].min; - if (!(gfp_mask & __GFP_WAIT)) - min >>= 2; - if (zone_free_pages(z, order) > min) { - page = rmqueue(z, order); - if (page) - return page; - } - } - - /* here we're in the low on memory slow path */ - - if ((current->flags & PF_MEMALLOC) && - (!in_interrupt() || (current->flags & PF_MEMDIE))) { - zone = zonelist->zones; - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - page = rmqueue(z, order); - if (page) - return page; - } - return NULL; - } - - /* Atomic allocations - we can't balance anything */ - if (!(gfp_mask & __GFP_WAIT)) - goto out; - - rebalance: - page = balance_classzone(classzone, gfp_mask, order, &freed); - if (page) - return page; - - zone = zonelist->zones; - if (likely(freed)) { - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].min) { - page = rmqueue(z, order); - if (page) - return page; - } - } - goto rebalance; - } else { - /* - * Check that no other task is been killed meanwhile, - * in such a case we can succeed the allocation. - */ - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].high) { - page = rmqueue(z, order); - if (page) - return page; - } - } - } - - out: - printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n", - order, gfp_mask, !!(current->flags & PF_MEMALLOC)); - if (unlikely(vm_gfp_debug)) - dump_stack(); - return NULL; -} - -/* - * Common helper functions. 
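[Aside: __alloc_pages() above probes with progressively lower thresholds: every zone against its low watermark, then, after waking kswapd, against the min watermark, and only then the reclaim slow path via balance_classzone(). The skeleton, with made-up zone numbers:]

#include <stdio.h>

enum { NZONES = 2 };

/* Illustrative per-zone state; the real values live in zone_t. */
static long freep[NZONES]    = { 10, 20 };
static long low_mark[NZONES] = { 32, 32 };
static long min_mark[NZONES] = { 16, 16 };

static int try_zones(const long *mark)
{
	int z;

	for (z = 0; z < NZONES; z++)
		if (freep[z] > mark[z])
			return z;	/* rmqueue() would succeed here */
	return -1;
}

int main(void)
{
	int z = try_zones(low_mark);		/* fast path */

	if (z < 0)
		z = try_zones(min_mark);	/* after waking kswapd */
	if (z >= 0)
		printf("allocated from zone %d\n", z);	/* zone 1 */
	else
		printf("fall through to reclaim\n");
	return 0;
}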
- */ -fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order) -{ - struct page * page; - - page = alloc_pages(gfp_mask, order); - if (!page) - return 0; - return (unsigned long) page_address(page); -} - -fastcall unsigned long get_zeroed_page(unsigned int gfp_mask) -{ - struct page * page; - - page = alloc_pages(gfp_mask, 0); - if (page) { - void *address = page_address(page); - clear_page(address); - return (unsigned long) address; - } - return 0; -} - -fastcall void __free_pages(struct page *page, unsigned int order) -{ - if (!PageReserved(page) && put_page_testzero(page)) - __free_pages_ok(page, order); -} - -fastcall void free_pages(unsigned long addr, unsigned int order) -{ - if (addr != 0) - __free_pages(virt_to_page(addr), order); -} - -/* - * Total amount of free (allocatable) RAM: - */ -unsigned int nr_free_pages (void) -{ - unsigned int sum = 0; - zone_t *zone; - - for_each_zone(zone) - sum += zone->free_pages; - - return sum; -} - -/* - * Amount of free RAM allocatable as buffer memory: - */ -unsigned int nr_free_buffer_pages (void) -{ - pg_data_t *pgdat; - unsigned int sum = 0; - zonelist_t *zonelist; - zone_t **zonep, *zone; - - for_each_pgdat(pgdat) { - int class_idx; - zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); - zonep = zonelist->zones; - zone = *zonep; - class_idx = zone_idx(zone); - - sum += zone->nr_cache_pages; - for (; zone; zone = *zonep++) { - int free = zone->free_pages - zone->watermarks[class_idx].high; - if (free <= 0) - continue; - sum += free; - } - } - - return sum; -} - -#if CONFIG_HIGHMEM -unsigned int nr_free_highpages (void) -{ - pg_data_t *pgdat; - unsigned int pages = 0; - - for_each_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; - - return pages; -} - -unsigned int freeable_lowmem(void) -{ - unsigned int pages = 0; - pg_data_t *pgdat; - - for_each_pgdat(pgdat) { - pages += pgdat->node_zones[ZONE_DMA].free_pages; - pages += pgdat->node_zones[ZONE_DMA].nr_active_pages; - pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages; - pages += pgdat->node_zones[ZONE_NORMAL].free_pages; - pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages; - pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages; - } - - return pages; -} -#endif - -#define K(x) ((x) << (PAGE_SHIFT-10)) - -/* - * Show free area list (used inside shift_scroll-lock stuff) - * We also calculate the percentage fragmentation. We do this by counting the - * memory on each free list with the exception of the first item on the list. 
- */ -void show_free_areas_core(pg_data_t *pgdat) -{ - unsigned int order; - unsigned type; - pg_data_t *tmpdat = pgdat; - - printk("Free pages: %6dkB (%6dkB HighMem)\n", - K(nr_free_pages()), - K(nr_free_highpages())); - - while (tmpdat) { - zone_t *zone; - for (zone = tmpdat->node_zones; - zone < tmpdat->node_zones + MAX_NR_ZONES; zone++) - printk("Zone:%s freepages:%6lukB\n", - zone->name, - K(zone->free_pages)); - - tmpdat = tmpdat->node_next; - } - - printk("( Active: %d, inactive: %d, free: %d )\n", - nr_active_pages, - nr_inactive_pages, - nr_free_pages()); - - for (type = 0; type < MAX_NR_ZONES; type++) { - struct list_head *head, *curr; - zone_t *zone = pgdat->node_zones + type; - unsigned long nr, total, flags; - - total = 0; - if (zone->size) { - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < MAX_ORDER; order++) { - head = &(zone->free_area + order)->free_list; - curr = head; - nr = 0; - for (;;) { - if ((curr = curr->next) == head) - break; - nr++; - } - total += nr * (1 << order); - printk("%lu*%lukB ", nr, K(1UL) << order); - } - spin_unlock_irqrestore(&zone->lock, flags); - } - printk("= %lukB)\n", K(total)); - } - -#ifdef SWAP_CACHE_INFO - show_swap_cache_info(); -#endif -} - -void show_free_areas(void) -{ - show_free_areas_core(pgdat_list); -} - -/* - * Builds allocation fallback zone lists. - */ -static inline void build_zonelists(pg_data_t *pgdat) -{ - int i, j, k; - - for (i = 0; i <= GFP_ZONEMASK; i++) { - zonelist_t *zonelist; - zone_t *zone; - - zonelist = pgdat->node_zonelists + i; - memset(zonelist, 0, sizeof(*zonelist)); - - j = 0; - k = ZONE_NORMAL; - if (i & __GFP_HIGHMEM) - k = ZONE_HIGHMEM; - if (i & __GFP_DMA) - k = ZONE_DMA; - - switch (k) { - default: - BUG(); - /* - * fallthrough: - */ - case ZONE_HIGHMEM: - zone = pgdat->node_zones + ZONE_HIGHMEM; - if (zone->size) { -#ifndef CONFIG_HIGHMEM - BUG(); -#endif - zonelist->zones[j++] = zone; - } - case ZONE_NORMAL: - zone = pgdat->node_zones + ZONE_NORMAL; - if (zone->size) - zonelist->zones[j++] = zone; - case ZONE_DMA: - zone = pgdat->node_zones + ZONE_DMA; - if (zone->size) - zonelist->zones[j++] = zone; - } - zonelist->zones[j++] = NULL; - } -} - -/* - * Helper functions to size the waitqueue hash table. - * Essentially these want to choose hash table sizes sufficiently - * large so that collisions trying to wait on pages are rare. - * But in fact, the number of active page waitqueues on typical - * systems is ridiculously low, less than 200. So this is even - * conservative, even though it seems large. - * - * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to - * waitqueues, i.e. the size of the waitq table given the number of pages. - */ -#define PAGES_PER_WAITQUEUE 256 - -static inline unsigned long wait_table_size(unsigned long pages) -{ - unsigned long size = 1; - - pages /= PAGES_PER_WAITQUEUE; - - while (size < pages) - size <<= 1; - - /* - * Once we have dozens or even hundreds of threads sleeping - * on IO we've got bigger problems than wait queue collision. - * Limit the size of the wait table to a reasonable size. - */ - size = min(size, 4096UL); - - return size; -} - -/* - * This is an integer logarithm so that shifts can be used later - * to extract the more random high bits from the multiplicative - * hash function before the remainder is taken. 
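[Aside: wait_table_size() above rounds pages/PAGES_PER_WAITQUEUE up to the next power of two and caps the result at 4096 entries; wait_table_bits() then recovers log2 of that size for the hash shift. The arithmetic, checked in plain C:]

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

/* Same rounding as the kernel's wait_table_size(). */
static unsigned long table_size(unsigned long pages)
{
	unsigned long size = 1;

	pages /= PAGES_PER_WAITQUEUE;
	while (size < pages)
		size <<= 1;		/* round up to a power of two */
	return size < 4096UL ? size : 4096UL;
}

int main(void)
{
	/* 131072 pages (512 MB of 4 kB pages): 131072/256 = 512 queues */
	printf("%lu\n", table_size(131072));	/* 512 */
	/* very large zones are capped */
	printf("%lu\n", table_size(1UL << 28));	/* 4096 */
	return 0;
}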
- */ -static inline unsigned long wait_table_bits(unsigned long size) -{ - return ffz(~size); -} - -#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) - -/* - * Set up the zone data structures: - * - mark all pages reserved - * - mark all memory queues empty - * - clear the memory bitmaps - */ -void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, - unsigned long *zones_size, unsigned long zone_start_paddr, - unsigned long *zholes_size, struct page *lmem_map) -{ - unsigned long i, j; - unsigned long map_size; - unsigned long totalpages, offset, realtotalpages; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); - - if (zone_start_paddr & ~PAGE_MASK) - BUG(); - - totalpages = 0; - for (i = 0; i < MAX_NR_ZONES; i++) { - unsigned long size = zones_size[i]; - totalpages += size; - } - realtotalpages = totalpages; - if (zholes_size) - for (i = 0; i < MAX_NR_ZONES; i++) - realtotalpages -= zholes_size[i]; - - printk("On node %d totalpages: %lu\n", nid, realtotalpages); - - /* - * Some architectures (with lots of mem and discontinous memory - * maps) have to search for a good mem_map area: - * For discontigmem, the conceptual mem map array starts from - * PAGE_OFFSET, we need to align the actual array onto a mem map - * boundary, so that MAP_NR works. - */ - map_size = (totalpages + 1)*sizeof(struct page); - if (lmem_map == (struct page *)0) { - lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size); - lmem_map = (struct page *)(PAGE_OFFSET + - MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET)); - } - *gmap = pgdat->node_mem_map = lmem_map; - pgdat->node_size = totalpages; - pgdat->node_start_paddr = zone_start_paddr; - pgdat->node_start_mapnr = (lmem_map - mem_map); - pgdat->nr_zones = 0; - - offset = lmem_map - mem_map; - for (j = 0; j < MAX_NR_ZONES; j++) { - zone_t *zone = pgdat->node_zones + j; - unsigned long mask; - unsigned long size, realsize; - int idx; - - zone_table[nid * MAX_NR_ZONES + j] = zone; - realsize = size = zones_size[j]; - if (zholes_size) - realsize -= zholes_size[j]; - - printk("zone(%lu): %lu pages.\n", j, size); - zone->size = size; - zone->realsize = realsize; - zone->name = zone_names[j]; - zone->lock = SPIN_LOCK_UNLOCKED; - zone->zone_pgdat = pgdat; - zone->free_pages = 0; - zone->need_balance = 0; - zone->nr_active_pages = zone->nr_inactive_pages = 0; - - - if (!size) - continue; - - /* - * The per-page waitqueue mechanism uses hashed waitqueues - * per zone. 
- */ - zone->wait_table_size = wait_table_size(size); - zone->wait_table_shift = - BITS_PER_LONG - wait_table_bits(zone->wait_table_size); - zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, zone->wait_table_size - * sizeof(wait_queue_head_t)); - - for(i = 0; i < zone->wait_table_size; ++i) - init_waitqueue_head(zone->wait_table + i); - - pgdat->nr_zones = j+1; - - mask = (realsize / zone_balance_ratio[j]); - if (mask < zone_balance_min[j]) - mask = zone_balance_min[j]; - else if (mask > zone_balance_max[j]) - mask = zone_balance_max[j]; - zone->watermarks[j].min = mask; - zone->watermarks[j].low = mask*2; - zone->watermarks[j].high = mask*3; - /* now set the watermarks of the lower zones in the "j" classzone */ - for (idx = j-1; idx >= 0; idx--) { - zone_t * lower_zone = pgdat->node_zones + idx; - unsigned long lower_zone_reserve; - if (!lower_zone->size) - continue; - - mask = lower_zone->watermarks[idx].min; - lower_zone->watermarks[j].min = mask; - lower_zone->watermarks[j].low = mask*2; - lower_zone->watermarks[j].high = mask*3; - - /* now the brainer part */ - lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx]; - lower_zone->watermarks[j].min += lower_zone_reserve; - lower_zone->watermarks[j].low += lower_zone_reserve; - lower_zone->watermarks[j].high += lower_zone_reserve; - - realsize += lower_zone->realsize; - } - - zone->zone_mem_map = mem_map + offset; - zone->zone_start_mapnr = offset; - zone->zone_start_paddr = zone_start_paddr; - - if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1)) - printk("BUG: wrong zone alignment, it will crash\n"); - - /* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. Non-atomic initialization, single-pass. - */ - for (i = 0; i < size; i++) { - struct page *page = mem_map + offset + i; - set_page_zone(page, nid * MAX_NR_ZONES + j); - set_page_count(page, 0); - SetPageReserved(page); - INIT_LIST_HEAD(&page->list); - if (j != ZONE_HIGHMEM) - set_page_address(page, __va(zone_start_paddr)); - zone_start_paddr += PAGE_SIZE; - } - - offset += size; - for (i = 0; ; i++) { - unsigned long bitmap_size; - - INIT_LIST_HEAD(&zone->free_area[i].free_list); - if (i == MAX_ORDER-1) { - zone->free_area[i].map = NULL; - break; - } - - /* - * Page buddy system uses "index >> (i+1)", - * where "index" is at most "size-1". - * - * The extra "+3" is to round down to byte - * size (8 bits per byte assumption). Thus - * we get "(size-1) >> (i+4)" as the last byte - * we can access. - * - * The "+1" is because we want to round the - * byte allocation up rather than down. So - * we should have had a "+7" before we shifted - * down by three. Also, we have to add one as - * we actually _use_ the last bit (it's [0,n] - * inclusive, not [0,n[). - * - * So we actually had +7+1 before we shift - * down by 3. But (n+8) >> 3 == (n >> 3) + 1 - * (modulo overflows, which we do not have). - * - * Finally, we LONG_ALIGN because all bitmap - * operations are on longs. 
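[Aside: the sizing argument spelled out just above can be checked numerically: the order-i buddy map for a zone of size pages needs size >> (i+1) bits, and ((size-1) >> (i+4)) + 1 bytes, long-aligned, always covers that:]

#include <stdio.h>

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

int main(void)
{
	unsigned long size = 4097;	/* pages in the zone; odd on purpose */
	unsigned int i;

	for (i = 0; i < 4; i++) {
		unsigned long bits  = size >> (i + 1);	/* one bit per buddy pair */
		unsigned long bytes = LONG_ALIGN(((size - 1) >> (i + 4)) + 1);

		printf("order %u: %lu bits, %lu bytes hold %lu bits\n",
		       i, bits, bytes, bytes * 8);
	}
	return 0;
}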
- */ - bitmap_size = (size-1) >> (i+4); - bitmap_size = LONG_ALIGN(bitmap_size+1); - zone->free_area[i].map = - (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size); - } - } - build_zonelists(pgdat); -} - -void __init free_area_init(unsigned long *zones_size) -{ - free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0); -} - -static int __init setup_mem_frac(char *str) -{ - int j = 0; - - while (get_option(&str, &zone_balance_ratio[j++]) == 2); - printk("setup_mem_frac: "); - for (j = 0; j < MAX_NR_ZONES; j++) printk("%d ", zone_balance_ratio[j]); - printk("\n"); - return 1; -} - -__setup("memfrac=", setup_mem_frac); - -static int __init setup_lower_zone_reserve(char *str) -{ - int j = 0; - - while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2); - printk("setup_lower_zone_reserve: "); - for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d ", lower_zone_reserve_ratio[j]); - printk("\n"); - return 1; -} - -__setup("lower_zone_reserve=", setup_lower_zone_reserve); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.4-xen-sparse/net/core/skbuff.c --- a/linux-2.4-xen-sparse/net/core/skbuff.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,1309 +0,0 @@ -/* - * Routines having to do with the 'struct sk_buff' memory handlers. - * - * Authors: Alan Cox <iiitac@xxxxxxxxxxxxxx> - * Florian La Roche <rzsfl@xxxxxxxxxxxx> - * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * - * Fixes: - * Alan Cox : Fixed the worst of the load balancer bugs. - * Dave Platt : Interrupt stacking fix. - * Richard Kooijman : Timestamp fixes. - * Alan Cox : Changed buffer format. - * Alan Cox : destructor hook for AF_UNIX etc. - * Linus Torvalds : Better skb_clone. - * Alan Cox : Added skb_copy. - * Alan Cox : Added all the changed routines Linus - * only put in the headers - * Ray VanTassle : Fixed --skb->lock in free - * Alan Cox : skb_copy copy arp field - * Andi Kleen : slabified it. - * - * NOTE: - * The __skb_ routines should be called with interrupts - * disabled, or you better be *real* sure that the operation is atomic - * with respect to whatever list is being frobbed (e.g. via lock_sock() - * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/cache.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <linux/highmem.h> - -#include <net/protocol.h> -#include <net/dst.h> -#include <net/sock.h> -#include <net/checksum.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -int sysctl_hot_list_len = 128; - -static kmem_cache_t *skbuff_head_cache; - -static union { - struct sk_buff_head list; - char pad[SMP_CACHE_BYTES]; -} skb_head_pool[NR_CPUS]; - -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. 
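[Aside: skb_head_pool above keeps a per-CPU hot list of recycled sk_buff heads, padded to a cache line to avoid false sharing; allocation prefers the local list and falls back to the slab cache, and frees recycle heads until the list holds sysctl_hot_list_len of them. A user-space sketch of that fast path; pool_get() and pool_put() are illustrative names for skb_head_from_pool() and skb_head_to_pool():]

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4
#define HOT_LIST_LEN 128	/* sysctl_hot_list_len */

struct head { struct head *next; };

static struct head *pool[NR_CPUS];
static int pool_len[NR_CPUS];

static struct head *pool_get(int cpu)	/* skb_head_from_pool() */
{
	struct head *h = pool[cpu];

	if (h) {
		pool[cpu] = h->next;
		pool_len[cpu]--;
		return h;
	}
	return malloc(sizeof(*h));	/* fall back to the allocator */
}

static void pool_put(int cpu, struct head *h)	/* skb_head_to_pool() */
{
	if (pool_len[cpu] < HOT_LIST_LEN) {
		h->next = pool[cpu];
		pool[cpu] = h;
		pool_len[cpu]++;
	} else {
		free(h);		/* list full: really free it */
	}
}

int main(void)
{
	struct head *h = pool_get(0);

	pool_put(0, h);
	printf("cpu0 pool length: %d\n", pool_len[0]);	/* 1 */
	return 0;
}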
- */ - -/** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. - */ - -void skb_over_panic(struct sk_buff *skb, int sz, void *here) -{ - printk("skput:over: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - - -void skb_under_panic(struct sk_buff *skb, int sz, void *here) -{ - printk("skput:under: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -static __inline__ struct sk_buff *skb_head_from_pool(void) -{ - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - - if (skb_queue_len(list)) { - struct sk_buff *skb; - unsigned long flags; - - local_irq_save(flags); - skb = __skb_dequeue(list); - local_irq_restore(flags); - return skb; - } - return NULL; -} - -static __inline__ void skb_head_to_pool(struct sk_buff *skb) -{ - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - - if (skb_queue_len(list) < sysctl_hot_list_len) { - unsigned long flags; - - local_irq_save(flags); - __skb_queue_head(list, skb); - local_irq_restore(flags); - - return; - } - kmem_cache_free(skbuff_head_cache, skb); -} - - -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - -/** - * alloc_skb - allocate a network buffer - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ - -struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. Size must match skb_add_mtu(). */ - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; -} - -/** - * alloc_skb_from_cache - allocate a network buffer - * @cp: kmem_cache from which to allocate the data area - * (object size must be big enough for @size bytes + skb overheads) - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. 
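[Aside: immediately after allocation the data pointers coincide, head == data == tail with end = head + size; skb_reserve() opens headroom by advancing data and tail, and skb_put() appends by advancing tail and len. A toy model of those invariants, not the kernel structures:]

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model of the sk_buff pointers set up by alloc_skb(). */
struct buf { unsigned char *head, *data, *tail, *end; size_t len; };

static void buf_init(struct buf *b, size_t size)
{
	b->head = b->data = b->tail = malloc(size);
	b->end = b->head + size;
	b->len = 0;
}

static void buf_reserve(struct buf *b, size_t n)	/* skb_reserve() */
{
	b->data += n;
	b->tail += n;
}

static unsigned char *buf_put(struct buf *b, size_t n)	/* skb_put() */
{
	unsigned char *old = b->tail;

	b->tail += n;
	b->len += n;
	assert(b->tail <= b->end);	/* the kernel calls skb_over_panic() */
	return old;
}

int main(void)
{
	struct buf b;

	buf_init(&b, 256);
	buf_reserve(&b, 16);	/* headroom for protocol headers */
	buf_put(&b, 100);	/* append 100 bytes of payload */
	printf("headroom=%td tailroom=%td len=%zu\n",
	       b.data - b.head, b.end - b.tail, b.len);	/* 16 140 100 */
	return 0;
}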
The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ - -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. */ - size = SKB_DATA_ALIGN(size); - data = kmem_cache_alloc(cp, gfp_mask); - if (data == NULL) - goto nodata; - - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; -} - - -/* - * Slab constructor for a skb head. - */ -static inline void skb_headerinit(void *p, kmem_cache_t *cache, - unsigned long flags) -{ - struct sk_buff *skb = p; - - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - skb->sk = NULL; - skb->stamp.tv_sec=0; /* No idea about time */ - skb->dev = NULL; - skb->real_dev = NULL; - skb->dst = NULL; - memset(skb->cb, 0, sizeof(skb->cb)); - skb->pkt_type = PACKET_HOST; /* Default type */ - skb->ip_summed = 0; - skb->priority = 0; - skb->security = 0; /* By default packets are insecure */ - skb->destructor = NULL; - -#ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfcache = 0; - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -#endif -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif -} - -static void skb_drop_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - skb_shinfo(skb)->frag_list = NULL; - - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); -} - -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) - skb_get(list); -} - -static void skb_release_data(struct sk_buff *skb) -{ - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - kfree(skb->head); - } -} - -/* - * Free an skbuff by memory without cleaning the state. - */ -void kfree_skbmem(struct sk_buff *skb) -{ - skb_release_data(skb); - skb_head_to_pool(skb); -} - -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. 
Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) -{ - if (skb->list) { - printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", NET_CALLER(skb)); - BUG(); - } - - dst_release(skb->dst); - if(skb->destructor) { - if (in_irq()) { - printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", - NET_CALLER(skb)); - } - skb->destructor(skb); - } -#ifdef CONFIG_NETFILTER - nf_conntrack_put(skb->nfct); -#endif - skb_headerinit(skb, NULL, 0); /* clean state */ - kfree_skbmem(skb); -} - -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. - */ - -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - n = skb_head_from_pool(); - if (!n) { - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; - } - -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->list = NULL; - n->sk = NULL; - C(stamp); - C(dev); - C(real_dev); - C(h); - C(nh); - C(mac); - C(dst); - dst_clone(n->dst); - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(csum); - n->cloned = 1; - C(pkt_type); - C(ip_summed); - C(priority); - atomic_set(&n->users, 1); - C(protocol); - C(security); - C(truesize); - C(head); - C(data); - C(tail); - C(end); - n->destructor = NULL; -#ifdef CONFIG_NETFILTER - C(nfmark); - C(nfcache); - C(nfct); -#ifdef CONFIG_NETFILTER_DEBUG - C(nf_debug); -#endif -#endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#endif - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; -#ifdef CONFIG_NETFILTER - nf_conntrack_get(skb->nfct); -#endif - return n; -} - -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; - - new->list=NULL; - new->sk=NULL; - new->dev=old->dev; - new->real_dev=old->real_dev; - new->priority=old->priority; - new->protocol=old->protocol; - new->dst=dst_clone(old->dst); - new->h.raw=old->h.raw+offset; - new->nh.raw=old->nh.raw+offset; - new->mac.raw=old->mac.raw+offset; - memcpy(new->cb, old->cb, sizeof(old->cb)); - atomic_set(&new->users, 1); - new->pkt_type=old->pkt_type; - new->stamp=old->stamp; - new->destructor = NULL; - new->security=old->security; -#ifdef CONFIG_NETFILTER - new->nfmark=old->nfmark; - new->nfcache=old->nfcache; - new->nfct=old->nfct; - nf_conntrack_get(new->nfct); -#ifdef CONFIG_NETFILTER_DEBUG - new->nf_debug=old->nf_debug; -#endif -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#endif -} - -/** - * skb_copy - create private copy of an sk_buff - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data. This is used when the - * caller wishes to modify the data and needs a private copy of the - * data to alter. Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. 
- * - * As by-product this function converts non-linear &sk_buff to linear - * one, so that &sk_buff becomes completely private and caller is allowed - * to modify all the data of returned buffer. This means that this - * function is not recommended for use in circumstances when only - * header is going to be modified. Use pskb_copy() instead. - */ - -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - int headerlen = skb->data-skb->head; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,headerlen); - /* Set the tail pointer and length */ - skb_put(n,skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) - BUG(); - - copy_skb_header(n, skb); - - return n; -} - -/* Keep head the same: replace data */ -int skb_linearize(struct sk_buff *skb, int gfp_mask) -{ - unsigned int size; - u8 *data; - long offset; - int headerlen = skb->data - skb->head; - int expand = (skb->tail+skb->data_len) - skb->end; - - if (skb_shared(skb)) - BUG(); - - if (expand <= 0) - expand = 0; - - size = (skb->end - skb->head + expand); - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - return -ENOMEM; - - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) - BUG(); - - /* Offset between the two in bytes */ - offset = data - skb->head; - - /* Free old data. */ - skb_release_data(skb); - - skb->head = data; - skb->end = data + size; - - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; - - /* Set up shinfo */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; - - skb->tail += skb->data_len; - skb->data_len = 0; - return 0; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. 
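[Aside: the three duplication routines differ in what they share: skb_clone() shares the entire data area, skb_copy() shares nothing, and pskb_copy() copies only the linear header while taking a reference on each fragment page. A toy refcount model of the pskb_copy() case; the structs are illustrative, not kernel types:]

#include <stdio.h>

struct frag { int refcnt; };
struct pkt  { char hdr[16]; struct frag *frags[2]; int nr_frags; };

/* Header is copied, fragments stay shared with their counts bumped,
 * mirroring the get_page() calls in pskb_copy(). */
static void header_private_copy(struct pkt *dst, const struct pkt *src)
{
	int i;

	*dst = *src;			/* private copy of the header */
	for (i = 0; i < dst->nr_frags; i++)
		dst->frags[i]->refcnt++;	/* fragments remain shared */
}

int main(void)
{
	struct frag f = { 1 };
	struct pkt a = { "tcp", { &f }, 1 }, b;

	header_private_copy(&b, &a);
	printf("shared frag refcnt: %d\n", f.refcnt);	/* 2 */
	return 0;
}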
- */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,skb->data-skb->head); - /* Set the tail pointer and length */ - skb_put(n,skb_headlen(skb)); - /* Copy the bytes */ - memcpy(n->data, skb->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); - - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) -{ - int i; - u8 *data; - int size = nhead + (skb->end - skb->head) + ntail; - long off; - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. */ - memcpy(data+nhead, skb->head, skb->tail-skb->head); - memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data+nhead) - skb->head; - - skb->head = data; - skb->end = data+size; - - skb->data += off; - skb->tail += off; - skb->mac.raw += off; - skb->h.raw += off; - skb->nh.raw += off; - skb->cloned = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff * -skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - return pskb_copy(skb, GFP_ATOMIC); - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL || - !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - return skb2; - - kfree_skb(skb2); - return NULL; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. 
- * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - */ - - -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - - n=alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - if(n==NULL) - return NULL; - - skb_reserve(n,newheadroom); - - /* Set the tail pointer and length */ - skb_put(n,skb->len); - - /* Copy the data only. */ - if (skb_copy_bits(skb, 0, n->data, skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} - -/** - * skb_pad - zero pad the tail of an skb - * @skb: buffer to pad - * @pad: space to pad - * - * Ensure that a buffer is followed by a padding area that is zero - * filled. Used by network drivers which may DMA or transfer data - * beyond the buffer end onto the wire. - * - * May return NULL in out of memory cases. - */ - -struct sk_buff *skb_pad(struct sk_buff *skb, int pad) -{ - struct sk_buff *nskb; - - /* If the skbuff is non linear tailroom is always zero.. */ - if(skb_tailroom(skb) >= pad) - { - memset(skb->data+skb->len, 0, pad); - return skb; - } - - nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); - kfree_skb(skb); - if(nskb) - memset(nskb->data+nskb->len, 0, pad); - return nskb; -} - -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). - */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) -{ - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - - for (i=0; i<nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (!realloc) - BUG(); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len-offset; - } - } - offset = end; - } - - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } - } - - return 0; -} - -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes a sense only on a fragmented &sk_buff, - * it expands header moving its tail forward and copying necessary - * data from fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or value of new tail of skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. 
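[Aside: the fragment accounting in the pull_pages loop below reduces to: consume delta bytes from the front of the fragment array, dropping fragments eaten whole and trimming the first partial one. A flat sketch over fragment sizes only, with no struct page or reference counting:]

#include <stdio.h>

#define MAX_FRAGS 4

/* Mirrors the pull_pages loop: returns the new fragment count. */
static int pull_frags(unsigned int frags[], int nr, unsigned int delta)
{
	int i, k = 0;

	for (i = 0; i < nr; i++) {
		if (frags[i] <= delta) {
			delta -= frags[i];	/* eaten as a whole: drop */
		} else {
			frags[k] = frags[i] - delta;	/* eaten partially: trim */
			delta = 0;
			k++;
		}
	}
	return k;
}

int main(void)
{
	unsigned int frags[MAX_FRAGS] = { 100, 200, 300, 400 };
	int i, nr = pull_frags(frags, MAX_FRAGS, 250);

	for (i = 0; i < nr; i++)
		printf("frag[%d] = %u\n", i, frags[i]);	/* 50, 300, 400 */
	return 0;
}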
- */ -unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) -{ - int i, k, eat; - - /* If skb has not enough free space at tail, get new one - * plus 128 bytes for future expansions. If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - eat = (skb->tail+delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) - BUG(); - - /* Optimization: no fragments, no reasons to preestimate - * size of pulled pages. Superb. - */ - if (skb_shinfo(skb)->frag_list == NULL) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, - * but taking into account that pulling is expected to - * be very rare operation, it is worth to fight against - * further bloating skb head and crucify ourselves here instead. - * Pure masohism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - if (list == NULL) - BUG(); - - if (list->len <= eat) { - /* Eaten as whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (clone == NULL) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (pskb_pull(list, eat) == NULL) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. */ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb->tail; -} - -/* Copy some data bits from skb to kernel buffer. */ - -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -{ - int i, copy; - int start = skb->len - skb->data_len; - - if (offset > (int)skb->len-len) - goto fault; - - /* Copy header. 
*/ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - memcpy(to, skb->data + offset, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ - offset-start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset-start, to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - } - if (len == 0) - return 0; - -fault: - return -EFAULT; -} - -/* Checksum skb data. */ - -unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Checksum header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data+offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset-start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - unsigned int csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset-start, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -/* Both of above in one bottle. */ - -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Copy header. 
*/ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + - offset-start, to, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - unsigned int csum2; - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - unsigned int csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_HW) - csstart = skb->h.raw - skb->data; - else - csstart = skb->len - skb->data_len; - - if (csstart > skb->len - skb->data_len) - BUG(); - - memcpy(to, skb->data, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, - skb->len-csstart, 0); - - if (skb->ip_summed == CHECKSUM_HW) { - long csstuff = csstart + skb->csum; - - *((unsigned short *)(to + csstuff)) = csum_fold(csum); - } -} - -#if 0 -/* - * Tune the memory allocator for a new MTU size. - */ -void skb_add_mtu(int mtu) -{ - /* Must match allocation in alloc_skb */ - mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); - - kmem_add_cache_size(mtu); -} -#endif - -void __init skb_init(void) -{ - int i; - - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN, - skb_headerinit, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - - for (i=0; i<NR_CPUS; i++) - skb_queue_head_init(&skb_head_pool[i].list); -} diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,741 +0,0 @@ -/* - * Copyright (C) 2004 IBM Corporation - * - * Authors: - * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> - * Dave Safford <safford@xxxxxxxxxxxxxx> - * Reiner Sailer <sailer@xxxxxxxxxxxxxx> - * Kylene Hall <kjhall@xxxxxxxxxx> - * - * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> - * - * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - * - * Note, the TPM chip is not interrupt driven (only polling) - * and can have very long timeouts (minutes!). 
Hence the unusual - * calls to schedule_timeout. - * - */ - -#include <linux/sched.h> -#include <linux/poll.h> -#include <linux/spinlock.h> -#include "tpm_nopci.h" - -enum { - TPM_MINOR = 224, /* officially assigned */ - TPM_BUFSIZE = 2048, - TPM_NUM_DEVICES = 256, - TPM_NUM_MASK_ENTRIES = TPM_NUM_DEVICES / (8 * sizeof(int)) -}; - - /* PCI configuration addresses */ -enum { - PCI_GEN_PMCON_1 = 0xA0, - PCI_GEN1_DEC = 0xE4, - PCI_LPC_EN = 0xE6, - PCI_GEN2_DEC = 0xEC -}; - -enum { - TPM_LOCK_REG = 0x0D, - TPM_INTERUPT_REG = 0x0A, - TPM_BASE_ADDR_LO = 0x08, - TPM_BASE_ADDR_HI = 0x09, - TPM_UNLOCK_VALUE = 0x55, - TPM_LOCK_VALUE = 0xAA, - TPM_DISABLE_INTERUPT_VALUE = 0x00 -}; - -static LIST_HEAD(tpm_chip_list); -static spinlock_t driver_lock = SPIN_LOCK_UNLOCKED; -static int dev_mask[32]; - -static void user_reader_timeout(unsigned long ptr) -{ - struct tpm_chip *chip = (struct tpm_chip *) ptr; - - down(&chip->buffer_mutex); - atomic_set(&chip->data_pending, 0); - memset(chip->data_buffer, 0, TPM_BUFSIZE); - up(&chip->buffer_mutex); -} - -void tpm_time_expired(unsigned long ptr) -{ - int *exp = (int *) ptr; - *exp = 1; -} - -EXPORT_SYMBOL_GPL(tpm_time_expired); - - -/* - * This function should be used by other kernel subsystems attempting to use the tpm through the tpm_transmit interface. - * A call to this function will return the chip structure corresponding to the TPM you are looking for that can then be sent with your command to tpm_transmit. - * Passing 0 as the argument corresponds to /dev/tpm0 and thus the first and probably primary TPM on the system. Passing 1 corresponds to /dev/tpm1 and the next TPM discovered. If a TPM with the given chip_num does not exist NULL will be returned. - */ -struct tpm_chip* tpm_chip_lookup(int chip_num) -{ - - struct tpm_chip *pos; - list_for_each_entry(pos, &tpm_chip_list, list) - if (pos->dev_num == chip_num || - chip_num == TPM_ANY_NUM) - return pos; - - return NULL; - -} - -/* - * Internal kernel interface to transmit TPM commands - */ -ssize_t tpm_transmit(struct tpm_chip * chip, const char *buf, - size_t bufsiz) -{ - ssize_t rc; - u32 count; - unsigned long stop; - - count = be32_to_cpu(*((__be32 *) (buf + 2))); - - if (count == 0) - return -ENODATA; - if (count > bufsiz) { - dev_err(chip->dev, - "invalid count value %x %x \n", count, bufsiz); - return -E2BIG; - } - - dev_dbg(chip->dev, "TPM Ordinal: %d\n", - be32_to_cpu(*((__be32 *) (buf + 6)))); - dev_dbg(chip->dev, "Chip Status: %x\n", - inb(chip->vendor->base + 1)); - - down(&chip->tpm_mutex); - - if ((rc = chip->vendor->send(chip, (u8 *) buf, count)) < 0) { - dev_err(chip->dev, - "tpm_transmit: tpm_send: error %d\n", rc); - goto out; - } - - stop = jiffies + 2 * 60 * HZ; - do { - u8 status = chip->vendor->status(chip); - if ((status & chip->vendor->req_complete_mask) == - chip->vendor->req_complete_val) { - goto out_recv; - } - - if ((status == chip->vendor->req_canceled)) { - dev_err(chip->dev, "Operation Canceled\n"); - rc = -ECANCELED; - goto out; - } - - msleep(TPM_TIMEOUT); /* CHECK */ - rmb(); - } - while (time_before(jiffies, stop)); - - - chip->vendor->cancel(chip); - dev_err(chip->dev, "Operation Timed out\n"); - rc = -ETIME; - goto out; - -out_recv: - rc = chip->vendor->recv(chip, (u8 *) buf, bufsiz); - if (rc < 0) - dev_err(chip->dev, - "tpm_transmit: tpm_recv: error %d\n", rc); - atomic_set(&chip->data_position, 0); - -out: - up(&chip->tpm_mutex); - return rc; -} - -EXPORT_SYMBOL_GPL(tpm_transmit); - -#define TPM_DIGEST_SIZE 20 -#define CAP_PCR_RESULT_SIZE 18 -static const u8 
cap_pcr[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 22,           /* length */
-        0, 0, 0, 101,          /* TPM_ORD_GetCapability */
-        0, 0, 0, 5,
-        0, 0, 0, 4,
-        0, 0, 1, 1
-};
-
-#define READ_PCR_RESULT_SIZE 30
-static const u8 pcrread[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 14,           /* length */
-        0, 0, 0, 21,           /* TPM_ORD_PcrRead */
-        0, 0, 0, 0             /* PCR index */
-};
-
-ssize_t tpm_show_pcrs(struct device *dev, char *buf)
-{
-        u8 data[READ_PCR_RESULT_SIZE];
-        ssize_t len;
-        int i, j, num_pcrs;
-        __be32 index;
-        char *str = buf;
-
-        struct tpm_chip *chip = dev_get_drvdata(dev);
-        if (chip == NULL)
-                return -ENODEV;
-
-        memcpy(data, cap_pcr, sizeof(cap_pcr));
-        if ((len = tpm_transmit(chip, data, sizeof(data)))
-            < CAP_PCR_RESULT_SIZE)
-                return len;
-
-        num_pcrs = be32_to_cpu(*((__be32 *) (data + 14)));
-
-        for (i = 0; i < num_pcrs; i++) {
-                memcpy(data, pcrread, sizeof(pcrread));
-                index = cpu_to_be32(i);
-                memcpy(data + 10, &index, 4);
-                if ((len = tpm_transmit(chip, data, sizeof(data)))
-                    < READ_PCR_RESULT_SIZE)
-                        return len;
-                str += sprintf(str, "PCR-%02d: ", i);
-                for (j = 0; j < TPM_DIGEST_SIZE; j++)
-                        str += sprintf(str, "%02X ", *(data + 10 + j));
-                str += sprintf(str, "\n");
-        }
-        return str - buf;
-}
-
-EXPORT_SYMBOL_GPL(tpm_show_pcrs);
-
-/*
- * Return 0 on success. On error pass along error code.
- * chip_id Upper 2 bytes equal ANY, HW_ONLY or SW_ONLY
- *         Lower 2 bytes equal tpm idx # or ANY
- * res_buf must fit a TPM_PCR (20 bytes) or NULL if you don't care
- */
-int tpm_pcr_read( u32 chip_id, int pcr_idx, u8* res_buf, int res_buf_size )
-{
-        u8 data[READ_PCR_RESULT_SIZE];
-        int rc;
-        __be32 index;
-        int chip_num = chip_id & TPM_CHIP_NUM_MASK;
-        struct tpm_chip* chip;
-
-        if ( res_buf && res_buf_size < TPM_DIGEST_SIZE )
-                return -ENOSPC;
-        if ( (chip = tpm_chip_lookup( chip_num /*,
-                        chip_id >> TPM_CHIP_TYPE_SHIFT*/ ) ) == NULL ) {
-                printk("chip %d not found.\n",chip_num);
-                return -ENODEV;
-        }
-        memcpy(data, pcrread, sizeof(pcrread));
-        index = cpu_to_be32(pcr_idx);
-        memcpy(data + 10, &index, 4);
-        if ((rc = tpm_transmit(chip, data, sizeof(data))) > 0 )
-                rc = be32_to_cpu(*((u32*)(data+6)));
-
-        if ( rc == 0 && res_buf )
-                memcpy(res_buf, data+10, TPM_DIGEST_SIZE);
-        return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_read);
-
-#define EXTEND_PCR_SIZE 34
-static const u8 pcrextend[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 34,           /* length */
-        0, 0, 0, 20,           /* TPM_ORD_Extend */
-        0, 0, 0, 0             /* PCR index */
-};
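[Editorial aside: all the command templates in this file share one wire layout: a big-endian tag, total length and ordinal, with the variable fields spliced in at fixed byte offsets (the PCR index at offset 10; for extend, the 20-byte digest at offset 14). A user-space sketch of that splice, using only standard headers and an arbitrary PCR index, illustration only:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>   /* htonl */

    int main(void)
    {
        uint8_t cmd[14] = {
            0, 193,          /* TPM_TAG_RQU_COMMAND */
            0, 0, 0, 14,     /* total length in bytes */
            0, 0, 0, 21,     /* TPM_ORD_PcrRead */
            0, 0, 0, 0       /* PCR index, filled in below */
        };
        uint32_t index = htonl(7);      /* read PCR 7, big-endian */
        size_t i;

        memcpy(cmd + 10, &index, 4);    /* the same splice the driver does */

        for (i = 0; i < sizeof(cmd); i++)
            printf("%02x ", cmd[i]);
        printf("\n");
        return 0;
    }
]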
-/*
- * Return 0 on success. On error pass along error code.
- * chip_id Upper 2 bytes equal ANY, HW_ONLY or SW_ONLY
- *         Lower 2 bytes equal tpm idx # or ANY
- */
-int tpm_pcr_extend(u32 chip_id, int pcr_idx, const u8* hash)
-{
-        u8 data[EXTEND_PCR_SIZE];
-        int rc;
-        __be32 index;
-        int chip_num = chip_id & TPM_CHIP_NUM_MASK;
-        struct tpm_chip* chip;
-
-        if ( (chip = tpm_chip_lookup( chip_num /*,
-                        chip_id >> TPM_CHIP_TYPE_SHIFT */)) == NULL )
-                return -ENODEV;
-
-        memcpy(data, pcrextend, sizeof(pcrextend));
-        index = cpu_to_be32(pcr_idx);
-        memcpy(data + 10, &index, 4);
-        memcpy( data + 14, hash, TPM_DIGEST_SIZE );
-        if ((rc = tpm_transmit(chip, data, sizeof(data))) > 0 )
-                rc = be32_to_cpu(*((u32*)(data+6)));
-        return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_extend);
-
-
-
-#define READ_PUBEK_RESULT_SIZE 314
-static const u8 readpubek[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 30,           /* length */
-        0, 0, 0, 124,          /* TPM_ORD_ReadPubek */
-};
-
-ssize_t tpm_show_pubek(struct device *dev, char *buf)
-{
-        u8 *data;
-        ssize_t len;
-        int i, rc;
-        char *str = buf;
-
-        struct tpm_chip *chip = dev_get_drvdata(dev);
-        if (chip == NULL)
-                return -ENODEV;
-
-        data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
-        if (!data)
-                return -ENOMEM;
-
-        memcpy(data, readpubek, sizeof(readpubek));
-        memset(data + sizeof(readpubek), 0, 20);        /* zero nonce */
-
-        if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) <
-            READ_PUBEK_RESULT_SIZE) {
-                rc = len;
-                goto out;
-        }
-
-        /*
-           ignore header 10 bytes
-           algorithm 32 bits (1 == RSA)
-           encscheme 16 bits
-           sigscheme 16 bits
-           parameters (RSA, 12 bytes: keybit, #primes, expbit)
-           keylenbytes 32 bits
-           256 byte modulus
-           ignore checksum 20 bytes
-         */
-
-        str +=
-            sprintf(str,
-                    "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
-                    "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
-                    " %02X %02X %02X %02X %02X %02X %02X %02X\n"
-                    "Modulus length: %d\nModulus: \n",
-                    data[10], data[11], data[12], data[13], data[14],
-                    data[15], data[16], data[17], data[22], data[23],
-                    data[24], data[25], data[26], data[27], data[28],
-                    data[29], data[30], data[31], data[32], data[33],
-                    be32_to_cpu(*((__be32 *) (data + 32))));
-
-        for (i = 0; i < 256; i++) {
-                str += sprintf(str, "%02X ", data[i + 39]);
-                if ((i + 1) % 16 == 0)
-                        str += sprintf(str, "\n");
-        }
-        rc = str - buf;
-out:
-        kfree(data);
-        return rc;
-}
-
-EXPORT_SYMBOL_GPL(tpm_show_pubek);
-
-#define CAP_VER_RESULT_SIZE 18
-static const u8 cap_version[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 18,           /* length */
-        0, 0, 0, 101,          /* TPM_ORD_GetCapability */
-        0, 0, 0, 6,
-        0, 0, 0, 0
-};
-
-#define CAP_MANUFACTURER_RESULT_SIZE 18
-static const u8 cap_manufacturer[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 22,           /* length */
-        0, 0, 0, 101,          /* TPM_ORD_GetCapability */
-        0, 0, 0, 5,
-        0, 0, 0, 4,
-        0, 0, 1, 3
-};
-
-ssize_t tpm_show_caps(struct device *dev, char *buf)
-{
-        u8 data[sizeof(cap_manufacturer)];
-        ssize_t len;
-        char *str = buf;
-
-        struct tpm_chip *chip = dev_get_drvdata(dev);
-        if (chip == NULL)
-                return -ENODEV;
-
-        memcpy(data, cap_manufacturer, sizeof(cap_manufacturer));
-
-        if ((len = tpm_transmit(chip, data, sizeof(data))) <
-            CAP_MANUFACTURER_RESULT_SIZE)
-                return len;
-
-        str += sprintf(str, "Manufacturer: 0x%x\n",
-                       be32_to_cpu(*((__be32 *)(data + 14))));
-
-        memcpy(data, cap_version, sizeof(cap_version));
-
-        if ((len = tpm_transmit(chip, data, sizeof(data))) <
-            CAP_VER_RESULT_SIZE)
-                return len;
-
-        str +=
-            sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                    (int) data[14], (int) data[15], (int) data[16],
-                    (int) data[17]);
-
-        return str - buf;
-}
-
-EXPORT_SYMBOL_GPL(tpm_show_caps);
-
-ssize_t tpm_store_cancel(struct device * dev, const char *buf,
-                         size_t count)
-{
-        struct tpm_chip *chip = dev_get_drvdata(dev);
-        if (chip == NULL)
-                return 0;
-
-        chip->vendor->cancel(chip);
-        return count;
-}
-
-EXPORT_SYMBOL_GPL(tpm_store_cancel);
-
-/*
- * Device file system interface to the TPM
- */
-int tpm_open(struct inode *inode, struct file *file)
-{
-        int rc = 0, minor = iminor(inode);
-        struct tpm_chip *chip = NULL, *pos;
-
-        spin_lock(&driver_lock);
-
-        list_for_each_entry(pos, &tpm_chip_list, list) {
-                if (pos->vendor->miscdev.minor == minor) {
-                        chip = pos;
-                        break;
-                }
-        }
-
-        if (chip == NULL) {
-                rc = -ENODEV;
-                goto err_out;
-        }
-
-        if (chip->num_opens) {
-                dev_dbg(chip->dev, "Another process owns this TPM\n");
-                rc = -EBUSY;
-                goto err_out;
-        }
-
-        chip->num_opens++;
-        get_device(chip->dev);
-
-        spin_unlock(&driver_lock);
-
-        chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
-        if (chip->data_buffer == NULL) {
-                chip->num_opens--;
-                put_device(chip->dev);
-                return -ENOMEM;
-        }
-
-        atomic_set(&chip->data_pending, 0);
-
-        file->private_data = chip;
-        return 0;
-
-err_out:
-        spin_unlock(&driver_lock);
-        return rc;
-}
-
-EXPORT_SYMBOL_GPL(tpm_open);
-
-int tpm_release(struct inode *inode, struct file *file)
-{
-        struct tpm_chip *chip = file->private_data;
-
-        spin_lock(&driver_lock);
-        file->private_data = NULL;
-        chip->num_opens--;
-        del_singleshot_timer_sync(&chip->user_read_timer);
-        atomic_set(&chip->data_pending, 0);
-        put_device(chip->dev);
-        kfree(chip->data_buffer);
-        spin_unlock(&driver_lock);
-        return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_release);
-
-ssize_t tpm_write(struct file * file, const char __user * buf,
-                  size_t size, loff_t * off)
-{
-        struct tpm_chip *chip = file->private_data;
-        int in_size = size, out_size;
-
-        /* Cannot perform a write until the read has cleared,
-         * either via tpm_read or a user_read_timer timeout. */
-        while (atomic_read(&chip->data_pending) != 0)
-                msleep(TPM_TIMEOUT);
-
-        down(&chip->buffer_mutex);
-
-        if (in_size > TPM_BUFSIZE)
-                in_size = TPM_BUFSIZE;
-
-        if (copy_from_user
-            (chip->data_buffer, (void __user *) buf, in_size)) {
-                up(&chip->buffer_mutex);
-                return -EFAULT;
-        }
-
-        /* atomic tpm command send and result receive */
-        out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
-
-        atomic_set(&chip->data_pending, out_size);
-        up(&chip->buffer_mutex);
-
-        /* Set a timeout by which the reader must come claim the result */
-        mod_timer(&chip->user_read_timer, jiffies + (60 * HZ));
-
-        return in_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_write);
-
-ssize_t tpm_read(struct file * file, char __user * buf,
-                 size_t size, loff_t * off)
-{
-        struct tpm_chip *chip = file->private_data;
-        int ret_size;
-
-        del_singleshot_timer_sync(&chip->user_read_timer);
-        ret_size = atomic_read(&chip->data_pending);
-
-        if (ret_size > 0) {     /* relay data */
-                int position = atomic_read(&chip->data_position);
-
-                if (size < ret_size)
-                        ret_size = size;
-
-                down(&chip->buffer_mutex);
-
-                if (copy_to_user((void __user *) buf,
-                                 &chip->data_buffer[position],
-                                 ret_size)) {
-                        ret_size = -EFAULT;
-                } else {
-                        int pending = atomic_read(&chip->data_pending) - ret_size;
-                        atomic_set(&chip->data_pending,
-                                   pending);
-                        atomic_set(&chip->data_position,
-                                   position + ret_size);
-                }
-                up(&chip->buffer_mutex);
-        }
-
-        return ret_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_read);
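[Editorial aside: between them, tpm_write() and tpm_read() give user space a strict submit-then-collect protocol: one write() transmits a complete command, the response is parked in data_buffer with data_pending/data_position tracking partially consumed results, and an unclaimed response is discarded when the 60-second timer fires. A minimal user-space sketch of one exchange, assuming the device node is /dev/tpm0 and borrowing the 10-byte TPM_SaveState blob defined further down:

    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
        /* The TPM_SaveState command from the savestate[] array below. */
        unsigned char cmd[] = { 0, 193, 0, 0, 0, 10, 0, 0, 0, 152 };
        unsigned char resp[2048];
        ssize_t n;
        int fd = open("/dev/tpm0", O_RDWR);

        if (fd < 0)
            return 1;
        if (write(fd, cmd, sizeof(cmd)) != (ssize_t) sizeof(cmd))
            return 1;
        /* A short read leaves the rest claimable by further reads. */
        n = read(fd, resp, sizeof(resp));
        printf("response: %zd bytes\n", n);
        close(fd);
        return 0;
    }
]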
-void tpm_remove_hardware(struct device *dev)
-{
-        struct tpm_chip *chip = dev_get_drvdata(dev);
-        int i;
-
-        if (chip == NULL) {
-                dev_err(dev, "No device data found\n");
-                return;
-        }
-
-        spin_lock(&driver_lock);
-
-        list_del(&chip->list);
-
-        spin_unlock(&driver_lock);
-
-        dev_set_drvdata(dev, NULL);
-        misc_deregister(&chip->vendor->miscdev);
-
-        for (i = 0; i < TPM_NUM_ATTR; i++)
-                device_remove_file(dev, &chip->vendor->attr[i]);
-
-        dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES] &=
-            ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
-
-        kfree(chip);
-
-        put_device(dev);
-}
-
-EXPORT_SYMBOL_GPL(tpm_remove_hardware);
-
-static const u8 savestate[] = {
-        0, 193,                /* TPM_TAG_RQU_COMMAND */
-        0, 0, 0, 10,           /* blob length (in bytes) */
-        0, 0, 0, 152           /* TPM_ORD_SaveState */
-};
-
-/*
- * We are about to suspend. Save the TPM state
- * so that it can be restored.
- */
-int tpm_pm_suspend(struct pci_dev *pci_dev, u32 pm_state)
-{
-        struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-        if (chip == NULL)
-                return -ENODEV;
-
-        tpm_transmit(chip, savestate, sizeof(savestate));
-        return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_suspend);
-
-/*
- * Resume from a power save. The BIOS already restored
- * the TPM state.
- */
-int tpm_pm_resume(struct pci_dev *pci_dev)
-{
-        struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-
-        if (chip == NULL)
-                return -ENODEV;
-
-        return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_resume);
-
-/*
- * Called from the tpm_<specific>.c probe function only for devices
- * the driver has determined it should claim. Prior to calling this
- * function the specific probe function has called pci_enable_device;
- * upon errant exit from this function the specific probe function
- * should call pci_disable_device.
- */
-int tpm_register_hardware_nopci(struct device *dev,
-                                struct tpm_vendor_specific *entry)
-{
-        char devname[7];
-        struct tpm_chip *chip;
-        int i, j;
-
-        /* Driver specific per-device data */
-        chip = kmalloc(sizeof(*chip), GFP_KERNEL);
-        if (chip == NULL)
-                return -ENOMEM;
-
-        memset(chip, 0, sizeof(struct tpm_chip));
-
-        init_MUTEX(&chip->buffer_mutex);
-        init_MUTEX(&chip->tpm_mutex);
-        INIT_LIST_HEAD(&chip->list);
-
-        init_timer(&chip->user_read_timer);
-        chip->user_read_timer.function = user_reader_timeout;
-        chip->user_read_timer.data = (unsigned long) chip;
-
-        chip->vendor = entry;
-
-        chip->dev_num = -1;
-
-        for (i = 0; i < TPM_NUM_MASK_ENTRIES; i++)
-                for (j = 0; j < 8 * sizeof(int); j++)
-                        if ((dev_mask[i] & (1 << j)) == 0) {
-                                chip->dev_num =
-                                    i * TPM_NUM_MASK_ENTRIES + j;
-                                dev_mask[i] |= 1 << j;
-                                goto dev_num_search_complete;
-                        }
-
-dev_num_search_complete:
-        if (chip->dev_num < 0) {
-                dev_err(dev, "No available tpm device numbers\n");
-                kfree(chip);
-                return -ENODEV;
-        } else if (chip->dev_num == 0)
-                chip->vendor->miscdev.minor = TPM_MINOR;
-        else
-                chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR;
-
-        snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num);
-        chip->vendor->miscdev.name = devname;
-
-        chip->vendor->miscdev.dev = dev;
-        chip->dev = get_device(dev);
-
-
-        if (misc_register(&chip->vendor->miscdev)) {
-                dev_err(chip->dev,
-                        "unable to misc_register %s, minor %d\n",
-                        chip->vendor->miscdev.name,
-                        chip->vendor->miscdev.minor);
-                put_device(dev);
-                kfree(chip);
-                dev_mask[i] &= ~(1 << j);
-                return -ENODEV;
-        }
-
-        spin_lock(&driver_lock);
-
-        dev_set_drvdata(dev, chip);
-
-        list_add(&chip->list, &tpm_chip_list);
-
-        spin_unlock(&driver_lock);
-
-        for (i = 0; i < TPM_NUM_ATTR; i++)
-                device_create_file(dev, &chip->vendor->attr[i]);
-
-        return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_register_hardware_nopci);
-
-static int __init init_tpm(void)
-{
-        return 0;
-}
-
-static void __exit
cleanup_tpm(void) -{ - -} - -module_init(init_tpm); -module_exit(cleanup_tpm); - -MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); -MODULE_DESCRIPTION("TPM Driver"); -MODULE_VERSION("2.0"); -MODULE_LICENSE("GPL"); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.h --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,127 +0,0 @@ -/* - * Copyright (C) 2004 IBM Corporation - * - * Authors: - * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> - * Dave Safford <safford@xxxxxxxxxxxxxx> - * Reiner Sailer <sailer@xxxxxxxxxxxxxx> - * Kylene Hall <kjhall@xxxxxxxxxx> - * - * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> - * - * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - * - */ -#include <linux/module.h> -#include <linux/version.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/miscdevice.h> - -enum { - TPM_TIMEOUT = 5, /* msecs */ - TPM_NUM_ATTR = 4 -}; - -/* TPM addresses */ -enum { - TPM_ADDR = 0x4E, - TPM_DATA = 0x4F -}; - -/* - * Chip num is this value or a valid tpm idx in lower two bytes of chip_id - */ -enum tpm_chip_num { - TPM_ANY_NUM = 0xFFFF, -}; - -#define TPM_CHIP_NUM_MASK 0x0000ffff - -extern ssize_t tpm_show_pubek(struct device *, char *); -extern ssize_t tpm_show_pcrs(struct device *, char *); -extern ssize_t tpm_show_caps(struct device *, char *); -extern ssize_t tpm_store_cancel(struct device *, const char *, size_t); - -#define TPM_DEVICE_ATTRS { \ - __ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL), \ - __ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL), \ - __ATTR(caps, S_IRUGO, tpm_show_caps, NULL), \ - __ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel) } - -struct tpm_chip; - -struct tpm_vendor_specific { - u8 req_complete_mask; - u8 req_complete_val; - u8 req_canceled; - u16 base; /* TPM base address */ - - int (*recv) (struct tpm_chip *, u8 *, size_t); - int (*send) (struct tpm_chip *, u8 *, size_t); - void (*cancel) (struct tpm_chip *); - u8(*status) (struct tpm_chip *); - struct miscdevice miscdev; - struct device_attribute attr[TPM_NUM_ATTR]; -}; - -struct tpm_chip { - struct device *dev; /* PCI device stuff */ - - int dev_num; /* /dev/tpm# */ - int num_opens; /* only one allowed */ - int time_expired; - - /* Data passed to and from the tpm via the read/write calls */ - u8 *data_buffer; - atomic_t data_pending; - atomic_t data_position; - struct semaphore buffer_mutex; - - struct timer_list user_read_timer; /* user needs to claim result */ - struct semaphore tpm_mutex; /* tpm is processing */ - - struct tpm_vendor_specific *vendor; - - struct list_head list; -}; - -static inline int tpm_read_index(int index) -{ - outb(index, TPM_ADDR); - return inb(TPM_DATA) & 0xFF; -} - -static inline void tpm_write_index(int index, int value) -{ - outb(index, TPM_ADDR); - outb(value & 0xFF, TPM_DATA); -} - -extern void tpm_time_expired(unsigned long); -extern int tpm_lpc_bus_init(struct pci_dev *, u16); - -extern int tpm_register_hardware_nopci(struct device *, - struct tpm_vendor_specific *); -extern void tpm_remove_hardware(struct device *); -extern int tpm_open(struct inode *, struct file *); -extern int tpm_release(struct inode *, struct file *); -extern ssize_t 
tpm_write(struct file *, const char __user *, size_t, - loff_t *); -extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *); -extern int tpm_pcr_extend(u32 chip_id, int pcr_idx, const u8* hash); -extern int tpm_pcr_read( u32 chip_id, int pcr_idx, u8* res_buf, int res_buf_size ); - -extern int tpm_pm_suspend(struct pci_dev *, u32); -extern int tpm_pm_resume(struct pci_dev *); - -/* internal kernel interface */ -extern ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, - size_t bufsiz); -extern struct tpm_chip *tpm_chip_lookup(int chip_num); diff -r 0ba10f7fef51 -r 4e0c94871be2 linux-2.6-xen-sparse/include/asm-generic/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,221 +0,0 @@ -#ifndef _ASM_GENERIC_PGTABLE_H -#define _ASM_GENERIC_PGTABLE_H - -#ifndef __HAVE_ARCH_PTEP_ESTABLISH -/* - * Establish a new mapping: - * - flush the old one - * - update the page tables - * - inform the TLB about the new one - * - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock. - * - * Note: the old pte is known to not be writable, so we don't need to - * worry about dirty bits etc getting lost. - */ -#ifndef __HAVE_ARCH_SET_PTE_ATOMIC -#define ptep_establish(__vma, __address, __ptep, __entry) \ -do { \ - set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ - flush_tlb_page(__vma, __address); \ -} while (0) -#else /* __HAVE_ARCH_SET_PTE_ATOMIC */ -#define ptep_establish(__vma, __address, __ptep, __entry) \ -do { \ - set_pte_atomic(__ptep, __entry); \ - flush_tlb_page(__vma, __address); \ -} while (0) -#endif /* __HAVE_ARCH_SET_PTE_ATOMIC */ -#endif - -#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -/* - * Largely same as above, but only sets the access flags (dirty, - * accessed, and writable). Furthermore, we know it always gets set - * to a "more permissive" setting, which allows most architectures - * to optimize this. 
- */ -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ -do { \ - set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ - flush_tlb_page(__vma, __address); \ -} while (0) -#endif - -#ifndef __HAVE_ARCH_PTEP_ESTABLISH_NEW -/* - * Establish a mapping where none previously existed - */ -#define ptep_establish_new(__vma, __address, __ptep, __entry) \ -do { \ - set_pte(__ptep, __entry); \ -} while (0) -#endif - -#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(__vma, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - int r = 1; \ - if (!pte_young(__pte)) \ - r = 0; \ - else \ - set_pte_at((__vma)->vm_mm, (__address), \ - (__ptep), pte_mkold(__pte)); \ - r; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define ptep_clear_flush_young(__vma, __address, __ptep) \ -({ \ - int __young; \ - __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ - if (__young) \ - flush_tlb_page(__vma, __address); \ - __young; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define ptep_test_and_clear_dirty(__vma, __address, __ptep) \ -({ \ - pte_t __pte = *__ptep; \ - int r = 1; \ - if (!pte_dirty(__pte)) \ - r = 0; \ - else \ - set_pte_at((__vma)->vm_mm, (__address), (__ptep), \ - pte_mkclean(__pte)); \ - r; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH -#define ptep_clear_flush_dirty(__vma, __address, __ptep) \ -({ \ - int __dirty; \ - __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \ - if (__dirty) \ - flush_tlb_page(__vma, __address); \ - __dirty; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH -#define ptep_clear_flush(__vma, __address, __ptep) \ -({ \ - pte_t __pte; \ - __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ - flush_tlb_page(__vma, __address); \ - __pte; \ -}) -#endif - -#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) -{ - pte_t old_pte = *ptep; - set_pte_at(mm, address, ptep, pte_wrprotect(old_pte)); -} -#endif - -#ifndef __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) (pte_val(A) == pte_val(B)) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY -#define page_test_and_clear_dirty(page) (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(page) (0) -#endif - -#ifndef __HAVE_ARCH_PGD_OFFSET_GATE -#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) -#endif - -#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE -#define lazy_mmu_prot_update(pte) do { } while (0) -#endif - -/* - * When walking page tables, get the address of the next boundary, - * or the end address of the range if that comes earlier. Although no - * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout. - */ - -#define pgd_addr_end(addr, end) \ -({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ - (__boundary - 1 < (end) - 1)? __boundary: (end); \ -}) - -#ifndef pud_addr_end -#define pud_addr_end(addr, end) \ -({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ - (__boundary - 1 < (end) - 1)? __boundary: (end); \ -}) -#endif - -#ifndef pmd_addr_end -#define pmd_addr_end(addr, end) \ -({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ - (__boundary - 1 < (end) - 1)? 
__boundary: (end); \ -}) -#endif - -#ifndef __ASSEMBLY__ -/* - * When walking page tables, we usually want to skip any p?d_none entries; - * and any p?d_bad entries - reporting the error before resetting to none. - * Do the tests inline, but report and clear the bad entry in mm/memory.c. - */ -void pgd_clear_bad(pgd_t *); -void pud_clear_bad(pud_t *); -void pmd_clear_bad(pmd_t *); - -static inline int pgd_none_or_clear_bad(pgd_t *pgd) -{ - if (pgd_none(*pgd)) - return 1; - if (unlikely(pgd_bad(*pgd))) { - pgd_clear_bad(pgd); - return 1; - } - return 0; -} - -static inline int pud_none_or_clear_bad(pud_t *pud) -{ - if (pud_none(*pud)) - return 1; - if (unlikely(pud_bad(*pud))) { - pud_clear_bad(pud); - return 1; - } - return 0; -} - -static inline int pmd_none_or_clear_bad(pmd_t *pmd) -{ - if (pmd_none(*pmd)) - return 1; - if (unlikely(pmd_bad(*pmd))) { - pmd_clear_bad(pmd); - return 1; - } - return 0; -} -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_GENERIC_PGTABLE_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 patches/linux-2.6.12/tpm_partial_read.patch --- a/patches/linux-2.6.12/tpm_partial_read.patch Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,74 +0,0 @@ ---- ref-linux-2.6.12/drivers/char/tpm/tpm.c 2005-06-17 15:48:29.000000000 -0400 -+++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.c 2005-09-15 14:56:05.000000000 -0400 -@@ -473,6 +401,7 @@ ssize_t tpm_write(struct file * file, co - out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE); - - atomic_set(&chip->data_pending, out_size); -+ atomic_set(&chip->data_position, 0); - up(&chip->buffer_mutex); - - /* Set a timeout by which the reader must come claim the result */ -@@ -494,29 +423,34 @@ ssize_t tpm_read(struct file * file, cha - { - struct tpm_chip *chip = file->private_data; - int ret_size = -ENODATA; -+ int pos, pending = 0; - -- if (atomic_read(&chip->data_pending) != 0) { /* Result available */ -+ down(&chip->buffer_mutex); -+ ret_size = atomic_read(&chip->data_pending); -+ if ( ret_size > 0 ) { /* Result available */ -+ if (size < ret_size) -+ ret_size = size; -+ -+ pos = atomic_read(&chip->data_position); -+ -+ if (copy_to_user((void __user *) buf, -+ &chip->data_buffer[pos], ret_size)) { -+ ret_size = -EFAULT; -+ } else { -+ pending = atomic_read(&chip->data_pending) - ret_size; -+ if ( pending ) { -+ atomic_set( &chip->data_pending, pending ); -+ atomic_set( &chip->data_position, pos+ret_size ); -+ } -+ } -+ } -+ up(&chip->buffer_mutex); -+ -+ if ( ret_size <= 0 || pending == 0 ) { -+ atomic_set( &chip->data_pending, 0 ); - down(&chip->timer_manipulation_mutex); - del_singleshot_timer_sync(&chip->user_read_timer); - up(&chip->timer_manipulation_mutex); -- -- down(&chip->buffer_mutex); -- -- ret_size = atomic_read(&chip->data_pending); -- atomic_set(&chip->data_pending, 0); -- -- if (ret_size == 0) /* timeout just occurred */ -- ret_size = -ETIME; -- else if (ret_size > 0) { /* relay data */ -- if (size < ret_size) -- ret_size = size; -- -- if (copy_to_user((void __user *) buf, -- chip->data_buffer, ret_size)) { -- ret_size = -EFAULT; -- } -- } -- up(&chip->buffer_mutex); - } - - return ret_size; ---- ref-linux-2.6.12/drivers/char/tpm/tpm.h 2005-06-17 15:48:29.000000000 -0400 -+++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.h 2005-09-15 14:56:05.000000000 -0400 -@@ -54,6 +54,7 @@ struct tpm_chip { - /* Data passed to and from the tpm via the read/write calls */ - u8 *data_buffer; - atomic_t data_pending; -+ atomic_t data_position; - struct semaphore buffer_mutex; - - struct timer_list user_read_timer; /* 
user needs to claim result */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/block-file --- a/tools/examples/block-file Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,31 +0,0 @@ -#!/bin/sh - -# Usage: block_loop [bind file|unbind node] -# -# The file argument to the bind command is the file we are to bind to a -# loop device. -# -# The node argument to unbind is the name of the device node we are to -# unbind. - -set -e - -case $1 in - bind) - for dev in /dev/loop*; do - if losetup $dev $2; then - major=$(stat -L -c %t "$dev") - minor=$(stat -L -c %T "$dev") - pdev=$(printf "0x%02x%02x" 0x$major 0x$minor) - xenstore-write "$XENBUS_PATH"/physical-device $pdev \ - "$XENBUS_PATH"/node $dev - exit 0 - fi - done - exit 1 - ;; - unbind) - losetup -d $2 - exit 0 - ;; -esac diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/examples/block-phy --- a/tools/examples/block-phy Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,30 +0,0 @@ -#! /bin/sh - -set -e - -expand_dev() { - local dev - case $1 in - /*) - dev=$1 - ;; - *) - dev=/dev/$1 - ;; - esac - echo -n $dev -} - -case $1 in - bind) - dev=$(expand_dev $2) - major=$(stat -L -c %t "$dev") - minor=$(stat -L -c %T "$dev") - pdev=$(printf "0x%02x%02x" 0x$major 0x$minor) - xenstore-write "$XENBUS_PATH"/physical-device $pdev \ - "$XENBUS_PATH"/node $dev - exit 0 - ;; - unbind) - ;; -esac diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/web/reactor.py --- a/tools/python/xen/web/reactor.py Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,19 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> -#============================================================================ - -from unix import listenUNIX, connectUNIX -from tcp import listenTCP, connectTCP, SetCloExec diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/EventServer.py --- a/tools/python/xen/xend/EventServer.py Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,265 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#============================================================================
-# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-#============================================================================
-
-"""Simple publish/subscribe event server.
-
-"""
-import string
-from threading import Lock
-
-import scheduler
-
-# subscribe a.b.c h: map a.b.c -> h
-# subscribe a.b.* h: map a.b.* -> h
-# subscribe a.b.? h: map a.b.? -> h
-#
-# for event a.b.c.d:
-#
-# lookup a.b.c.d, call handlers
-#
-# lookup a.b.c.?, call handlers
-#
-# lookup a.b.c.d.*, call handlers
-# lookup a.b.c.*, call handlers
-# lookup a.b.*, call handlers
-# lookup a.*, call handlers
-# lookup *, call handlers
-
-# a.b.c.d = (a b c d)
-# a.b.c.? = (a b c _)
-# a.b.c.* = (a b c . _)
-
-class EventServer:
-
-    DOT = '.'
-    QUERY = '?'
-    DOT_QUERY = DOT + QUERY
-    STAR = '*'
-    DOT_STAR = DOT + STAR
-
-    def __init__(self, run=0):
-        self.handlers = {}
-        self.run = run
-        self.queue = []
-        self.lock = Lock()
-
-    def start(self):
-        """Enable event handling. Sends any queued events.
-        """
-        try:
-            self.lock.acquire()
-            self.run = 1
-            queue = self.queue
-            self.queue = []
-        finally:
-            self.lock.release()
-        for (e, v) in queue:
-            self.inject(e, v)
-
-    def stop(self):
-        """Suspend event handling. Events injected while suspended
-        are queued until we are started again.
-        """
-        try:
-            self.lock.acquire()
-            self.run = 0
-        finally:
-            self.lock.release()
-
-    def subscribe(self, event, handler):
-        """Subscribe to an event. For example 'a.b.c.d'.
-        A subscription like 'a.b.c.?' ending in '?' matches any value
-        for the '?'. A subscription like 'a.b.c.*' ending in '*' matches
-        any event type with the same prefix, 'a.b.c' in this case.
-
-        event   event name
-        handler event handler fn(event, val)
-        """
-        try:
-            self.lock.acquire()
-            hl = self.handlers.get(event)
-            if hl is None:
-                self.handlers[event] = [handler]
-            else:
-                hl.append(handler)
-        finally:
-            self.lock.release()
-
-    def unsubscribe_all(self, event=None):
-        """Unsubscribe all handlers for a given event, or all handlers.
-
-        event event (optional)
-        """
-        try:
-            self.lock.acquire()
-            if event == None:
-                self.handlers.clear()
-            elif event in self.handlers:
-                del self.handlers[event]
-        finally:
-            self.lock.release()
-
-    def unsubscribe(self, event, handler):
-        """Unsubscribe a given event and handler.
-
-        event   event
-        handler handler
-        """
-        try:
-            self.lock.acquire()
-            hl = self.handlers.get(event)
-            if hl is None:
-                return
-            if handler in hl:
-                hl.remove(handler)
-        finally:
-            self.lock.release()
-
-    def inject(self, event, val, async=1):
-        """Inject an event. Handlers for it are called if running, otherwise
-        it is queued.
-
-        event event type
-        val   event value
-        """
-        try:
-            self.lock.acquire()
-            if not self.run:
-                self.queue.append( (event, val) )
-                return
-        finally:
-            self.lock.release()
-
-        if async:
-            scheduler.now(self.call_handlers, event, val)
-        else:
-            self.call_handlers(event, val)
-
-    def call_handlers(self, event, val):
-        """Internal method to call event handlers.
-        """
-        #print ">event", event, val
-        self.call_event_handlers(event, event, val)
-        self.call_query_handlers(event, val)
-        self.call_star_handlers(event, val)
-
-    def call_event_handlers(self, key, event, val):
-        """Call the handlers for an event.
- It is safe for handlers to subscribe or unsubscribe. - - key key for handler list - event event type - val event value - """ - try: - self.lock.acquire() - hl = self.handlers.get(key) - if hl is None: - return - # Copy the handler list so that handlers can call - # subscribe/unsubscribe safely - python list iteration - # is not safe against list modification. - hl = hl[:] - finally: - self.lock.release() - # Must not hold the lock while calling the handlers. - for h in hl: - try: - h(event, val) - except: - pass - - def call_query_handlers(self, event, val): - """Call regex handlers for events matching 'event' that end in '?'. - - event event type - val event value - """ - dot_idx = event.rfind(self.DOT) - if dot_idx == -1: - self.call_event_handlers(self.QUERY, event, val) - else: - event_query = event[0:dot_idx] + self.DOT_QUERY - self.call_event_handlers(event_query, event, val) - - def call_star_handlers(self, event, val): - """Call regex handlers for events matching 'event' that end in '*'. - - event event type - val event value - """ - etype = string.split(event, self.DOT) - for i in range(len(etype), 0, -1): - event_star = self.DOT.join(etype[0:i]) + self.DOT_STAR - self.call_event_handlers(event_star, event, val) - self.call_event_handlers(self.STAR, event, val) - -def instance(): - global inst - try: - inst - except: - inst = EventServer() - inst.start() - return inst - -def main(): - def sys_star(event, val): - print 'sys_star', event, val - - def sys_foo(event, val): - print 'sys_foo', event, val - s.unsubscribe('sys.foo', sys_foo) - - def sys_foo2(event, val): - print 'sys_foo2', event, val - - def sys_bar(event, val): - print 'sys_bar', event, val - - def sys_foo_bar(event, val): - print 'sys_foo_bar', event, val - - def foo_bar(event, val): - print 'foo_bar', event, val - - s = EventServer() - s.start() - s.subscribe('sys.*', sys_star) - s.subscribe('sys.foo', sys_foo) - s.subscribe('sys.foo', sys_foo2) - s.subscribe('sys.bar', sys_bar) - s.subscribe('sys.foo.bar', sys_foo_bar) - s.subscribe('foo.bar', foo_bar) - s.inject('sys.foo', 'hello') - print - s.inject('sys.bar', 'hello again') - print - s.inject('sys.foo.bar', 'hello again') - print - s.inject('foo.bar', 'hello again') - print - s.inject('foo', 'hello again') - print - s.start() - s.unsubscribe('sys.*', sys_star) - s.unsubscribe_all('sys.*') - s.inject('sys.foo', 'hello') - -if __name__ == "__main__": - main() - diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/server/channel.py --- a/tools/python/xen/xend/server/channel.py Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,92 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -#============================================================================ - -import threading -import select - -import xen.lowlevel.xc; xc = xen.lowlevel.xc.new() - -from xen.xend.XendLogging import log - -DEBUG = 0 - -RESPONSE_TIMEOUT = 20.0 - -class EventChannel(dict): - """An event channel between domains. - """ - - def interdomain(cls, dom1, dom2, port1=0, port2=0): - """Create an event channel between domains. - - @return EventChannel (None on error) - """ - v = xc.evtchn_bind_interdomain(dom1=dom1, dom2=dom2, - port1=port1, port2=port2) - if v: - v = cls(dom1, dom2, v) - return v - - interdomain = classmethod(interdomain) - - def __init__(self, dom1, dom2, d): - d['dom1'] = dom1 - d['dom2'] = dom2 - self.update(d) - self.dom1 = dom1 - self.dom2 = dom2 - self.port1 = d.get('port1') - self.port2 = d.get('port2') - - def close(self): - """Close the event channel. - """ - def evtchn_close(dom, port): - try: - xc.evtchn_close(dom=dom, port=port) - except Exception, ex: - pass - - if DEBUG: - print 'EventChannel>close>', self - evtchn_close(self.dom1, self.port1) - evtchn_close(self.dom2, self.port2) - - def sxpr(self): - return ['event-channel', - ['dom1', self.dom1 ], - ['port1', self.port1 ], - ['dom2', self.dom2 ], - ['port2', self.port2 ] - ] - - def __repr__(self): - return ("<EventChannel dom1:%d:%d dom2:%d:%d>" - % (self.dom1, self.port1, self.dom2, self.port2)) - -def eventChannel(dom1, dom2, port1=0, port2=0): - """Create an event channel between domains. - - @return EventChannel (None on error) - """ - return EventChannel.interdomain(dom1, dom2, port1=port1, port2=port2) - -def eventChannelClose(evtchn): - """Close an event channel. - """ - if not evtchn: return - evtchn.close() diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/xsnode.py --- a/tools/python/xen/xend/xenstore/xsnode.py Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,477 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> -#============================================================================ -import errno -import os -import os.path -import select -import socket -import sys -import threading -import time - -from xen.lowlevel import xs -from xen.xend import sxp -from xen.xend.PrettyPrint import prettyprint - -SELECT_TIMEOUT = 2.0 - -def getEventPath(event): - if event and event.startswith("/"): - event = event[1:] - return os.path.join("/event", event) - -def getEventIdPath(event): - return os.path.join(getEventPath(event), "@eid") - -class Subscription: - - def __init__(self, path, fn, sid): - self.path = path - self.watcher = None - self.fn = fn - self.sid = sid - - def getPath(self): - return self.path - - def getSid(self): - return self.sid - - def watch(self, watcher): - self.watcher = watcher - watcher.addSubs(self) - - def unwatch(self): - watcher = self.watcher - if watcher: - self.watcher = None - watcher.delSubs(self) - return watcher - - def notify(self, token, path, val): - try: - self.fn(self, token, path, val) - except SystemExit: - raise - except Exception, ex: - pass - -class Watcher: - - def __init__(self, store, path): - self.path = path - store.mkdirs(self.path) - self.xs = None - self.subscriptions = [] - - def fileno(self): - if self.xs: - return self.xs.fileno() - else: - return -1 - - def getPath(self): - return self.path - - def getToken(self): - return self.path - - def addSubs(self, subs): - self.subscriptions.append(subs) - self.watch() - - def delSubs(self, subs): - self.subscriptions.remove(subs) - if len(self.subscriptions) == 0: - self.unwatch() - - def watch(self): - if self.xs: return - self.xs = xs.open() - self.xs.watch(path=self.getPath(), token=self.getToken()) - - def unwatch(self): - if self.xs: -## Possibly crashes xenstored. -## try: -## self.xs.unwatch(path=self.getPath(), token=self.getToken()) -## except Exception, ex: -## print 'Watcher>unwatch>', ex - try: - self.xs.close() - except Exception, ex: - pass - self.xs = None - - def watching(self): - return self.xs is not None - - def getNotification(self): - p = self.xs.read_watch() - self.xs.acknowledge_watch(p[1]) - return p - - def notify(self): - try: - (path, token) = self.getNotification() - if path.endswith("@eid"): - pass - else: - val = self.xs.read(path) - for subs in self.subscriptions: - subs.notify(token, path, val) - except SystemExit: - raise - except Exception, ex: - raise - -class EventWatcher(Watcher): - - def __init__(self, store, path, event): - Watcher.__init__(self, store, path) - self.event = event - self.eidPath = getEventIdPath(event) - if not store.exists(self.eidPath): - store.write(self.eidPath, str(0)) - - def getEvent(self): - return self.event - - def getToken(self): - return self.event - -class XenStore: - - xs = None - watchThread = None - subscription_id = 1 - - def __init__(self): - self.subscriptions = {} - self.watchers = {} - self.write("/", "") - - def getxs(self): - if self.xs is None: - ex = None - for i in range(0,20): - try: - self.xs = xs.open() - ex = None - break - except SystemExit: - raise - except Exception, ex: - print >>sys.stderr, "Exception connecting to xenstored:", ex - print >>sys.stderr, "Trying again..." 
-                time.sleep(1)
-            else:
-                raise ex
-
-        #todo would like to reconnect if xs conn closes (e.g. daemon restart).
-        return self.xs
-
-    def dump(self, path="/", out=sys.stdout):
-        print 'dump>', path
-        val = ['node']
-        val.append(['path', path])
-##         perms = ['perms']
-##         for p in self.getPerms(path):
-##             l = ['perm']
-##             l.append('dom', p.get['dom'])
-##             for k in ['read', 'write', 'create', 'owner']:
-##                 v = p.get(k)
-##                 l.append([k, v])
-##             perms.append(l)
-##         val.append(perms)
-        data = self.read(path)
-        if data:
-            val.append(['data', data])
-        children = ['children']
-        for x in self.lsPaths(path):
-            print 'dump>', 'child=', x
-            children.append(self.dump(x))
-        if len(children) > 1:
-            val.append(children)
-        prettyprint(val, out=out)
-        return val
-
-    def getPerms(self, path):
-        return self.getxs().get_permissions(path)
-
-    def ls(self, path="/"):
-        return self.getxs().ls(path)
-
-    def lsPaths(self, path="/"):
-        return [ os.path.join(path, x) for x in self.ls(path) ]
-
-    def lsr(self, path="/", list=None):
-        if list is None:
-            list = []
-        list.append(path)
-        for x in self.lsPaths(path):
-            list.append(x)
-            self.lsr(x, list=list)
-        return list
-
-    def rm(self, path):
-        try:
-            #for x in self.lsPaths():
-            #    self.getxs().rm(x)
-            self.getxs().rm(path)
-        except:
-            pass
-
-    def exists(self, path):
-        try:
-            self.getxs().ls(path)
-            return True
-        except RuntimeError, ex:
-            if ex.args[0] == errno.ENOENT:
-                return False
-            else:
-                raise RuntimeError(ex.args[0],
-                                   ex.args[1] +
-                                   (', in exists(%s)' % (str(path))))
-
-    def mkdirs(self, path):
-        if self.exists(path):
-            return
-        elts = path.split("/")
-        p = "/"
-        for x in elts:
-            if x == "": continue
-            p = os.path.join(p, x)
-            if not self.exists(p):
-                self.getxs().write(p, "")
-
-    def read(self, path):
-        try:
-            return self.getxs().read(path)
-        except RuntimeError, ex:
-            if ex.args[0] == errno.EISDIR:
-                return None
-            else:
-                raise
-
-    def create(self, path):
-        self.write(path, "")
-
-    def write(self, path, data):
-        try:
-            self.getxs().write(path, data)
-        except RuntimeError, ex:
-            raise RuntimeError(ex.args[0],
-                               ex.args[1] +
-                               (', while writing %s : %s' % (str(path),
-                                                             str(data))))
-
-    def begin(self):
-        self.getxs().transaction_start()
-
-    def commit(self, abandon=False):
-        self.getxs().transaction_end(abort=abandon)
-
-    def watch(self, path, fn):
-        watcher = self.watchers.get(path)
-        if not watcher:
-            watcher = self.addWatcher(Watcher(self, path))
-        return self.addSubscription(watcher, fn)
-
-    def unwatch(self, sid):
-        s = self.subscriptions.get(sid)
-        if not s: return
-        del self.subscriptions[s.sid]
-        watcher = s.unwatch()
-        if watcher and not watcher.watching():
-            try:
-                del self.watchers[watcher.getPath()]
-            except:
-                pass
-
-    def subscribe(self, event, fn):
-        path = getEventPath(event)
-        watcher = self.watchers.get(path)
-        if not watcher:
-            watcher = self.addWatcher(EventWatcher(self, path, event))
-        return self.addSubscription(watcher, fn)
-
-    unsubscribe = unwatch
-
-    def sendEvent(self, event, data):
-        eventPath = getEventPath(event)
-        eidPath = getEventIdPath(event)
-        try:
-            #self.begin(eventPath)
-            self.mkdirs(eventPath)
-            eid = 1
-            if self.exists(eidPath):
-                try:
-                    eid = int(self.read(eidPath))
-                    eid += 1
-                except Exception, ex:
-                    pass
-            self.write(eidPath, str(eid))
-            self.write(os.path.join(eventPath, str(eid)), data)
-        finally:
-            #self.commit()
-            pass
-
-    def addWatcher(self, watcher):
-        self.watchers[watcher.getPath()] = watcher
-        self.watchStart()
-        return watcher
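[Editorial aside: a hypothetical session against this wrapper (it assumes a running xenstored and uses an invented event name), showing how subscribe() and sendEvent() pair up through the /event/<name>/@eid convention implemented above:

    store = XenStore()

    def on_event(subs, token, path, val):
        # token is the event name; path is the store node that changed
        print 'event', token, 'at', path, '=', val

    sid = store.subscribe('vm-start', on_event)
    store.sendEvent('vm-start', 'domain 3 created')
    # sendEvent bumps /event/vm-start/@eid and writes the payload to
    # /event/vm-start/<eid>; the watcher skips the @eid write itself.
    store.unsubscribe(sid)
]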
-    def addSubscription(self, watcher, fn):
-        self.subscription_id += 1
-        subs = Subscription(watcher.getPath(), fn, self.subscription_id)
-        self.subscriptions[subs.sid] = subs
-        subs.watch(watcher)
-        return subs.sid
-
-    def watchStart(self):
-        if self.watchThread: return
-        self.watchThread = threading.Thread(name="Watcher",
-                                            target=self.watchMain)
-        self.watchThread.setDaemon(True)
-        self.watchThread.start()
-
-    def watchMain(self):
-        try:
-            while True:
-                if self.watchThread is None: return
-                if not self.watchers:
-                    return
-                rd = self.watchers.values()
-                try:
-                    (srd, swr, ser) = select.select(rd, [], [], SELECT_TIMEOUT)
-                    for watcher in srd:
-                        watcher.notify()
-                except socket.error, ex:
-                    if ex.args[0] in (errno.EAGAIN, errno.EINTR):
-                        pass
-                    else:
-                        raise
-        finally:
-            self.watchThread = None
-
-def getXenStore():
-    global xenstore
-    try:
-        return xenstore
-    except:
-        xenstore = XenStore()
-        return xenstore
-
-def sendEvent(event, val):
-    getXenStore().sendEvent(event, val)
-
-def subscribe(event, fn):
-    return getXenStore().subscribe(event, fn)
-
-def unsubscribe(sid):
-    getXenStore().unsubscribe(sid)
-
-class XenNode:
-
-    def __init__(self, path="/", create=True):
-        self.store = getXenStore()
-        self.path = path
-        if not self.store.exists(path):
-            if create:
-                self.store.create(path)
-            else:
-                raise ValueError("path does not exist: '%s'" % path)
-
-    def getStore(self):
-        return self.store
-
-    def relPath(self, path=""):
-        if not path:
-            return self.path
-        if path and path.startswith("/"):
-            path = path[1:]
-        return os.path.join(self.path, path)
-
-    def delete(self, path=""):
-        self.store.rm(self.relPath(path))
-
-    def exists(self, path=""):
-        return self.store.exists(self.relPath(path))
-
-    def getNode(self, path="", create=True):
-        if path == "":
-            return self
-        else:
-            return XenNode(self.relPath(path=path), create=create)
-
-    getChild = getNode
-
-    def getData(self, path=""):
-        path = self.relPath(path)
-        try:
-            return self.store.read(path)
-        except:
-            return None
-
-    def setData(self, data, path=""):
-        return self.store.write(self.relPath(path), data)
-
-    def getLock(self):
-        return None
-
-    def lock(self, lockid):
-        return None
-
-    def unlock(self, lockid):
-        return None
-
-    def deleteChild(self, name):
-        self.delete(name)
-
-    def deleteChildren(self):
-        for name in self.ls():
-            self.deleteChild(name)
-
-    def getChildren(self):
-        return [ self.getNode(name) for name in self.ls() ]
-
-    def ls(self):
-        return self.store.ls(self.path)
-
-    def watch(self, fn, path=""):
-        """Watch a path for changes. The path is relative
-        to the node and defaults to the node itself.
-        """
-        return self.store.watch(self.relPath(path), fn)
-
-    def unwatch(self, sid):
-        return self.store.unwatch(sid)
-
-    def subscribe(self, event, fn):
-        return self.store.subscribe(event, fn)
-
-    def unsubscribe(self, sid):
-        self.store.unsubscribe(sid)
-
-    def sendEvent(self, event, data):
-        return self.store.sendEvent(event, data)
-
-    def __repr__(self):
-        return "<XenNode %s>" % self.path
-
-
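[Editorial aside: a hypothetical end-to-end session with the XenNode wrapper above (again assuming a live xenstored; the path and values are invented for illustration):

    node = XenNode("/tool/example")       # created on demand

    def on_change(subs, token, path, val):
        print 'changed:', path, '->', val

    sid = node.watch(on_change)           # fires for writes under the path
    node.setData("hello")
    # ... the watcher thread delivers on_change() asynchronously ...
    node.unwatch(sid)
    node.delete()
]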
diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py	Sat Oct 8 17:37:45 2005
+++ /dev/null	Sat Oct 8 20:28:24 2005
@@ -1,537 +0,0 @@
-#============================================================================
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU Lesser General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#============================================================================
-# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
-#============================================================================
-import string
-import types
-
-from xen.xend.XendLogging import log
-
-from xen.xend import sxp
-from xsnode import XenNode
-from xen.util.mac import macToString, macFromString
-
-VALID_KEY_CHARS = string.ascii_letters + string.digits + "_-@"
-
-def hasAttr(obj, attr):
-    if isinstance(obj, dict):
-        return attr in obj
-    else:
-        return hasattr(obj, attr)
-
-def getAttr(obj, attr):
-    try:
-        if isinstance(obj, dict):
-            return obj.get(attr)
-        else:
-            return getattr(obj, attr, None)
-    except AttributeError:
-        return None
-    except LookupError:
-        return None
-
-def setAttr(obj, attr, val):
-    if isinstance(obj, dict):
-        obj[attr] = val
-    else:
-        setattr(obj, attr, val)
-
-class DBConverter:
-    """Conversion of values to and from strings in xenstore.
-    """
-
-    converters = {}
-
-    def checkType(cls, ty):
-        if ty is None or ty in cls.converters:
-            return
-        raise ValueError("invalid converter type: '%s'" % ty)
-
-    checkType = classmethod(checkType)
-
-    def getConverter(cls, ty=None):
-        if ty is None:
-            ty = "str"
-        conv = cls.converters.get(ty)
-        if not conv:
-            raise ValueError("no converter for type: '%s'" % ty)
-        return conv
-
-    getConverter = classmethod(getConverter)
-
-    def exportTypeToDB(cls, db, path, val, ty=None):
-        return cls.getConverter(ty).exportToDB(db, path, val)
-
-    exportTypeToDB = classmethod(exportTypeToDB)
-
-    def importTypeFromDB(cls, db, path, ty=None):
-        return cls.getConverter(ty).importFromDB(db, path)
-
-    importTypeFromDB = classmethod(importTypeFromDB)
-
-    # Must define in subclass.
-    name = None
-
-    def __init__(self):
-        self.register()
-
-    def register(self):
-        if not self.name:
-            raise ValueError("invalid converter name: '%s'" % self.name)
-        self.converters[self.name] = self
-
-    def exportToDB(self, db, path, val):
-        if val is None:
-            return
-        try:
-            data = self.toDB(val)
-        except Exception, ex:
-            raise
-        setattr(db, path, data)
-
-    def importFromDB(self, db, path):
-        data = getAttr(db, path)
-        if data is None:
-            val = None
-        else:
-            try:
-                val = self.fromDB(data.getData())
-            except Exception, ex:
-                raise
-        return val
-
-    def toDB(self, val):
-        raise NotImplementedError()
-
-    def fromDB(self, val):
-        raise NotImplementedError()
-
-class StrConverter(DBConverter):
-
-    name = "str"
-
-    def toDB(self, val):
-        # Convert True/False to 1/0, otherwise they convert to
-        # 'True' and 'False' rather than '1' and '0', even though
-        # isinstance(True/False, int) is true.
- if isinstance(val, bool): - val = int(val) - return str(val) - - def fromDB(self, data): - return data - -StrConverter() - -class BoolConverter(DBConverter): - - name = "bool" - - def toDB(self, val): - return str(int(bool(val))) - - def fromDB(self, data): - return bool(int(data)) - -BoolConverter() - -class SxprConverter(DBConverter): - - name = "sxpr" - - def toDB(self, val): - return sxp.to_string(val) - - def fromDB(self, data): - return sxp.from_string(data) - -SxprConverter() - -class IntConverter(DBConverter): - - name = "int" - - def toDB(self, val): - return str(int(val)) - - def fromDB(self, data): - return int(data) - -IntConverter() - -class FloatConverter(DBConverter): - - name = "float" - - def toDB(self, val): - return str(float(val)) - - def fromDB(self, data): - return float(data) - -FloatConverter() - -class LongConverter(DBConverter): - - name = "long" - - def toDB(self, val): - return str(long(val)) - - def fromDB(self, data): - return long(data) - -LongConverter() - -class MacConverter(DBConverter): - - name = "mac" - - def toDB(self, val): - return macToString(val) - - def fromDB(self, data): - return macFromString(data) - -MacConverter() - -class DBVar: - - def __init__(self, var, ty=None, path=None): - DBConverter.checkType(ty) - if path is None: - path = var - self.var = var - self.ty = ty - self.path = path - varpath = filter(bool, self.var.split()) - self.attrpath = varpath[:-1] - self.attr = varpath[-1] - - def exportToDB(self, db, obj): - val = self.getObj(obj) - DBConverter.exportTypeToDB(db, self.path, val, ty=self.ty) - - def importFromDB(self, db, obj): - val = DBConverter.importTypeFromDB(db, self.path, ty=self.ty) - self.setObj(obj, val) - - def getObj(self, obj): - o = obj - for x in self.attrpath: - o = getAttr(o, x) - if o is None: - return None - return getAttr(o, self.attr) - - def setObj(self, obj, val): - o = obj - for x in self.attrpath: - o = getAttr(o, x) - # Don't set obj attr if val is None. - if val is None and hasAttr(o, self.attr): - return - setAttr(o, self.attr, val) - -class DBMap(dict): - """A persistent map. Extends dict with persistence. - Set and get values using the usual map syntax: - - m[k], m.get(k) - m[k] = v - - Also supports being treated as an object with attributes. - When 'k' is a legal identifier you may also use - - m.k, getattr(m, k) - m.k = v, setattr(m, k) - k in m, hasattr(m, k) - - When setting you can pass in a normal value, for example - - m.x = 3 - - Getting works too: - - m.x ==> 3 - - while m['x'] will return the map for x. - - m['x'].getData() ==> 3 - - To get values from subdirs use get() to get the subdir first: - - get(m, 'foo').x - m['foo'].x - - instead of m.foo.x, because m.foo will return the data for field foo, - not the directory. - - You can assign values into a subdir by passing a map: - - m.foo = {'x': 1, 'y':2 } - - You can also use paths as keys: - - m['foo/x'] = 1 - - sets field x in subdir foo. - - """ - - __db__ = None - __data__ = None - __perms__ = None - __parent__ = None - __name__ = "" - - __transaction__ = False - - # True if value set since saved (or never saved). 
- __dirty__ = True - - def __init__(self, parent=None, name="", db=None): - if parent is None: - self.__name__ = name - else: - if not isinstance(parent, DBMap): - raise ValueError("invalid parent") - self.__parent__ = parent - self.__name__ = name - db = self.__parent__.getChildDB(name) - self.setDB(db) - - def getName(self): - return self.__name__ - - def getPath(self): - return self.__db__ and self.__db__.relPath() - - def watch(self, fn, path=""): - return self.__db__.watch(fn, path=path) - - def unwatch(self, sid): - return self.__db__.unwatch(sid) - - def subscribe(self, event, fn): - return self.__db__.subscribe(event, fn) - - def unsubscribe(self, sid): - return self.__db__.unsubscribe(sid) - - def sendEvent(self, event, val): - return self.__db__.sendEvent(event, val) - - def transactionBegin(self): - # Begin a transaction. - pass - - def transactionCommit(self): - # Commit writes to db. - pass - - def transactionFail(self): - # Fail a transaction. - # We have changed values, what do we do? - pass - - def checkName(self, k): - if k == "": - raise ValueError("invalid key, empty string") - for c in k: - if c in VALID_KEY_CHARS: continue - raise ValueError("invalid key char '%s'" % c) - - def _setData(self, v): - #print 'DBMap>_setData>', self.getPath(), 'data=', v - if v != self.__data__: - self.__dirty__ = True - self.__data__ = v - - def setData(self, v): - if isinstance(v, dict): - for (key, val) in v.items(): - self[key] = val - else: - self._setData(v) - - def getData(self): - return self.__data__ - - def _set(self, k, v): - dict.__setitem__(self, k, v) - - def _get(self, k): - try: - return dict.__getitem__(self, k) - except: - return None - - def _del(self, k, v): - try: - dict.__delitem__(self, k) - except: - pass - - def _contains(self, k): - return dict.__contains__(self, k) - - def __setitem__(self, k, v, save=False): - node = self.addChild(k) - node.setData(v) - if save: - node.saveDB() - - def __getitem__(self, k): - if self._contains(k): - v = self._get(k) - else: - v = self.readChildDB(k) - self._set(k, v) - return v - - def __delitem__(self, k): - self._del(k) - self.deleteChildDB(k) - - def __repr__(self): - if len(self): - return dict.__repr__(self) - else: - return repr(self.__data__) - - def __setattr__(self, k, v): - if k.startswith("__"): - object.__setattr__(self, k, v) - else: - self.__setitem__(k, v, save=True) - return v - - def __getattr__(self, k): - if k.startswith("__"): - v = object.__getattr__(self, k) - else: - try: - v = self.__getitem__(k).getData() - except LookupError, ex: - raise AttributeError(ex.args) - return v - - def __delattr__(self, k): - return self.__delitem__(k) - - def delete(self): - dict.clear(self) - self.__data__ = None - if self.__db__: - self.__db__.delete() - - def clear(self): - dict.clear(self) - if self.__db__: - self.__db__.deleteChildren() - - def getChild(self, k): - return self._get(k) - - def getChildDB(self, k): - self.checkName(k) - return self.__db__ and self.__db__.getChild(k) - - def deleteChildDB(self, k): - if self.__db__: - self.__db__.deleteChild(k) - - def _addChild(self, k): - kid = self._get(k) - if kid is None: - kid = DBMap(parent=self, name=k, db=self.getChildDB(k)) - self._set(k, kid) - return kid - - def addChild(self, path): - l = path.split("/") - n = self - for x in l: - if x == "": continue - n = n._addChild(x) - return n - - def setDB(self, db): - if (db is not None) and not isinstance(db, XenNode): - raise ValueError("invalid db") - self.__db__ = db - for (k, v) in self.items(): - if v is None: 
continue - if isinstance(v, DBMap): - v._setDB(self.addChild(k), restore) - - def readDB(self): - if self.__db__ is None: - return - self.__data__ = self.__db__.getData() - l = self.__db__.ls() - if l: - for k in l: - n = self.addChild(k) - n.readDB() - self.__dirty__ = False - - def readChildDB(self, k): - if self.__db__ and (k in self.__db__.ls()): - n = self.addChild(k) - n.readDB() - raise LookupError("invalid key '%s'" % k) - - def saveDB(self, sync=False, save=False): - """Save unsaved data to db. - If save or sync is true, saves whether dirty or not. - If sync is true, removes db entries not in the map. - """ - - if self.__db__ is None: - #print 'DBMap>saveDB>',self.getPath(), 'no db' - return - # Write data. - #print 'DBMap>saveDB>', self.getPath(), 'dirty=', self.__dirty__, 'data=', self.__data__ - if ((self.__data__ is not None) - and (sync or save or self.__dirty__)): - self.__db__.setData(self.__data__) - self.__dirty__ = False - else: - #print 'DBMap>saveDB>', self.getPath(), 'not written' - pass - # Write children. - for (name, node) in self.items(): - if not isinstance(node, DBMap): continue - node.saveDB(sync=sync, save=save) - # Remove db nodes not in children. - ###if sync: - ### for name in self.__db__.ls(): - ### if name not in self: - ### self.__db__.delete(name) - - def importFromDB(self, obj, fields): - """Set fields in obj from db fields. - """ - for f in fields: - f.importFromDB(self, obj) - - def exportToDB(self, obj, fields, save=False, sync=False): - """Set fields in db from obj fields. - """ - for f in fields: - f.exportToDB(self, obj) - self.saveDB(save=save, sync=sync) diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/python/xen/xend/xenstore/xsresource.py --- a/tools/python/xen/xend/xenstore/xsresource.py Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,149 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> -#============================================================================ -# HTTP interface onto xenstore (read-only). -# Mainly intended for testing. 
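(A side note: the resource tree this module builds can be poked at with
plain urllib; a minimal sketch, assuming the server set up in main() below
is running locally on its default port 8003, and that "/tool" is only a
hypothetical xenstore key.)

import urllib

# HTML listing of the xenstore root; mount point and port match main().
print urllib.urlopen("http://localhost:8003/xenstore/").read()

# The special "__data__" child serves a node's raw data as text/plain.
print urllib.urlopen("http://localhost:8003/xenstore/tool/__data__").read()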
- -import os -import os.path - -from xen.web.httpserver import HttpServer, UnixHttpServer -from xen.web.SrvBase import SrvBase -from xen.web.SrvDir import SrvDir -from xen.xend.Args import FormFn -from xen.xend.xenstore import XenNode - -def pathurl(req): - url = req.prePathURL() - if not url.endswith('/'): - url += '/' - return url - -def writelist(req, l): - req.write('(') - for k in l: - req.write(' ' + k) - req.write(')') - -def lsData(dbnode, req, url): - v = dbnode.getData() - if v is None: - req.write('<p>No data') - else: - req.write('<p>Data: <pre>') - req.write(str(v)) - req.write('</pre>') - v = dbnode.getLock() - if v is None: - req.write("<p>Unlocked") - else: - req.write("<p>Lock = %s" % v) - -def lsChildren(dbnode, req, url): - l = dbnode.ls() - if l: - req.write('<p>Children: <ul>') - for key in l: - child = dbnode.getChild(key) - data = child.getData() - if data is None: data = "" - req.write('<li><a href="%(url)s%(key)s">%(key)s</a> %(data)s</li>' - % { "url": url, "key": key, "data": data }) - req.write('</ul>') - else: - req.write('<p>No children') - - -class DBDataResource(SrvBase): - """Resource for the node data. - """ - - def __init__(self, dbnode): - SrvBase.__init__(self) - self.dbnode = dbnode - - def render_GET(self, req): - req.write('<html><head></head><body>') - self.print_path(req) - req.write("<pre>") - req.write(self.getData() or self.getNoData()) - req.write("</pre>") - req.write('</body></html>') - - def getContentType(self): - # Use content-type from metadata. - return "text/plain" - - def getData(self): - v = self.dbnode.getData() - if v is None: return v - return str(v) - - def getNoData(self): - return "" - -class DBNodeResource(SrvDir): - """Resource for a DB node. - """ - - def __init__(self, dbnode): - SrvDir.__init__(self) - self.dbnode = dbnode - - def get(self, x): - val = None - if x == "__data__": - val = DBDataResource(self.dbnode) - else: - if self.dbnode.exists(x): - child = self.dbnode.getChild(x, create=False) - else: - child = None - if child is not None: - val = DBNodeResource(child) - return val - - def render_POST(self, req): - return self.perform(req) - - def ls(self, req, use_sxp=0): - if use_sxp: - writelist(req, self.dbnode.getChildren()) - else: - url = pathurl(req) - req.write("<fieldset>") - lsData(self.dbnode, req, url) - lsChildren(self.dbnode, req, url) - req.write("</fieldset>") - - def form(self, req): - url = req.prePathURL() - pass - -class DBRootResource(DBNodeResource): - """Resource for the root of a DB. - """ - - def __init__(self): - DBNodeResource.__init__(self, XenNode()) - -def main(argv): - root = SrvDir() - root.putChild('xenstore', DBRootResource()) - interface = '' - port = 8003 - server = HttpServer(root=root, interface=interface, port=port) - server.run() diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/testsuite/15nowait.test --- a/tools/xenstore/testsuite/15nowait.test Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,25 +0,0 @@ -# If we don't wait for an ack, we can crash daemon as it never expects to be -# sending out two replies on top of each other. 
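(What noackwrite is exercising can be sketched in a few lines of Python:
frame two XS_WRITE requests with the xsd_sockmsg header from xenstored.h,
removed further down in this patch, and push both down the socket before
reading either ack. The socket path and the path\0flags\0data payload
layout are assumptions, not taken from this changeset.)

import socket
import struct

XS_WRITE = 13                       # position in enum xsd_sockmsg_type
SOCK = "/var/run/xenstored/socket"  # assumed location of the daemon socket

def write_msg(path, data):
    # xsd_sockmsg is two u32s, type and payload length, then the payload.
    payload = "%s\0%s\0%s" % (path, "NONE", data)  # "NONE" = XS_WRITE_NONE
    return struct.pack("=II", XS_WRITE, len(payload)) + payload

s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
s.connect(SOCK)
# Two writes back to back, acks deferred, as the test does.
s.send(write_msg("/1", "1"))
s.send(write_msg("/2", "2"))
print repr(s.recv(1024))  # first ack
print repr(s.recv(1024))  # second ack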
-noackwrite /1 1 -noackwrite /2 2 -noackwrite /3 3 -noackwrite /4 4 -noackwrite /5 5 -readack -readack -readack -readack -readack - -expect handle is 1 -introduce 1 100 7 /my/home -1 noackwrite /1 1 -1 noackwrite /2 2 -1 noackwrite /3 3 -1 noackwrite /4 4 -1 noackwrite /5 5 -1 readack -1 readack -1 readack -1 readack -1 readack diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/testsuite/16block-watch-crash.test --- a/tools/xenstore/testsuite/16block-watch-crash.test Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,14 +0,0 @@ -# Test case where blocked connection gets sent watch. - -# FIXME: We no longer block connections -# mkdir /test -# watch /test token -# 1 start -# # This will block on above -# noackwrite /test/entry contents -# 1 write /test/entry2 contents -# 1 commit -# readack -# expect /test/entry2:token -# waitwatch -# ackwatch token diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xenstored.h --- a/tools/xenstore/xenstored.h Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,89 +0,0 @@ -/* - * Simple prototyle Xen Store Daemon providing simple tree-like database. - * Copyright (C) 2005 Rusty Russell IBM Corporation - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _XENSTORED_H -#define _XENSTORED_H - -enum xsd_sockmsg_type -{ - XS_DEBUG, - XS_SHUTDOWN, - XS_DIRECTORY, - XS_READ, - XS_GET_PERMS, - XS_WATCH, - XS_WATCH_ACK, - XS_UNWATCH, - XS_TRANSACTION_START, - XS_TRANSACTION_END, - XS_OP_READ_ONLY = XS_TRANSACTION_END, - XS_INTRODUCE, - XS_RELEASE, - XS_GET_DOMAIN_PATH, - XS_WRITE, - XS_MKDIR, - XS_RM, - XS_SET_PERMS, - XS_WATCH_EVENT, - XS_ERROR, -}; - -#define XS_WRITE_NONE "NONE" -#define XS_WRITE_CREATE "CREATE" -#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" - -/* We hand errors as strings, for portability. */ -struct xsd_errors -{ - int errnum; - const char *errstring; -}; -#define XSD_ERROR(x) { x, #x } -static struct xsd_errors xsd_errors[] __attribute__((unused)) = { - XSD_ERROR(EINVAL), - XSD_ERROR(EACCES), - XSD_ERROR(EEXIST), - XSD_ERROR(EISDIR), - XSD_ERROR(ENOENT), - XSD_ERROR(ENOMEM), - XSD_ERROR(ENOSPC), - XSD_ERROR(EIO), - XSD_ERROR(ENOTEMPTY), - XSD_ERROR(ENOSYS), - XSD_ERROR(EROFS), - XSD_ERROR(EBUSY), - XSD_ERROR(EAGAIN), - XSD_ERROR(EISCONN), -}; -struct xsd_sockmsg -{ - u32 type; - u32 len; /* Length of data following this. 
*/ - - /* Generally followed by nul-terminated string(s). */ -}; - -#endif /* _XENSTORED_H */ diff -r 0ba10f7fef51 -r 4e0c94871be2 tools/xenstore/xs_dom0_test.c --- a/tools/xenstore/xs_dom0_test.c Sat Oct 8 17:37:45 2005 +++ /dev/null Sat Oct 8 20:28:24 2005 @@ -1,43 +0,0 @@ -/* Test introduction of domain 0 */ -#include <linux/ioctl.h> -#include <sys/ioctl.h> -#include "xs.h" -#include "utils.h" -#include <xenctrl.h> -#include <xen/linux/privcmd.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/mman.h> - -int main() -{ - int h, local = 0, kernel = 0; - long err; - void *page; - - h = xc_interface_open(); - if (h < 0) - barf_perror("Failed to open xc"); - - if (xc_evtchn_bind_interdomain(h, DOMID_SELF, 0, &local, &kernel) != 0) - barf_perror("Failed to bind interdomain"); - - printf("Got ports %i & %i\n", local, kernel); - - err = ioctl(h, IOCTL_PRIVCMD_INITDOMAIN_STORE, kernel); - if (err < 0) - barf_perror("Failed to initialize store"); - printf("Got mfn %li\n", err); - - page = xc_map_foreign_range(h, 0, getpagesize(), PROT_READ|PROT_WRITE, - err); - if (!page) - barf_perror("Failed to map page %li", err); - printf("Mapped page at %p\n", page); - printf("Page says %s\n", (char *)page); - munmap(page, getpagesize()); - printf("unmapped\n"); - - return 0; -} - _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
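(A last aside on the removed xenstored.h: since the daemon hands errors
back as strings "for portability", a client has to map names like ENOENT
onto error numbers itself. Python's errno module makes that a one-liner;
a sketch, assuming the XS_ERROR payload is just the bare error name.)

import errno

def error_to_errno(name):
    # e.g. "ENOENT" -> 2 on Linux; 0 if the name is unrecognised.
    return getattr(errno, name, 0)

print error_to_errno("ENOENT")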