Xen project Mailing List

[Xen-changelog] Remove linux-2.4 sparse tree.

From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>

Date: Fri, 07 Oct 2005 22:46:10 +0000

Delivery-date: Fri, 07 Oct 2005 22:43:44 +0000

List-id: BK change log <xen-changelog.lists.xensource.com>

# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID f1abe953e4019c023cccec46560052df7261828d # Parent bf07490fab19178cf1822ea540aa2684f207de96 Remove linux-2.4 sparse tree. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> diff -r bf07490fab19 -r f1abe953e401 Makefile --- a/Makefile Fri Oct 7 22:22:35 2005 +++ b/Makefile Fri Oct 7 22:36:26 2005 @@ -3,8 +3,7 @@ # KERNELS ?= linux-2.6-xen0 linux-2.6-xenU -# linux-2.4-xen0 linux-2.4-xenU netbsd-2.0-xenU -# You may use wildcards in the above e.g. KERNELS=*2.4* +# You may use wildcards in the above e.g. KERNELS=*2.6* XKERNELS := $(foreach kernel, $(KERNELS), $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.$(kernel))) ) @@ -188,12 +187,5 @@ rm -rf $(D)/usr/share/man/man8/xen* # Legacy targets for compatibility -linux24: - $(MAKE) 'KERNELS=linux-2.4*' kernels - linux26: $(MAKE) 'KERNELS=linux-2.6*' kernels - -netbsd20: - $(MAKE) netbsd-2.0-xenU-build - diff -r bf07490fab19 -r f1abe953e401 README --- a/README Fri Oct 7 22:22:35 2005 +++ b/README Fri Oct 7 22:36:26 2005 @@ -22,7 +22,7 @@ GNU GPL. The 2.0 release offers excellent performance, hardware support and -enterprise-grade features such as live migration. Linux 2.6, 2.4 and +enterprise-grade features such as live migration. Linux 2.6 and NetBSD 2.0 are already available for Xen, with more operating system ports on the way. @@ -97,8 +97,8 @@ version for unstable. 2. cd to xen-unstable (or whatever you sensibly rename it to). - The Linux (2.4 and 2.6), netbsd and freebsd kernel source - trees are in the $os-$version-xen-sparse directories. + The Linux, netbsd and freebsd kernel source trees are in + the $os-$version-xen-sparse directories. On Linux: diff -r bf07490fab19 -r f1abe953e401 docs/misc/xen_config.html --- a/docs/misc/xen_config.html Fri Oct 7 22:22:35 2005 +++ b/docs/misc/xen_config.html Fri Oct 7 22:36:26 2005 @@ -154,7 +154,7 @@ (memory 64) (image (linux - (kernel /boot/vmlinuz-2.4.26-xen) + (kernel /boot/vmlinuz-2.6.12-xen) (ip ::::xendom1:eth0:dhcp) (root /dev/xda1) (args 'rw fastboot 4') diff -r bf07490fab19 -r f1abe953e401 docs/src/user/installation.tex --- a/docs/src/user/installation.tex Fri Oct 7 22:22:35 2005 +++ b/docs/src/user/installation.tex Fri Oct 7 22:36:26 2005 @@ -1,7 +1,7 @@ \chapter{Installation} The Xen distribution includes three main components: Xen itself, ports -of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the userspace +of Linux and NetBSD to run on Xen, and the userspace tools required to manage a Xen-based system. This chapter describes how to install the Xen~2.0 distribution from source. Alternatively, there may be pre-built packages available as part of your operating @@ -169,7 +169,7 @@ You can edit this line to include any set of operating system kernels which have configurations in the top-level \path{buildconfigs/} -directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4 +directory, for example \path{mk.linux-2.6-xenU} to build a Linux 2.6 kernel containing only virtual device drivers. %% Inspect the Makefile if you want to see what goes on during a @@ -190,10 +190,6 @@ %% mkbuildtree} script to add the Xen patches to the kernel. -%% The procedure is similar to build the Linux 2.4 port: \\ -%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24! - - %% \framebox{\parbox{5in}{ %% {\bf Distro specific:} \\ %% {\it Gentoo} --- if not using udev (most installations, diff -r bf07490fab19 -r f1abe953e401 docs/src/user/introduction.tex --- a/docs/src/user/introduction.tex Fri Oct 7 22:22:35 2005 +++ b/docs/src/user/introduction.tex Fri Oct 7 22:36:26 2005 @@ -28,7 +28,7 @@ space applications and libraries \emph{do not} require modification. Xen support is available for increasingly many operating systems: -right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0. +right now, Linux and NetBSD are available for Xen 2.0. A FreeBSD port is undergoing testing and will be incorporated into the release soon. Other OS ports, including Plan 9, are in progress. We hope that that arch-xen patches will be incorporated into the diff -r bf07490fab19 -r f1abe953e401 buildconfigs/mk.linux-2.4-xenU --- a/buildconfigs/mk.linux-2.4-xenU Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,51 +0,0 @@ - -OS = linux - -LINUX_SERIES = 2.4 -LINUX_VER = 2.4.30 - -EXTRAVERSION = xenU - -LINUX_DIR = $(OS)-$(LINUX_VER)-$(EXTRAVERSION) - -include buildconfigs/Rules.mk - -.PHONY: build clean delete - -# The real action starts here! -build: $(LINUX_DIR)/include/linux/autoconf.h - if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) modules ; \ - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_MOD_PATH=$(DESTDIR) modules_install ; \ - fi - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(DESTDIR) install - -$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) - # Apply arch-xen patches - ( cd linux-$(LINUX_SERIES)-xen-sparse ; \ - LINUX_ARCH=$(LINUX_ARCH) ./mkbuildtree ../$(LINUX_DIR) ) - # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ - [ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ - cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ - || cp $(LINUX_DIR)/arch/xen/defconfig-$(EXTRAVERSION) \ - $(LINUX_DIR)/.config - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - make -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig - make -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) dep - -config: CONFIGMODE = menuconfig -config: $(LINUX_DIR)/include/linux/autoconf.h - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) $(CONFIGMODE) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) dep - -clean:: - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean - -delete: - rm -rf tmp-$(OS)-$(LINUX_VER) $(LINUX_DIR) diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/Makefile --- a/linux-2.4-xen-sparse/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,592 +0,0 @@ -VERSION = 2 -PATCHLEVEL = 4 -SUBLEVEL = 30 -EXTRAVERSION = - -KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - -# SUBARCH always tells us the underlying machine architecture. -# Unless overridden, by default ARCH is equivalent to SUBARCH. -# This will be overriden for Xen and UML builds. -SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -ARCH ?= $(SUBARCH) - -## XXX The following hack can be discarded after users have adjusted to the -## architectural name change 'xeno' -> 'xen'. -ifeq ($(ARCH),xeno) - ARCH := xen -endif - -KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") - -CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ - else if [ -x /bin/bash ]; then echo /bin/bash; \ - else echo sh; fi ; fi) -TOPDIR := $(shell /bin/pwd) - -HPATH = $(TOPDIR)/include -FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net $(HPATH)/math-emu - -HOSTCC = gcc -HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer - -CROSS_COMPILE = - -# -# Include the make variables (CC, etc...) -# - -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc -CPP = $(CC) -E -AR = $(CROSS_COMPILE)ar -NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip -OBJCOPY = $(CROSS_COMPILE)objcopy -OBJDUMP = $(CROSS_COMPILE)objdump -MAKEFILES = $(TOPDIR)/.config -GENKSYMS = /sbin/genksyms -DEPMOD = /sbin/depmod -MODFLAGS = -DMODULE -CFLAGS_KERNEL = -PERL = perl -AWK = awk -RPM := $(shell if [ -x "/usr/bin/rpmbuild" ]; then echo rpmbuild; \ - else echo rpm; fi) - -export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ - CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ - CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK - -all: do-it-all - -# -# Make "config" the default target if there is no configuration file or -# "depend" the target if there is no top-level dependency information. -# - -ifeq (.config,$(wildcard .config)) -include .config -ifeq (.depend,$(wildcard .depend)) -include .depend -do-it-all: Version vmlinux -else -CONFIGURATION = depend -do-it-all: depend -endif -else -CONFIGURATION = config -do-it-all: config -endif - -# -# INSTALL_PATH specifies where to place the updated kernel and system map -# images. Uncomment if you want to place them anywhere other than root. -# - -#export INSTALL_PATH=/boot - -# -# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory -# relocations required by build roots. This is not defined in the -# makefile but the arguement can be passed to make if needed. -# - -MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -export MODLIB - -# -# standard CFLAGS -# - -CPPFLAGS := -D__KERNEL__ -I$(HPATH) - -CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ - -fno-strict-aliasing -fno-common -ifndef CONFIG_FRAME_POINTER -CFLAGS += -fomit-frame-pointer -endif -AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) - -# -# ROOT_DEV specifies the default root-device when making the image. -# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case -# the default of FLOPPY is used by 'build'. -# This is i386 specific. -# - -export ROOT_DEV = CURRENT - -# -# If you want to preset the SVGA mode, uncomment the next line and -# set SVGA_MODE to whatever number you want. -# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. -# The number is the same as you would ordinarily press at bootup. -# This is i386 specific. -# - -export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA - -# -# If you want the RAM disk device, define this to be the size in blocks. -# This is i386 specific. -# - -#export RAMDISK = -DRAMDISK=512 - -CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o -NETWORKS =net/network.o - -LIBS =$(TOPDIR)/lib/lib.a -SUBDIRS =kernel drivers mm fs net ipc lib crypto - -DRIVERS-n := -DRIVERS-y := -DRIVERS-m := -DRIVERS- := - -DRIVERS-$(CONFIG_ACPI_BOOT) += drivers/acpi/acpi.o -DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o -DRIVERS-y += drivers/char/char.o \ - drivers/block/block.o \ - drivers/misc/misc.o \ - drivers/net/net.o -DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o -DRIVERS-$(CONFIG_DRM_NEW) += drivers/char/drm/drm.o -DRIVERS-$(CONFIG_DRM_OLD) += drivers/char/drm-4.0/drm.o -DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a -DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o -DRIVERS-$(CONFIG_DEV_APPLETALK) += drivers/net/appletalk/appletalk.o -DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o -DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o -DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o -DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o -DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o -DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a -DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o -DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o -DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o - -ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) -DRIVERS-y += drivers/cdrom/driver.o -endif - -DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o -DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o -DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o -DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o -DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o -DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o -DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o -DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a -DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o -DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o -DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a -DRIVERS-$(CONFIG_PPC32) += drivers/macintosh/macintosh.o -DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o -DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o -DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o -DRIVERS-$(CONFIG_VT) += drivers/video/video.o -DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a -DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o -DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a -DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o -DRIVERS-$(CONFIG_USB_GADGET) += drivers/usb/gadget/built-in.o -DRIVERS-y +=drivers/media/media.o -DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o -DRIVERS-$(CONFIG_HIL) += drivers/hil/hil.o -DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o -DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o -DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o -DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o -DRIVERS-$(CONFIG_GSC) += drivers/gsc/gscbus.o -DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o -DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o -DRIVERS-$(CONFIG_ISDN_BOOL) += drivers/isdn/vmlinux-obj.o -DRIVERS-$(CONFIG_CRYPTO) += crypto/crypto.o - -DRIVERS := $(DRIVERS-y) - - -# files removed with 'make clean' -CLEAN_FILES = \ - kernel/ksyms.lst include/linux/compile.h \ - vmlinux System.map \ - .tmp* \ - drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ - drivers/char/conmakehash \ - drivers/char/drm/*-mod.c \ - drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ - drivers/zorro/devlist.h drivers/zorro/gen-devlist \ - drivers/sound/bin2hex drivers/sound/hex2hex \ - drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ - drivers/scsi/aic7xxx/aicasm/aicasm \ - drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_gram.h \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.h \ - drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.c \ - drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ - drivers/scsi/aic7xxx/aicasm/aicdb.h \ - drivers/scsi/aic7xxx/aicasm/y.tab.h \ - drivers/scsi/53c700_d.h \ - drivers/tc/lk201-map.c \ - net/khttpd/make_times_h \ - net/khttpd/times.h \ - submenu* \ - drivers/ieee1394/oui.c -# directories removed with 'make clean' -CLEAN_DIRS = \ - modules - -# files removed with 'make mrproper' -MRPROPER_FILES = \ - include/linux/autoconf.h include/linux/version.h \ - lib/crc32table.h lib/gen_crc32table \ - drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ - drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ - drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ - drivers/net/hamradio/soundmodem/gentbl \ - drivers/sound/*_boot.h drivers/sound/.*.boot \ - drivers/sound/msndinit.c \ - drivers/sound/msndperm.c \ - drivers/sound/pndsperm.c \ - drivers/sound/pndspini.c \ - drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ - .version .config* config.in config.old \ - scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ - scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ - .menuconfig.log \ - include/asm \ - .hdepend scripts/mkdep scripts/split-include scripts/docproc \ - $(TOPDIR)/include/linux/modversions.h \ - kernel.spec - -# directories removed with 'make mrproper' -MRPROPER_DIRS = \ - include/config \ - $(TOPDIR)/include/linux/modules - - -include arch/$(ARCH)/Makefile - -# Extra cflags for kbuild 2.4. The default is to forbid includes by kernel code -# from user space headers. Some UML code requires user space headers, in the -# UML Makefiles add 'kbuild_2_4_nostdinc :=' before include Rules.make. No -# other kernel code should include user space headers, if you need -# 'kbuild_2_4_nostdinc :=' or -I/usr/include for kernel code and you are not UML -# then your code is broken! KAO. - -kbuild_2_4_nostdinc := -nostdinc -iwithprefix include -export kbuild_2_4_nostdinc - -export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL - -export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS - -.S.s: - $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< -.S.o: - $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $< - -Version: dummy - @rm -f include/linux/compile.h - -boot: vmlinux - @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot - -vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs - $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \ - --start-group \ - $(CORE_FILES) \ - $(DRIVERS) \ - $(NETWORKS) \ - $(LIBS) \ - --end-group \ - -o vmlinux - $(NM) vmlinux | grep -v '$compiled$\|$\.o$$$\|$ [aUw] $\|$\.\.ng$$$\|$LASH[RL]DI$' | sort > System.map - -symlinks: - rm -f include/asm - ( cd include ; ln -sf asm-$(ARCH) asm) - @if [ ! -d include/linux/modules ]; then \ - mkdir include/linux/modules; \ - fi - -oldconfig: symlinks - $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in - -xconfig: symlinks - $(MAKE) -C scripts kconfig.tk - wish -f scripts/kconfig.tk - -menuconfig: include/linux/version.h symlinks - $(MAKE) -C scripts/lxdialog all - $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in - -config: symlinks - $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in - -include/config/MARKER: scripts/split-include include/linux/autoconf.h - scripts/split-include include/linux/autoconf.h include/config - @ touch include/config/MARKER - -linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) - -$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER - $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@) - -$(TOPDIR)/include/linux/version.h: include/linux/version.h -$(TOPDIR)/include/linux/compile.h: include/linux/compile.h - -newversion: - . scripts/mkversion > .tmpversion - @mv -f .tmpversion .version - -uts_len := 64 -uts_truncate := sed -e 's/$.\{1,$(uts_len)\}$.*/\1/' - -include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion - @echo -n \#`cat .version` > .ver1 - @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver1; fi - @if [ -f .name ]; then echo -n \-`cat .name` >> .ver1; fi - @LANG=C echo ' '`date` >> .ver1 - @echo \#define UTS_VERSION \"`cat .ver1 | $(uts_truncate)`\" > .ver - @LANG=C echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver - @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver - @echo \#define LINUX_COMPILE_HOST \"`hostname | $(uts_truncate)`\" >> .ver - @([ -x /bin/dnsdomainname ] && /bin/dnsdomainname > .ver1) || \ - ([ -x /bin/domainname ] && /bin/domainname > .ver1) || \ - echo > .ver1 - @echo \#define LINUX_COMPILE_DOMAIN \"`cat .ver1 | $(uts_truncate)`\" >> .ver - @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -n 1`\" >> .ver - @mv -f .ver $@ - @rm -f .ver1 - -include/linux/version.h: ./Makefile - @expr length "$(KERNELRELEASE)" \<= $(uts_len) > /dev/null || \ - (echo KERNELRELEASE \"$(KERNELRELEASE)\" exceeds $(uts_len) characters >&2; false) - @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver - @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver - @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver - @mv -f .ver $@ - -comma := , - -init/version.o: init/version.c include/linux/compile.h include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(SUBARCH)"' -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o init/version.o init/version.c - -init/main.o: init/main.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $< - -init/do_mounts.o: init/do_mounts.c include/config/MARKER - $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -c -o $@ $< - -fs lib mm ipc kernel drivers net: dummy - $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) - -TAGS: dummy - { find include/asm-${ARCH} -name '*.h' -print ; \ - find include -type d $ -name "asm-*" -o -name config $ -prune -o -name '*.h' -print ; \ - find $(SUBDIRS) init arch/${ARCH} -name '*.[chS]' ; } | grep -v SCCS | grep -v '\.svn' | etags - - -# Exuberant ctags works better with -I -tags: dummy - CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \ - ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \ - find include -type d $ -name "asm-*" -o -name config $ -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ - find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a - -ifdef CONFIG_MODULES -ifdef CONFIG_MODVERSIONS -MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h -endif - -.PHONY: modules -modules: $(patsubst %, _mod_%, $(SUBDIRS)) - -.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) -$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER - $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules - -.PHONY: modules_install -modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post - -.PHONY: _modinst_ -_modinst_: - @rm -rf $(MODLIB)/kernel - @rm -f $(MODLIB)/build - @mkdir -p $(MODLIB)/kernel - @ln -s $(TOPDIR) $(MODLIB)/build - -# If System.map exists, run depmod. This deliberately does not have a -# dependency on System.map since that would run the dependency tree on -# vmlinux. This depmod is only for convenience to give the initial -# boot a modules.dep even before / is mounted read-write. However the -# boot script depmod is the master version. -ifeq "$(strip $(INSTALL_MOD_PATH))" "" -depmod_opts := -else -depmod_opts := -b $(INSTALL_MOD_PATH) -r -endif -.PHONY: _modinst_post -_modinst_post: _modinst_post_pcmcia - if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi - -# Backwards compatibilty symlinks for people still using old versions -# of pcmcia-cs with hard coded pathnames on insmod. Remove -# _modinst_post_pcmcia for kernel 2.4.1. -.PHONY: _modinst_post_pcmcia -_modinst_post_pcmcia: - cd $(MODLIB); \ - mkdir -p pcmcia; \ - find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia - -.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS)) -$(patsubst %, _modinst_%, $(SUBDIRS)) : - $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install - -# modules disabled.... - -else -modules modules_install: dummy - @echo - @echo "The present kernel configuration has modules disabled." - @echo "Type 'make config' and enable loadable module support." - @echo "Then build a kernel with module support enabled." - @echo - @exit 1 -endif - -clean: archclean - find . $ -name '*.[oas]' -o -name core -o -name '.*.flags' $ -type f -print \ - | grep -v lxdialog/ | xargs rm -f - rm -f $(CLEAN_FILES) - rm -rf $(CLEAN_DIRS) - $(MAKE) -C Documentation/DocBook clean - -mrproper: clean archmrproper - find . $ -size 0 -o -name .depend $ -type f -print | xargs rm -f - rm -f $(MRPROPER_FILES) - rm -rf $(MRPROPER_DIRS) - $(MAKE) -C Documentation/DocBook mrproper - -distclean: mrproper - rm -f core `find . $ -not -type d $ -and \ - $ -name '*.orig' -o -name '*.rej' -o -name '*~' \ - -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' -o -name '.SUMS' -o -size 0 $ -type f -print` TAGS tags - -backup: mrproper - cd .. && tar cf - linux/ | gzip -9 > backup.gz - sync - -sgmldocs: - chmod 755 $(TOPDIR)/scripts/docgen - chmod 755 $(TOPDIR)/scripts/gen-all-syms - chmod 755 $(TOPDIR)/scripts/kernel-doc - $(MAKE) -C $(TOPDIR)/Documentation/DocBook books - -psdocs: sgmldocs - $(MAKE) -C Documentation/DocBook ps - -pdfdocs: sgmldocs - $(MAKE) -C Documentation/DocBook pdf - -htmldocs: sgmldocs - $(MAKE) -C Documentation/DocBook html - -mandocs: - chmod 755 $(TOPDIR)/scripts/kernel-doc - chmod 755 $(TOPDIR)/scripts/split-man - $(MAKE) -C Documentation/DocBook man - -sums: - find . -type f -print | sort | xargs sum > .SUMS - -dep-files: scripts/mkdep archdep include/linux/version.h - rm -f .depend .hdepend - $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" -ifdef CONFIG_MODVERSIONS - $(MAKE) update-modverfile -endif - scripts/mkdep -- `find $(FINDHPATH) $ -name SCCS -o -name .svn $ -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend - scripts/mkdep -- init/*.c > .depend - -ifdef CONFIG_MODVERSIONS -MODVERFILE := $(TOPDIR)/include/linux/modversions.h -else -MODVERFILE := -endif -export MODVERFILE - -depend dep: dep-files - -checkconfig: - find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl - -checkhelp: - find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl - -checkincludes: - find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl - -ifdef CONFIGURATION -..$(CONFIGURATION): - @echo - @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'" - @echo - $(MAKE) $(CONFIGURATION) - @echo - @echo "Successful. Try re-making (ignore the error that follows)" - @echo - exit 1 - -#dummy: ..$(CONFIGURATION) -dummy: - -else - -dummy: - -endif - -include Rules.make - -# -# This generates dependencies for the .h files. -# - -scripts/mkdep: scripts/mkdep.c - $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c - -scripts/split-include: scripts/split-include.c - $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c - -# -# RPM target -# -# If you do a make spec before packing the tarball you can rpm -ta it -# -spec: - . scripts/mkspec >kernel.spec - -# -# Build a tar ball, generate an rpm from it and pack the result -# There arw two bits of magic here -# 1) The use of /. to avoid tar packing just the symlink -# 2) Removing the .dep files as they have source paths in them that -# will become invalid -# -rpm: clean spec - find . $ -size 0 -o -name .depend -o -name .hdepend $ -type f -print | xargs rm -f - set -e; \ - cd $(TOPDIR)/.. ; \ - ln -sf $(TOPDIR) $(KERNELPATH) ; \ - tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \ - rm $(KERNELPATH) ; \ - cd $(TOPDIR) ; \ - . scripts/mkversion > .version ; \ - $(RPM) -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ - rm $(TOPDIR)/../$(KERNELPATH).tar.gz diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/Makefile --- a/linux-2.4-xen-sparse/arch/xen/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,137 +0,0 @@ -# -# xen/Makefile -# -# This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1994 by Linus Torvalds -# -# 19990713 Artur Skawina <skawina@xxxxxxxxxxxxx> -# Added '-march' and '-mpreferred-stack-boundary' support -# - -# If no .config file exists then use the appropriate defconfig-* file -ifneq (.config,$(wildcard .config)) -DUMMYX:=$(shell cp $(TOPDIR)/arch/xen/defconfig$(EXTRAVERSION) $(TOPDIR)/.config) --include $(TOPDIR)/.config -endif - -LD=$(CROSS_COMPILE)ld -m elf_i386 -OBJCOPY=$(CROSS_COMPILE)objcopy -R .note -R .comment -S -LDFLAGS=-e stext -LINKFLAGS =-T $(TOPDIR)/arch/xen/vmlinux.lds $(LDFLAGS) - -CFLAGS += -pipe - -check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) - -# prevent gcc from keeping the stack 16 byte aligned -CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) - -ifdef CONFIG_M686 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUMIII -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUM4 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MK7 -CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4) -endif - -# Disable unit-at-a-time mode, it makes gcc use a lot more stack -# due to the lack of sharing of stacklots. -CFLAGS += $(call check_gcc,-fno-unit-at-a-time,) - -HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o - -SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib -SUBDIRS += arch/xen/drivers/console -SUBDIRS += arch/xen/drivers/evtchn -SUBDIRS += arch/xen/drivers/blkif -SUBDIRS += arch/xen/drivers/netif -SUBDIRS += arch/xen/drivers/balloon -ifdef CONFIG_XEN_PRIVILEGED_GUEST -SUBDIRS += arch/xen/drivers/dom0 -endif - -CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o -CORE_FILES += arch/xen/drivers/evtchn/drv.o -CORE_FILES += arch/xen/drivers/console/drv.o -DRIVERS += arch/xen/drivers/blkif/drv.o -DRIVERS += arch/xen/drivers/netif/drv.o -ifdef CONFIG_XEN_PRIVILEGED_GUEST -CORE_FILES += arch/xen/drivers/dom0/drv.o -endif -CORE_FILES += arch/xen/drivers/balloon/drv.o -LIBS := $(TOPDIR)/arch/xen/lib/lib.a $(LIBS) $(TOPDIR)/arch/xen/lib/lib.a - -arch/xen/kernel: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/kernel - -arch/xen/mm: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/mm - -arch/xen/drivers/console: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/console - -arch/xen/drivers/network: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/network - -arch/xen/drivers/block: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/block - -arch/xen/drivers/dom0: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/dom0 - -arch/xen/drivers/balloon: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/balloon - -MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -vmlinux: arch/xen/vmlinux.lds - -FORCE: ; - -.PHONY: bzImage compressed clean archclean archmrproper archdep - - -bzImage: vmlinux - @$(MAKEBOOT) bzImage - -INSTALL_NAME ?= $(KERNELRELEASE) -install: bzImage - mkdir -p $(INSTALL_PATH)/boot - ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) - rm -f $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0644 arch/$(ARCH)/boot/bzImage $(INSTALL_PATH)/boot/vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0644 vmlinux $(INSTALL_PATH)/boot/vmlinux-syms-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0664 .config $(INSTALL_PATH)/boot/config-$(INSTALL_NAME)$(INSTALL_SUFFIX) - install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(INSTALL_NAME)$(INSTALL_SUFFIX) - ln -f -s vmlinuz-$(INSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) - -%_config: arch/xen/defconfig-% - rm -f .config arch/xen/defconfig - cp -f arch/xen/defconfig-$(@:_config=) arch/xen/defconfig - cp -f arch/xen/defconfig-$(@:_config=) .config - - -archclean: - @$(MAKEBOOT) clean - -archmrproper: - rm -f include/asm-xen/xen-public/arch - -archdep: - @$(MAKEBOOT) dep diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/boot/Makefile --- a/linux-2.4-xen-sparse/arch/xen/boot/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,13 +0,0 @@ -# -# arch/xen/boot/Makefile -# - -bzImage: $(TOPDIR)/vmlinux - $(OBJCOPY) $< Image - gzip -f -9 < Image > $@ - rm -f Image - -dep: - -clean: - rm -f bzImage Image diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/config.in --- a/linux-2.4-xen-sparse/arch/xen/config.in Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,327 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/config-language.txt. -# -mainmenu_name "Linux Kernel Configuration" - -define_bool CONFIG_XEN y - -define_bool CONFIG_X86 y -define_bool CONFIG_ISA y -define_bool CONFIG_SBUS n - -define_bool CONFIG_UID16 y - -mainmenu_option next_comment -comment 'Xen' -bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST -bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS -bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES -bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND -bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND -bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT -endmenu -# The IBM S/390 patch needs this. -define_bool CONFIG_NO_IDLE_HZ y - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - define_bool CONFIG_FOREIGN_PAGES y -else - define_bool CONFIG_FOREIGN_PAGES n - define_bool CONFIG_NETDEVICES y - define_bool CONFIG_VT n -fi - -mainmenu_option next_comment -comment 'Code maturity level options' -bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -endmenu - -mainmenu_option next_comment -comment 'Loadable module support' -bool 'Enable loadable module support' CONFIG_MODULES -if [ "$CONFIG_MODULES" = "y" ]; then - bool ' Set version information on all module symbols' CONFIG_MODVERSIONS - bool ' Kernel module loader' CONFIG_KMOD -fi -endmenu - -mainmenu_option next_comment -comment 'Processor type and features' -choice 'Processor family' \ - "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ - Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ - Pentium-4 CONFIG_MPENTIUM4 \ - Athlon/Duron/K7 CONFIG_MK7 \ - Opteron/Athlon64/Hammer/K8 CONFIG_MK8 \ - VIA-C3-2 CONFIG_MVIAC3_2" Pentium-Pro - - define_bool CONFIG_X86_WP_WORKS_OK y - define_bool CONFIG_X86_INVLPG y - define_bool CONFIG_X86_CMPXCHG y - define_bool CONFIG_X86_XADD y - define_bool CONFIG_X86_BSWAP y - define_bool CONFIG_X86_POPAD_OK y - define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n - define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y - - define_bool CONFIG_X86_GOOD_APIC y - define_bool CONFIG_X86_PGE y - define_bool CONFIG_X86_USE_PPRO_CHECKSUM y - define_bool CONFIG_X86_TSC y - -if [ "$CONFIG_M686" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUMIII" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUM4" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 7 -fi -if [ "$CONFIG_MK8" = "y" ]; then - define_bool CONFIG_MK7 y -fi -if [ "$CONFIG_MK7" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 6 - define_bool CONFIG_X86_USE_3DNOW y -fi -if [ "$CONFIG_MVIAC3_2" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi - -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -# tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD -#fi - -choice 'High Memory Support' \ - "off CONFIG_NOHIGHMEM \ - 4GB CONFIG_HIGHMEM4G" off -# 64GB CONFIG_HIGHMEM64G" off -if [ "$CONFIG_HIGHMEM4G" = "y" ]; then - define_bool CONFIG_HIGHMEM y -fi -if [ "$CONFIG_HIGHMEM64G" = "y" ]; then - define_bool CONFIG_HIGHMEM y - define_bool CONFIG_X86_PAE y -fi - -if [ "$CONFIG_HIGHMEM" = "y" ]; then - bool 'HIGHMEM I/O support' CONFIG_HIGHIO -fi - -define_int CONFIG_FORCE_MAX_ZONEORDER 11 - -#bool 'Symmetric multi-processing support' CONFIG_SMP -#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then -# define_bool CONFIG_HAVE_DEC_LOCK y -#fi -endmenu - -mainmenu_option next_comment -comment 'General setup' - -bool 'Networking support' CONFIG_NET - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - bool 'PCI support' CONFIG_PCI - source drivers/pci/Config.in - - bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG - - if [ "$CONFIG_HOTPLUG" = "y" ] ; then - source drivers/pcmcia/Config.in - source drivers/hotplug/Config.in - else - define_bool CONFIG_PCMCIA n - define_bool CONFIG_HOTPLUG_PCI n - fi -fi - -bool 'System V IPC' CONFIG_SYSVIPC -bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -bool 'Sysctl support' CONFIG_SYSCTL -if [ "$CONFIG_PROC_FS" = "y" ]; then - choice 'Kernel core (/proc/kcore) format' \ - "ELF CONFIG_KCORE_ELF \ - A.OUT CONFIG_KCORE_AOUT" ELF -fi -tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER - -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/mtd/Config.in - - source drivers/parport/Config.in - - source drivers/pnp/Config.in - - source drivers/block/Config.in - - source drivers/md/Config.in -fi - -if [ "$CONFIG_NET" = "y" ]; then - source net/Config.in -fi - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - mainmenu_option next_comment - comment 'ATA/IDE/MFM/RLL support' - - tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE - - if [ "$CONFIG_IDE" != "n" ]; then - source drivers/ide/Config.in - else - define_bool CONFIG_BLK_DEV_HD n - fi - endmenu -fi - -mainmenu_option next_comment -comment 'SCSI support' - -tristate 'SCSI support' CONFIG_SCSI - -if [ "$CONFIG_SCSI" != "n" ]; then - source drivers/scsi/Config.in -fi -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/message/fusion/Config.in - - source drivers/ieee1394/Config.in - - source drivers/message/i2o/Config.in - - if [ "$CONFIG_NET" = "y" ]; then - mainmenu_option next_comment - comment 'Network device support' - - bool 'Network device support' CONFIG_NETDEVICES - if [ "$CONFIG_NETDEVICES" = "y" ]; then - source drivers/net/Config.in - if [ "$CONFIG_ATM" = "y" -o "$CONFIG_ATM" = "m" ]; then - source drivers/atm/Config.in - fi - fi - endmenu - fi - - source net/ax25/Config.in - - source net/irda/Config.in - - mainmenu_option next_comment - comment 'ISDN subsystem' - if [ "$CONFIG_NET" != "n" ]; then - tristate 'ISDN support' CONFIG_ISDN - if [ "$CONFIG_ISDN" != "n" ]; then - source drivers/isdn/Config.in - fi - fi - endmenu - - if [ "$CONFIG_ISA" = "y" ]; then - mainmenu_option next_comment - comment 'Old CD-ROM drivers (not SCSI, not IDE)' - - bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI - if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then - source drivers/cdrom/Config.in - fi - endmenu - fi - - # - # input before char - char/joystick depends on it. As does USB. - # - source drivers/input/Config.in -else - # - # Block device driver configuration - # - mainmenu_option next_comment - comment 'Block devices' - tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP - dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET - tristate 'RAM disk support' CONFIG_BLK_DEV_RAM - if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then - int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 - fi - dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM - bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS - define_bool CONFIG_BLK_DEV_HD n - endmenu -fi - -source drivers/char/Config.in - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/media/Config.in -fi - -source fs/Config.in - -mainmenu_option next_comment -comment 'Console drivers' - -define_bool CONFIG_XEN_CONSOLE y - -if [ "$CONFIG_VT" = "y" ]; then - bool 'VGA text console' CONFIG_VGA_CONSOLE - bool 'Dummy console' CONFIG_DUMMY_CONSOLE - if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - bool 'Video mode selection support' CONFIG_VIDEO_SELECT - if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE - source drivers/video/Config.in - fi - fi -fi -endmenu - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - mainmenu_option next_comment - comment 'Sound' - - tristate 'Sound card support' CONFIG_SOUND - if [ "$CONFIG_SOUND" != "n" ]; then - source drivers/sound/Config.in - fi - endmenu -fi - -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then - source drivers/usb/Config.in - source net/bluetooth/Config.in -fi - -mainmenu_option next_comment -comment 'Kernel hacking' - -bool 'Kernel debugging' CONFIG_DEBUG_KERNEL -if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then - bool ' Check for stack overflows' CONFIG_DEBUG_STACKOVERFLOW - bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM - bool ' Debug memory allocations' CONFIG_DEBUG_SLAB - bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE - bool ' Load all symbols for debugging' CONFIG_KALLSYMS - bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER -fi - -int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 - -endmenu - -source crypto/Config.in -source lib/Config.in diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/defconfig-xen0 --- a/linux-2.4-xen-sparse/arch/xen/defconfig-xen0 Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,927 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_XEN=y -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Xen -# -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_PHYSDEV_ACCESS=y -# CONFIG_XEN_USB_BACKEND is not set -CONFIG_XEN_SCRUB_PAGES=y -CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_GRANT=y -# CONFIG_XEN_USB_FRONTEND is not set -CONFIG_NO_IDLE_HZ=y -CONFIG_FOREIGN_PAGES=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -CONFIG_FORCE_MAX_ZONEORDER=11 - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -CONFIG_PCI_NAMES=y -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -# CONFIG_PCMCIA is not set - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_SHPC is not set -# CONFIG_HOTPLUG_PCI_SHPC_POLL_EVENT_MODE is not set -# CONFIG_HOTPLUG_PCI_PCIE is not set -# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_OOM_KILLER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play configuration -# -CONFIG_PNP=y -# CONFIG_ISAPNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_CISS_MONITOR_THREAD is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_SX8 is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -# CONFIG_MD_RAID0 is not set -CONFIG_MD_RAID1=y -# CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -CONFIG_BLK_DEV_LVM=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -# CONFIG_IP_NF_AMANDA is not set -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_IRC=m -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_LIMIT is not set -# CONFIG_IP_NF_MATCH_MAC is not set -# CONFIG_IP_NF_MATCH_PKTTYPE is not set -# CONFIG_IP_NF_MATCH_MARK is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_LENGTH is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_TCPMSS is not set -# CONFIG_IP_NF_MATCH_HELPER is not set -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -# CONFIG_IP_NF_MATCH_UNCLEAN is not set -# CONFIG_IP_NF_MATCH_OWNER is not set -CONFIG_IP_NF_MATCH_PHYSDEV=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -# CONFIG_IP_NF_TARGET_MIRROR is not set -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -# CONFIG_IP_NF_NAT_SNMP_BASIC is not set -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -# CONFIG_IP_NF_MANGLE is not set -CONFIG_IP_NF_TARGET_LOG=y -CONFIG_IP_NF_TARGET_ULOG=y -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_ARPTABLES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=y -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_DECNET is not set -CONFIG_BRIDGE=y -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_IPF=m -CONFIG_BRIDGE_EBT_ARPF=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_VLANF=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_MARKF=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_MARK_T=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -# CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_IDEDISK_STROKE=y -# CONFIG_BLK_DEV_IDECS is not set -# CONFIG_BLK_DEV_DELKIN is not set -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_IDETAPE=y -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=y -CONFIG_IDE_TASK_IOCTL=y -CONFIG_BLK_DEV_CMD640=y -CONFIG_BLK_DEV_CMD640_ENHANCED=y -# CONFIG_BLK_DEV_ISAPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -CONFIG_BLK_DEV_OFFBOARD=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -CONFIG_WDC_ALI15X3=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_AMD74XX_OVERRIDE=y -# CONFIG_BLK_DEV_ATIIXP is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NS87415=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -CONFIG_PDC202XX_BURST=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_SC1200=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -CONFIG_BLK_DEV_TRM290=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_IDE_CHIPSETS=y -# CONFIG_BLK_DEV_4DRIVES is not set -# CONFIG_BLK_DEV_ALI14XX is not set -# CONFIG_BLK_DEV_DTC2278 is not set -# CONFIG_BLK_DEV_HT6560B is not set -# CONFIG_BLK_DEV_PDC4030 is not set -# CONFIG_BLK_DEV_QD65XX is not set -# CONFIG_BLK_DEV_UMC8672 is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set -# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set -# CONFIG_BLK_DEV_ATARAID_SII is not set - -# -# SCSI support -# -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -CONFIG_CHR_DEV_SG=y -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=y -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -CONFIG_SCSI_MEGARAID=y -# CONFIG_SCSI_MEGARAID2 is not set -CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set -# CONFIG_SCSI_SATA_SVW is not set -CONFIG_SCSI_ATA_PIIX=y -# CONFIG_SCSI_SATA_NV is not set -# CONFIG_SCSI_SATA_QSTOR is not set -CONFIG_SCSI_SATA_PROMISE=y -CONFIG_SCSI_SATA_SX4=y -CONFIG_SCSI_SATA_SIL=y -CONFIG_SCSI_SATA_SIS=y -# CONFIG_SCSI_SATA_ULI is not set -CONFIG_SCSI_SATA_VIA=y -CONFIG_SCSI_SATA_VITESSE=y -CONFIG_SCSI_BUSLOGIC=y -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Fusion MPT device support -# -# CONFIG_FUSION is not set -# CONFIG_FUSION_BOOT is not set -# CONFIG_FUSION_ISENSE is not set -# CONFIG_FUSION_CTL is not set -# CONFIG_FUSION_LAN is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -CONFIG_NET_VENDOR_3COM=y -# CONFIG_EL1 is not set -# CONFIG_EL2 is not set -# CONFIG_ELPLUS is not set -# CONFIG_EL16 is not set -# CONFIG_EL3 is not set -# CONFIG_3C515 is not set -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=y -# CONFIG_TYPHOON is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_AT1700 is not set -# CONFIG_DEPCA is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=y -# CONFIG_AMD8111_ETH is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_AC3200 is not set -# CONFIG_APRICOT is not set -# CONFIG_B44 is not set -# CONFIG_CS89x0 is not set -# CONFIG_TULIP is not set -# CONFIG_DE4X5 is not set -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -# CONFIG_EEPRO100 is not set -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=y -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -CONFIG_NE2K_PCI=y -# CONFIG_FORCEDETH is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_SUNDANCE_MMIO is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -CONFIG_E1000=y -# CONFIG_E1000_NAPI is not set -# CONFIG_MYRI_SBUS is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SK98LIN is not set -CONFIG_TIGON3=y -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -# CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -# CONFIG_INPUT is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_UINPUT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -# CONFIG_SERIAL is not set -# CONFIG_SERIAL_EXTENDED is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_QIC02_TAPE is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set -# CONFIG_IPMI_KCS is not set -# CONFIG_IPMI_WATCHDOG is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_SCx200 is not set -# CONFIG_SCx200_GPIO is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set - -# -# Direct Rendering Manager (XFree86 DRI support) -# -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_OBMOUSE is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_XFS_FS is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_RT is not set -# CONFIG_XFS_TRACE is not set -# CONFIG_XFS_DEBUG is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_DIRECTIO is not set -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8559-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_XEN_CONSOLE=y -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y -# CONFIG_VIDEO_SELECT is not set -# CONFIG_MDA_CONSOLE is not set - -# -# Frame-buffer support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set - -# -# Support for USB gadgets -# -# CONFIG_USB_GADGET is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_KALLSYMS=y -# CONFIG_FRAME_POINTER is not set -CONFIG_LOG_BUF_SHIFT=0 - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -# CONFIG_CRYPTO_WP512 is not set -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_TEST is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -# CONFIG_FW_LOADER is not set diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/defconfig-xenU --- a/linux-2.4-xen-sparse/arch/xen/defconfig-xenU Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,562 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_XEN=y -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Xen -# -# CONFIG_XEN_PRIVILEGED_GUEST is not set -# CONFIG_XEN_PHYSDEV_ACCESS is not set -CONFIG_XEN_SCRUB_PAGES=y -CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_XEN_BLKDEV_GRANT=y -# CONFIG_XEN_USB_FRONTEND is not set -CONFIG_NO_IDLE_HZ=y -# CONFIG_FOREIGN_PAGES is not set -CONFIG_NETDEVICES=y -# CONFIG_VT is not set - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -CONFIG_FORCE_MAX_ZONEORDER=11 - -# -# General setup -# -CONFIG_NET=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_OOM_KILLER is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -CONFIG_IP_NF_FTP=y -# CONFIG_IP_NF_AMANDA is not set -CONFIG_IP_NF_TFTP=y -CONFIG_IP_NF_IRC=y -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_LIMIT is not set -# CONFIG_IP_NF_MATCH_MAC is not set -# CONFIG_IP_NF_MATCH_PKTTYPE is not set -# CONFIG_IP_NF_MATCH_MARK is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_LENGTH is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_TCPMSS is not set -# CONFIG_IP_NF_MATCH_HELPER is not set -CONFIG_IP_NF_MATCH_STATE=y -CONFIG_IP_NF_MATCH_CONNTRACK=y -# CONFIG_IP_NF_MATCH_UNCLEAN is not set -# CONFIG_IP_NF_MATCH_OWNER is not set -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -# CONFIG_IP_NF_TARGET_MIRROR is not set -CONFIG_IP_NF_NAT=y -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=y -CONFIG_IP_NF_TARGET_REDIRECT=y -# CONFIG_IP_NF_NAT_SNMP_BASIC is not set -CONFIG_IP_NF_NAT_IRC=y -CONFIG_IP_NF_NAT_FTP=y -CONFIG_IP_NF_NAT_TFTP=y -# CONFIG_IP_NF_MANGLE is not set -CONFIG_IP_NF_TARGET_LOG=y -CONFIG_IP_NF_TARGET_ULOG=y -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_ARPTABLES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -CONFIG_VLAN_8021Q=y - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# SCSI support -# -CONFIG_SCSI=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -CONFIG_CHR_DEV_SG=y - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set - -# -# SCSI low-level drivers -# -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_AIC7XXX_OLD is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -# CONFIG_SCSI_MEGARAID is not set -# CONFIG_SCSI_MEGARAID2 is not set -# CONFIG_SCSI_SATA is not set -# CONFIG_SCSI_SATA_AHCI is not set -# CONFIG_SCSI_SATA_SVW is not set -# CONFIG_SCSI_ATA_PIIX is not set -# CONFIG_SCSI_SATA_NV is not set -# CONFIG_SCSI_SATA_QSTOR is not set -# CONFIG_SCSI_SATA_PROMISE is not set -# CONFIG_SCSI_SATA_SX4 is not set -# CONFIG_SCSI_SATA_SIL is not set -# CONFIG_SCSI_SATA_SIS is not set -# CONFIG_SCSI_SATA_ULI is not set -# CONFIG_SCSI_SATA_VIA is not set -# CONFIG_SCSI_SATA_VITESSE is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Block devices -# -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set -# CONFIG_BLK_DEV_HD is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL is not set -# CONFIG_SERIAL_EXTENDED is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set -# CONFIG_TIPAR is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set - -# -# Joysticks -# -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set -# CONFIG_IPMI_KCS is not set -# CONFIG_IPMI_WATCHDOG is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_SCx200 is not set -# CONFIG_SCx200_GPIO is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set - -# -# Direct Rendering Manager (XFree86 DRI support) -# -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_OBMOUSE is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_XFS_FS is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_RT is not set -# CONFIG_XFS_TRACE is not set -# CONFIG_XFS_DEBUG is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_DIRECTIO is not set -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8559-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_XEN_CONSOLE=y - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_KALLSYMS=y -# CONFIG_FRAME_POINTER is not set -CONFIG_LOG_BUF_SHIFT=0 - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -# CONFIG_ZLIB_DEFLATE is not set diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,4 +0,0 @@ -O_TARGET := drv.o -export-objs := balloon.o -obj-y := balloon.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,10 +0,0 @@ - -O_TARGET := drv.o - -subdir-$(CONFIG_XEN_BLKDEV_FRONTEND) += frontend -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += frontend/drv.o - -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend -obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o - -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o control.o interface.o vbd.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := blkfront.o vbd.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,93 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/blkif/frontend/common.h - * - * Shared definitions between all levels of XenoLinux Virtual block devices. - */ - -#ifndef __XEN_DRIVERS_COMMON_H__ -#define __XEN_DRIVERS_COMMON_H__ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/hdreg.h> -#include <linux/blkdev.h> -#include <linux/major.h> -#include <asm-xen/xen-public/xen.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/uaccess.h> -#include <asm-xen/xen-public/io/blkif.h> - -#if 1 -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_blk: " fmt, ##args) -#else -#define IPRINTK(fmt, args...) ((void)0) -#endif - -#if 1 -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_blk: " fmt, ##args) -#else -#define WPRINTK(fmt, args...) ((void)0) -#endif - -#if 0 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -#if 0 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK_IOCTL(_f, _a...) ((void)0) -#endif - -/* Private gendisk->flags[] values. */ -#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ - -/* - * We have one of these per vbd, whether ide, scsi or 'other'. - * They hang in an array off the gendisk structure. We may end up putting - * all kinds of interesting stuff here :-) - */ -typedef struct xl_disk { - int usage; -} xl_disk_t; - -extern int blkif_open(struct inode *inode, struct file *filep); -extern int blkif_release(struct inode *inode, struct file *filep); -extern int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument); -extern int blkif_check(kdev_t dev); -extern int blkif_revalidate(kdev_t dev); -extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); -extern void do_blkif_request (request_queue_t *rq); - -extern void xlvbd_update_vbds(void); - -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) -{ - struct gendisk *gd = get_gendisk(xldev); - - if ( gd == NULL ) - return NULL; - - return (xl_disk_t *)gd->real_devices + - (MINOR(xldev) >> gd->minor_shift); -} - - -/* Virtual block-device subsystem. */ -extern int xlvbd_init(void); -extern void xlvbd_cleanup(void); - -#endif /* __XEN_DRIVERS_COMMON_H__ */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c --- a/linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,540 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/blkif/frontend/vbd.c - * - * Xenolinux virtual block-device driver. - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "common.h" -#include <linux/blk.h> - -/* - * For convenience we distinguish between ide, scsi and 'other' (i.e. - * potentially combinations of the two) in the naming scheme and in a few - * other places (like default readahead, etc). - */ -#define XLIDE_MAJOR_NAME "hd" -#define XLSCSI_MAJOR_NAME "sd" -#define XLVBD_MAJOR_NAME "xvd" - -#define XLIDE_DEVS_PER_MAJOR 2 -#define XLSCSI_DEVS_PER_MAJOR 16 -#define XLVBD_DEVS_PER_MAJOR 16 - -#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ -#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ - -#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ - -#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ - -/* The below are for the generic drivers/block/ll_rw_block.c code. */ -static int xlide_blksize_size[256]; -static int xlide_hardsect_size[256]; -static int xlide_max_sectors[256]; -static int xlscsi_blksize_size[256]; -static int xlscsi_hardsect_size[256]; -static int xlscsi_max_sectors[256]; -static int xlvbd_blksize_size[256]; -static int xlvbd_hardsect_size[256]; -static int xlvbd_max_sectors[256]; - -/* Information about our VBDs. */ -#define MAX_VBDS 64 -static int nr_vbds; -static vdisk_t *vbd_info; - -static struct block_device_operations xlvbd_block_fops = -{ - open: blkif_open, - release: blkif_release, - ioctl: blkif_ioctl, - check_media_change: blkif_check, - revalidate: blkif_revalidate, -}; - -static int xlvbd_get_vbd_info(vdisk_t *disk_info) -{ - vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL); - blkif_request_t req; - blkif_response_t rsp; - int nr; - - memset(&req, 0, sizeof(req)); - req.operation = BLKIF_OP_PROBE; - req.nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - blkif_control_probe_send(&req, &rsp, - (unsigned long)(virt_to_machine(buf))); -#else - req.frame_and_sects[0] = virt_to_machine(buf) | 7; - - blkif_control_send(&req, &rsp); -#endif - - if ( rsp.status <= 0 ) - { - printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); - return -1; - } - - if ( (nr = rsp.status) > MAX_VBDS ) - nr = MAX_VBDS; - memcpy(disk_info, buf, nr * sizeof(vdisk_t)); - - return nr; -} - -/* - * xlvbd_init_device - initialise a VBD device - * @disk: a vdisk_t describing the VBD - * - * Takes a vdisk_t * that describes a VBD the domain has access to. - * Performs appropriate initialisation and registration of the device. - * - * Care needs to be taken when making re-entrant calls to ensure that - * corruption does not occur. Also, devices that are in use should not have - * their details updated. This is the caller's responsibility. - */ -static int xlvbd_init_device(vdisk_t *xd) -{ - int device = xd->device; - int major = MAJOR(device); - int minor = MINOR(device); - int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */ - char *major_name; - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk; - int i, rc = 0, max_part, partno; - unsigned long capacity; - - unsigned char buf[64]; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) - { - printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( is_ide ) { - - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - - } else if ( is_scsi ) { - - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; - - } else { - - /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ - printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", - major, minor); - is_scsi = 1; - major_name = "cciss"; - max_part = XLSCSI_MAX_PART; - - } - - partno = minor & (max_part - 1); - - if ( (gd = get_gendisk(device)) == NULL ) - { - rc = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( rc < 0 ) - { - printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - goto out; - } - - if ( is_ide ) - { - blksize_size[major] = xlide_blksize_size; - hardsect_size[major] = xlide_hardsect_size; - max_sectors[major] = xlide_max_sectors; - read_ahead[major] = 8; - } - else if ( is_scsi ) - { - blksize_size[major] = xlscsi_blksize_size; - hardsect_size[major] = xlscsi_hardsect_size; - max_sectors[major] = xlscsi_max_sectors; - read_ahead[major] = 8; - } - else - { - blksize_size[major] = xlvbd_blksize_size; - hardsect_size[major] = xlvbd_hardsect_size; - max_sectors[major] = xlvbd_max_sectors; - read_ahead[major] = 8; - } - - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request); - - /* - * Turn off barking 'headactive' mode. We dequeue buffer heads as - * soon as we pass them to the back-end driver. - */ - blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); - - /* Construct an appropriate gendisk structure. */ - gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); - gd->major = major; - gd->major_name = major_name; - - gd->max_p = max_part; - if ( is_ide ) - { - gd->minor_shift = XLIDE_PARTN_SHIFT; - gd->nr_real = XLIDE_DEVS_PER_MAJOR; - } - else if ( is_scsi ) - { - gd->minor_shift = XLSCSI_PARTN_SHIFT; - gd->nr_real = XLSCSI_DEVS_PER_MAJOR; - } - else - { - gd->minor_shift = XLVBD_PARTN_SHIFT; - gd->nr_real = XLVBD_DEVS_PER_MAJOR; - } - - /* - ** The sizes[] and part[] arrays hold the sizes and other - ** information about every partition with this 'major' (i.e. - ** every disk sharing the 8 bit prefix * max partns per disk) - */ - gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); - gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), - GFP_KERNEL); - memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); - memset(gd->part, 0, max_part * gd->nr_real - * sizeof(struct hd_struct)); - - - gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), - GFP_KERNEL); - memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); - - gd->next = NULL; - gd->fops = &xlvbd_block_fops; - - gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), - GFP_KERNEL); - gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); - - memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); - memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); - - add_gendisk(gd); - - blk_size[major] = gd->sizes; - } - - if ( xd->info & VDISK_READONLY ) - set_device_ro(device, 1); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN; - - /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */ - capacity = (unsigned long)xd->capacity; - - if ( partno != 0 ) - { - /* - * If this was previously set up as a real disc we will have set - * up partition-table information. Virtual partitions override - * 'real' partitions, and the two cannot coexist on a device. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(max_part-1)] != 0) ) - { - /* - * Any non-zero sub-partition entries must be cleaned out before - * installing 'virtual' partition entries. The two types cannot - * coexist, and virtual partitions are favoured. - */ - kdev_t dev = device & ~(max_part-1); - for ( i = max_part - 1; i > 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - printk(KERN_ALERT - "Virtual partitions found for /dev/%s - ignoring any " - "real partition information we may have found.\n", - disk_name(gd, MINOR(device), buf)); - } - - /* Need to skankily setup 'partition' information */ - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity >>(BLOCK_SIZE_BITS-9); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - } - else - { - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); - - /* Some final fix-ups depending on the device type */ - if ( xd->info & VDISK_REMOVABLE ) - { - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; - printk(KERN_ALERT - "Skipping partition check on %s /dev/%s\n", - (xd->info & VDISK_CDROM) ? "cdrom" : "removable", - disk_name(gd, MINOR(device), buf)); - } - else - { - /* Only check partitions on real discs (not virtual!). */ - if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - { - printk(KERN_ALERT - "Skipping partition check on virtual /dev/%s\n", - disk_name(gd, MINOR(device), buf)); - break; - } - register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); - } - } - - out: - bdput(bd); - return rc; -} - - -/* - * xlvbd_remove_device - remove a device node if possible - * @device: numeric device ID - * - * Updates the gendisk structure and invalidates devices. - * - * This is OK for now but in future, should perhaps consider where this should - * deallocate gendisks / unregister devices. - */ -static int xlvbd_remove_device(int device) -{ - int i, rc = 0, minor = MINOR(device); - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk = NULL; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - if ( ((gd = get_gendisk(device)) == NULL) || - ((disk = xldev_to_xldisk(device)) == NULL) ) - BUG(); - - if ( disk->usage != 0 ) - { - printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( (minor & (gd->max_p-1)) != 0 ) - { - /* 1: The VBD is mapped to a partition rather than a whole unit. */ - invalidate_device(device, 1); - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = 0; - gd->sizes[minor] = 0; - - /* Clear the consists-of-virtual-partitions flag if possible. */ - gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; - for ( i = 1; i < gd->max_p; i++ ) - if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - - /* - * If all virtual partitions are now gone, and a 'whole unit' VBD is - * present, then we can try to grok the unit's real partition table. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(gd->max_p-1)] != 0) && - !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) - { - register_disk(gd, - device&~(gd->max_p-1), - gd->max_p, - &xlvbd_block_fops, - gd->part[minor&~(gd->max_p-1)].nr_sects); - } - } - else - { - /* - * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. - * NB. The partition entries are only cleared if there are no VBDs - * mapped to individual partitions on this unit. - */ - i = gd->max_p - 1; /* Default: clear subpartitions as well. */ - if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ - while ( i >= 0 ) - { - invalidate_device(device+i, 1); - gd->part[minor+i].start_sect = 0; - gd->part[minor+i].nr_sects = 0; - gd->sizes[minor+i] = 0; - i--; - } - } - - out: - bdput(bd); - return rc; -} - -/* - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver - * state. The VBDs need to be updated in this way when the domain is - * initialised and also each time we receive an XLBLK_UPDATE event. - */ -void xlvbd_update_vbds(void) -{ - int i, j, k, old_nr, new_nr; - vdisk_t *old_info, *new_info, *merged_info; - - old_info = vbd_info; - old_nr = nr_vbds; - - new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); - if (!new_info) - return; - - if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) - goto out; - - /* - * Final list maximum size is old list + new list. This occurs only when - * old list and new list do not overlap at all, and we cannot yet destroy - * VBDs in the old list because the usage counts are busy. - */ - merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL); - if (!merged_info) - goto out; - - /* @i tracks old list; @j tracks new list; @k tracks merged list. */ - i = j = k = 0; - - while ( (i < old_nr) && (j < new_nr) ) - { - if ( old_info[i].device < new_info[j].device ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - i++; - } - else if ( old_info[i].device > new_info[j].device ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - j++; - } - else - { - if ( ((old_info[i].capacity == new_info[j].capacity) && - (old_info[i].info == new_info[j].info)) || - (xlvbd_remove_device(old_info[i].device) != 0) ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - else if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - i++; j++; - } - } - - for ( ; i < old_nr; i++ ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); - } - - for ( ; j < new_nr; j++ ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); - } - - vbd_info = merged_info; - nr_vbds = k; - - kfree(old_info); -out: - kfree(new_info); -} - - -/* - * Set up all the linux device goop for the virtual block devices (vbd's) that - * we know about. Note that although from the backend driver's p.o.v. VBDs are - * addressed simply an opaque 16-bit device number, the domain creation tools - * conventionally allocate these numbers to correspond to those used by 'real' - * linux -- this is just for convenience as it means e.g. that the same - * /etc/fstab can be used when booting with or without Xen. - */ -int xlvbd_init(void) -{ - int i; - - /* - * If compiled as a module, we don't support unloading yet. We therefore - * permanently increment the reference count to disallow it. - */ - SET_MODULE_OWNER(&xlvbd_block_fops); - MOD_INC_USE_COUNT; - - /* Initialize the global arrays. */ - for ( i = 0; i < 256; i++ ) - { - xlide_blksize_size[i] = 1024; - xlide_hardsect_size[i] = 512; - xlide_max_sectors[i] = 512; - - xlscsi_blksize_size[i] = 1024; - xlscsi_hardsect_size[i] = 512; - xlscsi_max_sectors[i] = 512; - - xlvbd_blksize_size[i] = 512; - xlvbd_hardsect_size[i] = 512; - xlvbd_max_sectors[i] = 512; - } - - vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); - if (!vbd_info) - return -ENOMEM; - - nr_vbds = xlvbd_get_vbd_info(vbd_info); - - if ( nr_vbds < 0 ) - { - kfree(vbd_info); - vbd_info = NULL; - nr_vbds = 0; - } - else - { - for ( i = 0; i < nr_vbds; i++ ) - xlvbd_init_device(&vbd_info[i]); - } - - return 0; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-$(CONFIG_XEN_CONSOLE) := console.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := core.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := evtchn.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,10 +0,0 @@ - -O_TARGET := drv.o - -subdir-$(CONFIG_XEN_NETDEV_FRONTEND) += frontend -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += frontend/drv.o - -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend -obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o - -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,4 +0,0 @@ -O_TARGET := drv.o -export-objs := interface.o -obj-y := main.o control.o interface.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile --- a/linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/Makefile --- a/linux-2.4-xen-sparse/arch/xen/kernel/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,20 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -traditional -c $< -o $*.o - -all: kernel.o head.o init_task.o - -O_TARGET := kernel.o - -export-objs := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o - -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ - ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ - i386_ksyms.o i387.o evtchn.o ctrl_if.o pci-dma.o \ - reboot.o fixup.o gnttab.o skbuff.o - -ifdef CONFIG_PCI -obj-y += pci-i386.o pci-pc.o -endif - -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/entry.S --- a/linux-2.4-xen-sparse/arch/xen/kernel/entry.S Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,779 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'ret_to_user': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ - -#include <linux/config.h> -#include <linux/sys.h> -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/smp.h> - -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C -EFLAGS = 0x30 -OLDESP = 0x34 -OLDSS = 0x38 - -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 - -/* Offsets into task_struct. */ -state = 0 -flags = 4 -sigpending = 8 -addr_limit = 12 -exec_domain = 16 -need_resched = 20 -tsk_ptrace = 24 -processor = 52 - -/* Offsets into shared_info_t. */ -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 - -ENOSYS = 38 - - -#define SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - movl $(__KERNEL_DS),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; - -#define RESTORE_ALL \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ -1: popl %ds; \ -2: popl %es; \ - addl $4,%esp; \ -3: iret; \ -.section .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ -.previous - -#define GET_CURRENT(reg) \ - movl $-8192, reg; \ - andl %esp, reg - -ENTRY(lcall7) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. - SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 - call *%edx - addl $4, %esp - popl %eax - jmp ret_to_user - -ENTRY(lcall27) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. - SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 - call *%edx - addl $4, %esp - popl %eax - jmp ret_to_user - -ENTRY(ret_from_fork) - pushl %ebx - call SYMBOL_NAME(schedule_tail) - addl $4, %esp - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys_exit - jmp ret_to_user - -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ -ENTRY(system_call) - pushl %eax # save orig_eax - SAVE_ALL - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys - cmpl $(NR_syscalls),%eax - jae badsys - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -ret_to_user: - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - movb $1,evtchn_upcall_mask(%esi) # make tests atomic -ret_to_user_nocli: - cmpl $0,need_resched(%ebx) - jne reschedule - cmpl $0,sigpending(%ebx) - je safesti # ensure need_resched updates are seen -/*signal_return:*/ - movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks - movl %esp,%eax - xorl %edx,%edx - call SYMBOL_NAME(do_signal) - jmp safesti - - ALIGN -restore_all: - RESTORE_ALL - - ALIGN -tracesys: - movl $-ENOSYS,EAX(%esp) - call SYMBOL_NAME(syscall_trace) - movl ORIG_EAX(%esp),%eax - cmpl $(NR_syscalls),%eax - jae tracesys_exit - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -tracesys_exit: - call SYMBOL_NAME(syscall_trace) - jmp ret_to_user -badsys: - movl $-ENOSYS,EAX(%esp) - jmp ret_to_user - - ALIGN -ENTRY(ret_from_intr) - GET_CURRENT(%ebx) -ret_from_exception: - movb CS(%esp),%al - testl $2,%eax - jne ret_to_user - jmp restore_all - - ALIGN -reschedule: - movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks - call SYMBOL_NAME(schedule) # test - jmp ret_to_user - -ENTRY(divide_error) - pushl $0 # no error code - pushl $ SYMBOL_NAME(do_divide_error) - ALIGN -error_code: - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - GET_CURRENT(%ebx) - cld - movl %es,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl %esp,%edx - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - movl $(__KERNEL_DS),%edx - movl %edx,%ds - movl %edx,%es - call *%edi - addl $8,%esp - jmp ret_from_exception - -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until we've done all processing. HOWEVER, we must enable events before -# popping the stack frame (can't be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so we'd -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -ENTRY(hypervisor_callback) - pushl %eax - SAVE_ALL - GET_CURRENT(%ebx) - movl EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: push %esp - call evtchn_do_upcall - add $4,%esp - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne ret_to_user_nocli -safesti:movb $0,evtchn_upcall_mask(%esi) # reenable event callbacks -scrit: /**** START OF CRITICAL REGION ****/ - testb $0xFF,evtchn_upcall_pending(%esi) - jnz 14f # process more events if necessary... - RESTORE_ALL -14: movb $1,evtchn_upcall_mask(%esi) - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - mov %esp,%esi - add %eax,%esi # %esi points at end of src region - mov %esp,%edi - add $0x34,%edi # %edi points at end of dst region - mov %eax,%ecx - shr $2,%ecx # convert words to bytes - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp 11b - -critical_fixup_table: - .byte 0x00,0x00,0x00 # testb $0xFF,(%esi) - .byte 0x00,0x00 # jnz 14f - .byte 0x00 # pop %ebx - .byte 0x04 # pop %ecx - .byte 0x08 # pop %edx - .byte 0x0c # pop %esi - .byte 0x10 # pop %edi - .byte 0x14 # pop %ebp - .byte 0x18 # pop %eax - .byte 0x1c # pop %ds - .byte 0x20 # pop %es - .byte 0x24,0x24,0x24 # add $4,%esp - .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00 # movb $1,4(%esi) - .byte 0x00,0x00 # jmp 11b - -# Hypervisor uses this for application faults while it executes. -ENTRY(failsafe_callback) -1: popl %ds -2: popl %es -3: popl %fs -4: popl %gs -5: iret -.section .fixup,"ax"; \ -6: movl $0,(%esp); \ - jmp 1b; \ -7: movl $0,(%esp); \ - jmp 2b; \ -8: movl $0,(%esp); \ - jmp 3b; \ -9: movl $0,(%esp); \ - jmp 4b; \ -10: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,6b; \ - .long 2b,7b; \ - .long 3b,8b; \ - .long 4b,9b; \ - .long 5b,10b; \ -.previous - -ENTRY(coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_error) - jmp error_code - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_simd_coprocessor_error) - jmp error_code - -ENTRY(device_not_available) - pushl $-1 # mark this as an int - SAVE_ALL - GET_CURRENT(%ebx) - call SYMBOL_NAME(math_state_restore) - jmp ret_from_exception - -ENTRY(debug) - pushl $0 - pushl $ SYMBOL_NAME(do_debug) - jmp error_code - -ENTRY(int3) - pushl $0 - pushl $ SYMBOL_NAME(do_int3) - jmp error_code - -ENTRY(overflow) - pushl $0 - pushl $ SYMBOL_NAME(do_overflow) - jmp error_code - -ENTRY(bounds) - pushl $0 - pushl $ SYMBOL_NAME(do_bounds) - jmp error_code - -ENTRY(invalid_op) - pushl $0 - pushl $ SYMBOL_NAME(do_invalid_op) - jmp error_code - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) - jmp error_code - -ENTRY(double_fault) - pushl $ SYMBOL_NAME(do_double_fault) - jmp error_code - -ENTRY(invalid_TSS) - pushl $ SYMBOL_NAME(do_invalid_TSS) - jmp error_code - -ENTRY(segment_not_present) - pushl $ SYMBOL_NAME(do_segment_not_present) - jmp error_code - -ENTRY(stack_segment) - pushl $ SYMBOL_NAME(do_stack_segment) - jmp error_code - -ENTRY(general_protection) - pushl $ SYMBOL_NAME(do_general_protection) - jmp error_code - -ENTRY(alignment_check) - pushl $ SYMBOL_NAME(do_alignment_check) - jmp error_code - -# This handler is special, because it gets an extra value on its stack, -# which is the linear faulting address. -#define PAGE_FAULT_STUB(_name1, _name2) \ -ENTRY(_name1) \ - pushl %ds ; \ - pushl %eax ; \ - xorl %eax,%eax ; \ - pushl %ebp ; \ - pushl %edi ; \ - pushl %esi ; \ - pushl %edx ; \ - decl %eax /* eax = -1 */ ; \ - pushl %ecx ; \ - pushl %ebx ; \ - GET_CURRENT(%ebx) ; \ - cld ; \ - movl %es,%ecx ; \ - movl ORIG_EAX(%esp), %esi /* get the error code */ ; \ - movl ES(%esp), %edi /* get the faulting address */ ; \ - movl %eax, ORIG_EAX(%esp) ; \ - movl %ecx, ES(%esp) ; \ - movl %esp,%edx ; \ - pushl %edi /* push the faulting address */ ; \ - pushl %esi /* push the error code */ ; \ - pushl %edx /* push the pt_regs pointer */ ; \ - movl $(__KERNEL_DS),%edx ; \ - movl %edx,%ds ; \ - movl %edx,%es ; \ - call SYMBOL_NAME(_name2) ; \ - addl $12,%esp ; \ - jmp ret_from_exception ; -PAGE_FAULT_STUB(page_fault, do_page_fault) - -ENTRY(machine_check) - pushl $0 - pushl $ SYMBOL_NAME(do_machine_check) - jmp error_code - -ENTRY(fixup_4gb_segment) - pushl $ SYMBOL_NAME(do_fixup_4gb_segment) - jmp error_code - -.data -ENTRY(sys_call_table) - .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ - .long SYMBOL_NAME(sys_exit) - .long SYMBOL_NAME(sys_fork) - .long SYMBOL_NAME(sys_read) - .long SYMBOL_NAME(sys_write) - .long SYMBOL_NAME(sys_open) /* 5 */ - .long SYMBOL_NAME(sys_close) - .long SYMBOL_NAME(sys_waitpid) - .long SYMBOL_NAME(sys_creat) - .long SYMBOL_NAME(sys_link) - .long SYMBOL_NAME(sys_unlink) /* 10 */ - .long SYMBOL_NAME(sys_execve) - .long SYMBOL_NAME(sys_chdir) - .long SYMBOL_NAME(sys_time) - .long SYMBOL_NAME(sys_mknod) - .long SYMBOL_NAME(sys_chmod) /* 15 */ - .long SYMBOL_NAME(sys_lchown16) - .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ - .long SYMBOL_NAME(sys_stat) - .long SYMBOL_NAME(sys_lseek) - .long SYMBOL_NAME(sys_getpid) /* 20 */ - .long SYMBOL_NAME(sys_mount) - .long SYMBOL_NAME(sys_oldumount) - .long SYMBOL_NAME(sys_setuid16) - .long SYMBOL_NAME(sys_getuid16) - .long SYMBOL_NAME(sys_stime) /* 25 */ - .long SYMBOL_NAME(sys_ptrace) - .long SYMBOL_NAME(sys_alarm) - .long SYMBOL_NAME(sys_fstat) - .long SYMBOL_NAME(sys_pause) - .long SYMBOL_NAME(sys_utime) /* 30 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ - .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ - .long SYMBOL_NAME(sys_access) - .long SYMBOL_NAME(sys_nice) - .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ - .long SYMBOL_NAME(sys_sync) - .long SYMBOL_NAME(sys_kill) - .long SYMBOL_NAME(sys_rename) - .long SYMBOL_NAME(sys_mkdir) - .long SYMBOL_NAME(sys_rmdir) /* 40 */ - .long SYMBOL_NAME(sys_dup) - .long SYMBOL_NAME(sys_pipe) - .long SYMBOL_NAME(sys_times) - .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ - .long SYMBOL_NAME(sys_brk) /* 45 */ - .long SYMBOL_NAME(sys_setgid16) - .long SYMBOL_NAME(sys_getgid16) - .long SYMBOL_NAME(sys_signal) - .long SYMBOL_NAME(sys_geteuid16) - .long SYMBOL_NAME(sys_getegid16) /* 50 */ - .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ - .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ - .long SYMBOL_NAME(sys_ioctl) - .long SYMBOL_NAME(sys_fcntl) /* 55 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ - .long SYMBOL_NAME(sys_setpgid) - .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ - .long SYMBOL_NAME(sys_olduname) - .long SYMBOL_NAME(sys_umask) /* 60 */ - .long SYMBOL_NAME(sys_chroot) - .long SYMBOL_NAME(sys_ustat) - .long SYMBOL_NAME(sys_dup2) - .long SYMBOL_NAME(sys_getppid) - .long SYMBOL_NAME(sys_getpgrp) /* 65 */ - .long SYMBOL_NAME(sys_setsid) - .long SYMBOL_NAME(sys_sigaction) - .long SYMBOL_NAME(sys_sgetmask) - .long SYMBOL_NAME(sys_ssetmask) - .long SYMBOL_NAME(sys_setreuid16) /* 70 */ - .long SYMBOL_NAME(sys_setregid16) - .long SYMBOL_NAME(sys_sigsuspend) - .long SYMBOL_NAME(sys_sigpending) - .long SYMBOL_NAME(sys_sethostname) - .long SYMBOL_NAME(sys_setrlimit) /* 75 */ - .long SYMBOL_NAME(sys_old_getrlimit) - .long SYMBOL_NAME(sys_getrusage) - .long SYMBOL_NAME(sys_gettimeofday) - .long SYMBOL_NAME(sys_settimeofday) - .long SYMBOL_NAME(sys_getgroups16) /* 80 */ - .long SYMBOL_NAME(sys_setgroups16) - .long SYMBOL_NAME(old_select) - .long SYMBOL_NAME(sys_symlink) - .long SYMBOL_NAME(sys_lstat) - .long SYMBOL_NAME(sys_readlink) /* 85 */ - .long SYMBOL_NAME(sys_uselib) - .long SYMBOL_NAME(sys_swapon) - .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(old_readdir) - .long SYMBOL_NAME(old_mmap) /* 90 */ - .long SYMBOL_NAME(sys_munmap) - .long SYMBOL_NAME(sys_truncate) - .long SYMBOL_NAME(sys_ftruncate) - .long SYMBOL_NAME(sys_fchmod) - .long SYMBOL_NAME(sys_fchown16) /* 95 */ - .long SYMBOL_NAME(sys_getpriority) - .long SYMBOL_NAME(sys_setpriority) - .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ - .long SYMBOL_NAME(sys_statfs) - .long SYMBOL_NAME(sys_fstatfs) /* 100 */ - .long SYMBOL_NAME(sys_ioperm) - .long SYMBOL_NAME(sys_socketcall) - .long SYMBOL_NAME(sys_syslog) - .long SYMBOL_NAME(sys_setitimer) - .long SYMBOL_NAME(sys_getitimer) /* 105 */ - .long SYMBOL_NAME(sys_newstat) - .long SYMBOL_NAME(sys_newlstat) - .long SYMBOL_NAME(sys_newfstat) - .long SYMBOL_NAME(sys_uname) - .long SYMBOL_NAME(sys_iopl) /* 110 */ - .long SYMBOL_NAME(sys_vhangup) - .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_wait4) - .long SYMBOL_NAME(sys_swapoff) /* 115 */ - .long SYMBOL_NAME(sys_sysinfo) - .long SYMBOL_NAME(sys_ipc) - .long SYMBOL_NAME(sys_fsync) - .long SYMBOL_NAME(sys_sigreturn) - .long SYMBOL_NAME(sys_clone) /* 120 */ - .long SYMBOL_NAME(sys_setdomainname) - .long SYMBOL_NAME(sys_newuname) - .long SYMBOL_NAME(sys_modify_ldt) - .long SYMBOL_NAME(sys_adjtimex) - .long SYMBOL_NAME(sys_mprotect) /* 125 */ - .long SYMBOL_NAME(sys_sigprocmask) - .long SYMBOL_NAME(sys_create_module) - .long SYMBOL_NAME(sys_init_module) - .long SYMBOL_NAME(sys_delete_module) - .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ - .long SYMBOL_NAME(sys_quotactl) - .long SYMBOL_NAME(sys_getpgid) - .long SYMBOL_NAME(sys_fchdir) - .long SYMBOL_NAME(sys_bdflush) - .long SYMBOL_NAME(sys_sysfs) /* 135 */ - .long SYMBOL_NAME(sys_personality) - .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ - .long SYMBOL_NAME(sys_setfsuid16) - .long SYMBOL_NAME(sys_setfsgid16) - .long SYMBOL_NAME(sys_llseek) /* 140 */ - .long SYMBOL_NAME(sys_getdents) - .long SYMBOL_NAME(sys_select) - .long SYMBOL_NAME(sys_flock) - .long SYMBOL_NAME(sys_msync) - .long SYMBOL_NAME(sys_readv) /* 145 */ - .long SYMBOL_NAME(sys_writev) - .long SYMBOL_NAME(sys_getsid) - .long SYMBOL_NAME(sys_fdatasync) - .long SYMBOL_NAME(sys_sysctl) - .long SYMBOL_NAME(sys_mlock) /* 150 */ - .long SYMBOL_NAME(sys_munlock) - .long SYMBOL_NAME(sys_mlockall) - .long SYMBOL_NAME(sys_munlockall) - .long SYMBOL_NAME(sys_sched_setparam) - .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ - .long SYMBOL_NAME(sys_sched_setscheduler) - .long SYMBOL_NAME(sys_sched_getscheduler) - .long SYMBOL_NAME(sys_sched_yield) - .long SYMBOL_NAME(sys_sched_get_priority_max) - .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ - .long SYMBOL_NAME(sys_sched_rr_get_interval) - .long SYMBOL_NAME(sys_nanosleep) - .long SYMBOL_NAME(sys_mremap) - .long SYMBOL_NAME(sys_setresuid16) - .long SYMBOL_NAME(sys_getresuid16) /* 165 */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_query_module) - .long SYMBOL_NAME(sys_poll) - .long SYMBOL_NAME(sys_nfsservctl) - .long SYMBOL_NAME(sys_setresgid16) /* 170 */ - .long SYMBOL_NAME(sys_getresgid16) - .long SYMBOL_NAME(sys_prctl) - .long SYMBOL_NAME(sys_rt_sigreturn) - .long SYMBOL_NAME(sys_rt_sigaction) - .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ - .long SYMBOL_NAME(sys_rt_sigpending) - .long SYMBOL_NAME(sys_rt_sigtimedwait) - .long SYMBOL_NAME(sys_rt_sigqueueinfo) - .long SYMBOL_NAME(sys_rt_sigsuspend) - .long SYMBOL_NAME(sys_pread) /* 180 */ - .long SYMBOL_NAME(sys_pwrite) - .long SYMBOL_NAME(sys_chown16) - .long SYMBOL_NAME(sys_getcwd) - .long SYMBOL_NAME(sys_capget) - .long SYMBOL_NAME(sys_capset) /* 185 */ - .long SYMBOL_NAME(sys_sigaltstack) - .long SYMBOL_NAME(sys_sendfile) - .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ - .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ - .long SYMBOL_NAME(sys_vfork) /* 190 */ - .long SYMBOL_NAME(sys_getrlimit) - .long SYMBOL_NAME(sys_mmap2) - .long SYMBOL_NAME(sys_truncate64) - .long SYMBOL_NAME(sys_ftruncate64) - .long SYMBOL_NAME(sys_stat64) /* 195 */ - .long SYMBOL_NAME(sys_lstat64) - .long SYMBOL_NAME(sys_fstat64) - .long SYMBOL_NAME(sys_lchown) - .long SYMBOL_NAME(sys_getuid) - .long SYMBOL_NAME(sys_getgid) /* 200 */ - .long SYMBOL_NAME(sys_geteuid) - .long SYMBOL_NAME(sys_getegid) - .long SYMBOL_NAME(sys_setreuid) - .long SYMBOL_NAME(sys_setregid) - .long SYMBOL_NAME(sys_getgroups) /* 205 */ - .long SYMBOL_NAME(sys_setgroups) - .long SYMBOL_NAME(sys_fchown) - .long SYMBOL_NAME(sys_setresuid) - .long SYMBOL_NAME(sys_getresuid) - .long SYMBOL_NAME(sys_setresgid) /* 210 */ - .long SYMBOL_NAME(sys_getresgid) - .long SYMBOL_NAME(sys_chown) - .long SYMBOL_NAME(sys_setuid) - .long SYMBOL_NAME(sys_setgid) - .long SYMBOL_NAME(sys_setfsuid) /* 215 */ - .long SYMBOL_NAME(sys_setfsgid) - .long SYMBOL_NAME(sys_pivot_root) - .long SYMBOL_NAME(sys_mincore) - .long SYMBOL_NAME(sys_madvise) - .long SYMBOL_NAME(sys_getdents64) /* 220 */ - .long SYMBOL_NAME(sys_fcntl64) - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ - .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ - .long SYMBOL_NAME(sys_gettid) - .long SYMBOL_NAME(sys_readahead) /* 225 */ - .long SYMBOL_NAME(sys_setxattr) - .long SYMBOL_NAME(sys_lsetxattr) - .long SYMBOL_NAME(sys_fsetxattr) - .long SYMBOL_NAME(sys_getxattr) - .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ - .long SYMBOL_NAME(sys_fgetxattr) - .long SYMBOL_NAME(sys_listxattr) - .long SYMBOL_NAME(sys_llistxattr) - .long SYMBOL_NAME(sys_flistxattr) - .long SYMBOL_NAME(sys_removexattr) /* 235 */ - .long SYMBOL_NAME(sys_lremovexattr) - .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_sendfile64) - .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ - .long SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ - - .rept NR_syscalls-(.-sys_call_table)/4 - .long SYMBOL_NAME(sys_ni_syscall) - .endr diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/head.S --- a/linux-2.4-xen-sparse/arch/xen/kernel/head.S Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,41 +0,0 @@ - -.section __xen_guest - .ascii "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=3.0,VIRT_BASE=0xC0000000" - .ascii ",LOADER=generic" - .byte 0 - -.text -#include <linux/config.h> -#include <linux/threads.h> -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -ENTRY(stext) -ENTRY(_stext) - cld - lss stack_start,%esp - /* Copy the necessary stuff from xen_start_info structure. */ - mov $SYMBOL_NAME(xen_start_info_union),%edi - mov $128,%ecx - rep movsl - jmp SYMBOL_NAME(start_kernel) - -ENTRY(stack_start) - .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS - -.org 0x1000 -ENTRY(empty_zero_page) - -.org 0x2000 -ENTRY(default_ldt) - -.org 0x3000 -ENTRY(cpu0_pte_quicklist) - -.org 0x3400 -ENTRY(cpu0_pgd_quicklist) - -.org 0x3800 diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,180 +0,0 @@ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mca.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/pm.h> -#include <linux/pci.h> -#include <linux/apm_bios.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/tty.h> - -#include <asm/semaphore.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/uaccess.h> -#include <asm/checksum.h> -#include <asm/io.h> -#include <asm/hardirq.h> -#include <asm/delay.h> -#include <asm/irq.h> -#include <asm/mmx.h> -#include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; - -#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -// XXX extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(dump_extended_fpu); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(enable_irq); -EXPORT_SYMBOL(disable_irq); -EXPORT_SYMBOL(disable_irq_nosync); -EXPORT_SYMBOL(probe_irq_mask); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(apm_info); -//EXPORT_SYMBOL(gdt); -EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(phys_to_machine_mapping); - - -#ifdef CONFIG_DEBUG_IOVIRT -EXPORT_SYMBOL(__io_virt_debug); -#endif - -EXPORT_SYMBOL_NOVERS(__down_failed); -EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); -EXPORT_SYMBOL_NOVERS(__down_failed_trylock); -EXPORT_SYMBOL_NOVERS(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); - -EXPORT_SYMBOL_NOVERS(__get_user_1); -EXPORT_SYMBOL_NOVERS(__get_user_2); -EXPORT_SYMBOL_NOVERS(__get_user_4); - -EXPORT_SYMBOL(strtok); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); - -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__generic_copy_from_user); -EXPORT_SYMBOL(__generic_copy_to_user); -EXPORT_SYMBOL(strnlen_user); - - -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pcibios_penalize_isa_irq); -EXPORT_SYMBOL(pci_mem_start); -#endif - - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(kernel_flag_cacheline); -EXPORT_SYMBOL(smp_num_cpus); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL_NOVERS(__write_lock_failed); -EXPORT_SYMBOL_NOVERS(__read_lock_failed); - -/* Global SMP irq stuff */ -EXPORT_SYMBOL(synchronize_irq); -EXPORT_SYMBOL(global_irq_holder); -EXPORT_SYMBOL(__global_cli); -EXPORT_SYMBOL(__global_sti); -EXPORT_SYMBOL(__global_save_flags); -EXPORT_SYMBOL(__global_restore_flags); -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); - -/* HT support */ -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -#undef memcpy -#undef memset -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -EXPORT_SYMBOL_NOVERS(memcpy); -EXPORT_SYMBOL_NOVERS(memset); - -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(atomic_dec_and_lock); -#endif - -#ifdef CONFIG_MULTIQUAD -EXPORT_SYMBOL(xquad_portio); -#endif - -#include <asm/xen_proc.h> -EXPORT_SYMBOL(create_xen_proc_entry); -EXPORT_SYMBOL(remove_xen_proc_entry); - -EXPORT_SYMBOL(evtchn_do_upcall); -EXPORT_SYMBOL(force_evtchn_callback); -EXPORT_SYMBOL(HYPERVISOR_shared_info); diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/irq.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/irq.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1242 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - -#include <linux/config.h> -#include <linux/ptrace.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/smp_lock.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/irq.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> - -#include <asm/atomic.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/bitops.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/delay.h> -#include <asm/desc.h> -#include <asm/irq.h> - - - -/* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used - * by the main code to do the right thing. Each driver-visible - * interrupt source is transparently wired to the apropriate - * controller. Thus drivers need not be aware of the - * interrupt-controller. - * - * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, - * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. - * (IO-APICs assumed to be messaging to Pentium local-APICs) - * - * the code is designed to be easily extended with new/different - * interrupt controllers, without having to do assembly magic. - */ - -/* - * Controller mappings for all interrupt sources: - */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = - { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; - -static void register_irq_proc (unsigned int irq); - -/* - * Special irq handlers. - */ - -void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves, it doesnt deserve - * a generic callback i think. - */ -#if CONFIG_X86 - printk("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - */ - ack_APIC_irq(); -#endif -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - -atomic_t irq_err_count; -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG -atomic_t irq_mis_count; -#endif -#endif - -/* - * Generic, controller-independent functions: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i, j; - struct irqaction * action; - - seq_printf(p, " "); - for (j=0; j<smp_num_cpus; j++) - seq_printf(p, "CPU%d ",j); - seq_putc(p,'\n'); - - for (i = 0 ; i < NR_IRQS ; i++) { - action = irq_desc[i].action; - if (!action) - continue; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); -#endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p,'\n'); - } - seq_printf(p, "NMI: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - nmi_count(cpu_logical_map(j))); - seq_printf(p, "\n"); -#if CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - apic_timer_irqs[cpu_logical_map(j)]); - seq_printf(p, "\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif -#endif - - return 0; -} - - -/* - * Global interrupt locks for SMP. Allow interrupts to come in on any - * CPU, yet make cli/sti act globally to protect critical regions.. - */ - -#ifdef CONFIG_SMP -unsigned char global_irq_holder = NO_PROC_ID; -unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ - -extern void show_stack(unsigned long* esp); - -static void show(char * str) -{ - int i; - int cpu = smp_processor_id(); - - printk("\n%s, CPU %d:\n", str, cpu); - printk("irq: %d [",irqs_running()); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_irq_count(i)); - printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_bh_count(i)); - - printk(" ]\nStack dumps:"); - for(i = 0; i < smp_num_cpus; i++) { - unsigned long esp; - if (i == cpu) - continue; - printk("\nCPU %d:",i); - esp = init_tss[i].esp0; - if (!esp) { - /* tss->esp0 is set to NULL in cpu_init(), - * it's initialized when the cpu returns to user - * space. -- manfreds - */ - printk(" <unknown> "); - continue; - } - esp &= ~(THREAD_SIZE-1); - esp += sizeof(struct task_struct); - show_stack((void*)esp); - } - printk("\nCPU %d:",cpu); - show_stack(NULL); - printk("\n"); -} - -#define MAXCOUNT 100000000 - -/* - * I had a lockup scenario where a tight loop doing - * spin_unlock()/spin_lock() on CPU#1 was racing with - * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but - * apparently the spin_unlock() information did not make it - * through to CPU#0 ... nasty, is this by design, do we have to limit - * 'memory update oscillation frequency' artificially like here? - * - * Such 'high frequency update' races can be avoided by careful design, but - * some of our major constructs like spinlocks use similar techniques, - * it would be nice to clarify this issue. Set this define to 0 if you - * want to check whether your system freezes. I suspect the delay done - * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but - * i thought that such things are guaranteed by design, since we use - * the 'LOCK' prefix. - */ -#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 - -#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND -# define SYNC_OTHER_CORES(x) udelay(x+1) -#else -/* - * We have to allow irqs to arrive between __sti and __cli - */ -# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") -#endif - -static inline void wait_on_irq(int cpu) -{ - int count = MAXCOUNT; - - for (;;) { - - /* - * Wait until all interrupts are gone. Wait - * for bottom half handlers unless we're - * already executing in one.. - */ - if (!irqs_running()) - if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) - break; - - /* Duh, we have to loop. Release the lock to avoid deadlocks */ - clear_bit(0,&global_irq_lock); - - for (;;) { - if (!--count) { - show("wait_on_irq"); - count = ~0; - } - __sti(); - SYNC_OTHER_CORES(cpu); - __cli(); - if (irqs_running()) - continue; - if (global_irq_lock) - continue; - if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) - continue; - if (!test_and_set_bit(0,&global_irq_lock)) - break; - } - } -} - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -void synchronize_irq(void) -{ - if (irqs_running()) { - /* Stupid approach */ - cli(); - sti(); - } -} - -static inline void get_irqlock(int cpu) -{ - if (test_and_set_bit(0,&global_irq_lock)) { - /* do we already hold the lock? */ - if ((unsigned char) cpu == global_irq_holder) - return; - /* Uhhuh.. Somebody else got it. Wait.. */ - do { - do { - rep_nop(); - } while (test_bit(0,&global_irq_lock)); - } while (test_and_set_bit(0,&global_irq_lock)); - } - /* - * We also to make sure that nobody else is running - * in an interrupt context. - */ - wait_on_irq(cpu); - - /* - * Ok, finally.. - */ - global_irq_holder = cpu; -} - -/* - * A global "cli()" while in an interrupt context - * turns into just a local cli(). Interrupts - * should use spinlocks for the (very unlikely) - * case that they ever want to protect against - * each other. - * - * If we already have local interrupts disabled, - * this will not turn a local disable into a - * global one (problems with spinlocks: this makes - * save_flags+cli+sti usable inside a spinlock). - */ -void __global_cli(void) -{ - unsigned int flags; - - __save_flags(flags); - if (!flags) { - int cpu = smp_processor_id(); - __cli(); - if (!local_irq_count(cpu)) - get_irqlock(cpu); - } -} - -void __global_sti(void) -{ - int cpu = smp_processor_id(); - - if (!local_irq_count(cpu)) - release_irqlock(cpu); - __sti(); -} - -/* - * SMP flags value to restore to: - * 0 - global cli - * 1 - global sti - * 2 - local cli - * 3 - local sti - */ -unsigned long __global_save_flags(void) -{ - int retval; - int local_enabled; - unsigned long flags; - int cpu = smp_processor_id(); - - __save_flags(flags); - local_enabled = !flags; - /* default to local */ - retval = 2 + local_enabled; - - /* check for global flags if we're not in an interrupt */ - if (!local_irq_count(cpu)) { - if (local_enabled) - retval = 1; - if (global_irq_holder == cpu) - retval = 0; - } - return retval; -} - -void __global_restore_flags(unsigned long flags) -{ - switch (flags) { - case 0: - __global_cli(); - break; - case 1: - __global_sti(); - break; - case 2: - __cli(); - break; - case 3: - __sti(); - break; - default: - printk("global_restore_flags: %08lx (%08lx)\n", - flags, (&flags)[-1]); - } -} - -#endif - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. - */ -int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) -{ - int status; - int cpu = smp_processor_id(); - - irq_enter(cpu, irq); - - status = 1; /* Force the "do bottom halves" bit */ - - if (!(action->flags & SA_INTERRUPT)) - __sti(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - __cli(); - - irq_exit(cpu, irq); - - return status; -} - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. - */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -inline void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ - -void disable_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - cpu_relax(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -} - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int do_IRQ(struct pt_regs *regs) -{ - /* - * We ack quickly, we don't want the irq controller - * thinking we're snobs just because some other CPU has - * disabled global interrupts (we have already done the - * INT_ACK cycles, it's too late to try to pretend to the - * controller that we aren't taking the interrupt). - * - * 0 return value means that this irq is already being - * handled by some other CPU. (or is disabled) - */ - int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */ - int cpu = smp_processor_id(); - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; -#ifdef CONFIG_DEBUG_STACKOVERFLOW - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ - __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - - printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct task_struct)); - __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); - show_stack((void *)esp); - } -#endif - - kstat.irqs[cpu][irq]++; - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (!action) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - spin_unlock(&desc->lock); - handle_IRQ_event(irq, regs, action); - spin_lock(&desc->lock); - - if (!(desc->status & IRQ_PENDING)) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - - if (softirq_pending(cpu)) - do_softirq(); - return 1; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); - } -#endif - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) - kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = 0; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -/* - * Internal function to unregister an irqaction - typically used to - * deallocate special interrupts that are part of the architecture. - */ -int teardown_irq(unsigned int irq, struct irqaction * old) -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return -ENOENT; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action != old) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) { - barrier(); - cpu_relax(); - } -#endif - return 0; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return -ENOENT; - } -} - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. - */ - -void free_irq(unsigned int irq, void *dev_id) -{ - irq_desc_t *desc; - struct irqaction *action; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - for (action = desc->action; action != NULL; action = action->next) { - if (action->dev_id != dev_id) - continue; - - spin_unlock_irqrestore(&desc->lock,flags); - - if (teardown_irq(irq, action) == 0) - kfree(action); - return; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; -} - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long delay; - - down(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_PIRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!irq_desc[i].action) - irq_desc[i].handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger. */ - for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) - /* about 20ms delay */ synchronize_irq(); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_PIRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger - */ - for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) - /* about 100ms delay */ synchronize_irq(); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -/* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (i < 16 && !(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - return mask & val; -} - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. - */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_PIRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_desc + irq; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); - desc->handler->startup(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - register_irq_proc(irq); - return 0; -} - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#define HEX_DIGITS 8 - -static unsigned int parse_hex_value (const char *buffer, - unsigned long count, unsigned long *ret) -{ - unsigned char hexnum [HEX_DIGITS]; - unsigned long value; - int i; - - if (!count) - return -EINVAL; - if (count > HEX_DIGITS) - count = HEX_DIGITS; - if (copy_from_user(hexnum, buffer, count)) - return -EFAULT; - - /* - * Parse the first 8 characters as a hex string, any non-hex char - * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. - */ - value = 0; - - for (i = 0; i < count; i++) { - unsigned int c = hexnum[i]; - - switch (c) { - case '0' ... '9': c -= '0'; break; - case 'a' ... 'f': c -= 'a'-10; break; - case 'A' ... 'F': c -= 'A'-10; break; - default: - goto out; - } - value = (value << 4) | c; - } -out: - *ret = value; - return 0; -} - -#if CONFIG_SMP - -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; - -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; -static int irq_affinity_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -} - -static int irq_affinity_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int irq = (long) data, full_count = count, err; - unsigned long new_value; - - if (!irq_desc[irq].handler->set_affinity) - return -EIO; - - err = parse_hex_value(buffer, count, &new_value); - - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - if (!(new_value & cpu_online_map)) - return -EINVAL; - - irq_affinity[irq] = new_value; - irq_desc[irq].handler->set_affinity(irq, new_value); - - return full_count; -} - -#endif - -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned long *mask = (unsigned long *) data; - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", *mask); -} - -static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - unsigned long *mask = (unsigned long *) data, full_count = count, err; - unsigned long new_value; - - err = parse_hex_value(buffer, count, &new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || - irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); - -#if CONFIG_SMP - { - struct proc_dir_entry *entry; - - /* create /proc/irq/1234/smp_affinity */ - entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); - - if (entry) { - entry->nlink = 1; - entry->data = (void *)(long)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - - smp_affinity_entry[irq] = entry; - } -#endif -} - -unsigned long prof_cpu_mask = -1; - -void init_irq_proc (void) -{ - struct proc_dir_entry *entry; - int i; - - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", 0); - - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - - if (!entry) - return; - - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. - */ - for (i = 0; i < NR_IRQS; i++) - register_irq_proc(i); -} - diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/ldt.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/ldt.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,272 +0,0 @@ -/* - * linux/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx> - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> - -#include <asm/mmu_context.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/ldt.h> -#include <asm/desc.h> - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *mm) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - pc->size = mincount; - if (reload) { - make_pages_readonly( - pc->ldt, - (pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - load_LDT(pc); -#ifdef CONFIG_SMP - if (current->mm->cpu_vm_mask != (1<<smp_processor_id())) - smp_call_function(flush_ldt, 0, 1, 1); -#endif - } - wmb(); - if (oldsize) { - make_pages_writable( - oldldt, (oldsize*LDT_ENTRY_SIZE)/PAGE_SIZE); - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) { - printk(KERN_WARNING "ldt allocation failed\n"); - new->size = 0; - return err; - } - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - make_pages_readonly(new->ldt, (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - * Do not touch the ldt register, we are already - * in the next thread. - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - make_pages_writable( - mm->context.ldt, - (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - down(&mm->context.sem); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - up(&mm->context.sem); - if (err < 0) - return err; - if (size != bytecount) { - /* zero-fill the rest */ - clear_user(ptr+size, bytecount-size); - } - return bytecount; -} - -static int read_default_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - void *address; - - err = 0; - address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (copy_to_user(ptr, address, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; - unsigned long mach_lp; - int error; - struct modify_ldt_ldt_s ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - mach_lp = arbitrary_virt_to_machine(lp); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); - - /* Install the new entry ... */ -install: - error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2); - -out_unlock: - up(&mm->context.sem); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,260 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares <mj@xxxxxx> - * - * Adjusted to use Xen's interface by Rolf Neugebauer, Intel Research Cambridge - * Further modifications by Keir Fraser, University of Cambridge - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/ioport.h> - -#include <asm/segment.h> -#include <asm/io.h> - -#include <asm-xen/xen-public/xen.h> -#include <asm-xen/xen-public/physdev.h> - -#include "pci-i386.h" - -/* - * NB. The following interface functions are not included here: - * 1. void eisa_set_level_irq(unsigned int irq) - * 2. irq_routing_table * __devinit pcibios_get_irq_routing_table(void) - * 3. int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) - * All are used by the ACPI driver. This should be ported to Xen if it is - * ever required -- Xen is the ultimate source for IRQ-routing knowledge. - */ - -struct pci_ops *pci_root_ops = NULL; - -int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value) = NULL; -int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value) = NULL; - -unsigned int pci_probe = PCI_PROBE_BIOS; - -struct pci_fixup pcibios_fixups[] = { { 0 } }; - -static int pci_confx_read(int seg, int bus, int dev, int fn, int reg, - int len, u32 *value) -{ - int ret; - physdev_op_t op; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - op.cmd = PHYSDEVOP_PCI_CFGREG_READ; - op.u.pci_cfgreg_read.bus = bus; - op.u.pci_cfgreg_read.dev = dev; - op.u.pci_cfgreg_read.func = fn; - op.u.pci_cfgreg_read.reg = reg; - op.u.pci_cfgreg_read.len = len; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - return ret; - - *value = op.u.pci_cfgreg_read.value; - - return 0; -} - -static int pci_confx_write(int seg, int bus, int dev, int fn, int reg, - int len, u32 value) -{ - int ret; - physdev_op_t op; - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - op.cmd = PHYSDEVOP_PCI_CFGREG_WRITE; - op.u.pci_cfgreg_write.bus = bus; - op.u.pci_cfgreg_write.dev = dev; - op.u.pci_cfgreg_write.func = fn; - op.u.pci_cfgreg_write.reg = reg; - op.u.pci_cfgreg_write.len = len; - op.u.pci_cfgreg_write.value = value; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - return ret; - return 0; -} - - -static int pci_confx_read_config_byte(struct pci_dev *dev, - int where, u8 *value) -{ - int result; - u32 data; - - result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_confx_read_config_word(struct pci_dev *dev, - int where, u16 *value) -{ - int result; - u32 data; - - result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static int pci_confx_read_config_dword(struct pci_dev *dev, - int where, u32 *value) -{ - return pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_confx_write_config_byte(struct pci_dev *dev, - int where, u8 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_confx_write_config_word(struct pci_dev *dev, - int where, u16 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_confx_write_config_dword(struct pci_dev *dev, - int where, u32 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_conf_xen = { - pci_confx_read_config_byte, - pci_confx_read_config_word, - pci_confx_read_config_dword, - pci_confx_write_config_byte, - pci_confx_write_config_word, - pci_confx_write_config_dword -}; - -void pcibios_penalize_isa_irq(int irq) -{ - /* nothing */ -} - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pci_read_bridge_bases(b); -} - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct list_head *list; - struct pci_bus *bus; - - list_for_each ( list, &pci_root_buses ) - { - bus = pci_bus_b(list); - if ( bus->number == busnum ) - return bus; - } - - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); - return pci_scan_bus(busnum, pci_root_ops, NULL); -} - -void __init pcibios_init(void) -{ - int bus; - physdev_op_t op; - - if ( !pci_probe ) - return; - - pci_root_ops = &pci_conf_xen; - pci_config_read = pci_confx_read; - pci_config_write = pci_confx_write; - - pcibios_set_cacheline_size(); - - op.cmd = PHYSDEVOP_PCI_PROBE_ROOT_BUSES; - if ( HYPERVISOR_physdev_op(&op) != 0 ) - { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return; - } - - printk(KERN_INFO "PCI: Probing PCI hardware\n"); - for ( bus = 0; bus < 256; bus++ ) - if ( test_bit(bus, &op.u.pci_probe_root_buses.busmask[0]) ) - (void)pcibios_scan_root(bus); - - pcibios_resource_survey(); -} - -char * __devinit pcibios_setup(char *str) -{ - if ( !strcmp(str, "off") ) - pci_probe = 0; - return NULL; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - u8 pin; - physdev_op_t op; - - /* Inform Xen that we are going to use this device. */ - op.cmd = PHYSDEVOP_PCI_INITIALISE_DEVICE; - op.u.pci_initialise_device.bus = dev->bus->number; - op.u.pci_initialise_device.dev = PCI_SLOT(dev->devfn); - op.u.pci_initialise_device.func = PCI_FUNC(dev->devfn); - if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) - return err; - - /* Now we can bind to the very final IRQ line. */ - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &pin); - dev->irq = pin; - - /* Turn on device I/O and memory access as necessary. */ - if ( (err = pcibios_enable_resources(dev, mask)) < 0 ) - return err; - - /* Sanity-check that an interrupt-producing device is routed to an IRQ. */ - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if ( pin != 0 ) - { - if ( dev->irq != 0 ) - printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n", - dev->irq, dev->slot_name); - else - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of " - "device %s.\n", 'A' + pin - 1, dev->slot_name); - } - - return 0; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/process.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/process.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,448 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#define __KERNEL_SYSCALLS__ -#include <stdarg.h> - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/user.h> -#include <linux/a.out.h> -#include <linux/interrupt.h> -#include <linux/config.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/mc146818rtc.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/ldt.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/desc.h> -#include <asm/mmu_context.h> -#include <asm-xen/xen-public/physdev.h> - -#include <linux/irq.h> - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -int hlt_counter; - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle (void) -{ - extern int set_timeout_timer(void); - - /* Endless idle loop with no priority at all. */ - init_idle(); - current->nice = 20; - current->counter = -100; - - for ( ; ; ) - { - while ( !current->need_resched ) - { - __cli(); - if ( current->need_resched ) - { - /* The race-free check for events failed. */ - __sti(); - break; - } - else if ( set_timeout_timer() == 0 ) - { - /* NB. Blocking reenable events in a race-free manner. */ - HYPERVISOR_block(); - } - else - { - /* No race here: yielding will get us the CPU again anyway. */ - __sti(); - HYPERVISOR_yield(); - } - } - schedule(); - check_pgt_cache(); - } -} - -extern void show_trace(unsigned long* esp); - -void show_regs(struct pt_regs * regs) -{ - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); - if (regs->xcs & 2) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes); - - show_trace(&regs->esp); -} - - -/* - * Create a kernel thread - */ -int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - long retval, d0; - - __asm__ __volatile__( - "movl %%esp,%%esi\n\t" - "int $0x80\n\t" /* Linux/i386 system call */ - "cmpl %%esp,%%esi\n\t" /* child or parent? */ - "je 1f\n\t" /* parent - jump */ - /* Load the argument into eax, and push it. That way, it does - * not matter whether the called function is compiled with - * -mregparm or not. */ - "movl %4,%%eax\n\t" - "pushl %%eax\n\t" - "call *%5\n\t" /* call fn */ - "movl %3,%0\n\t" /* exit */ - "int $0x80\n" - "1:\t" - :"=&a" (retval), "=&S" (d0) - :"0" (__NR_clone), "i" (__NR_exit), - "r" (arg), "r" (fn), - "b" (flags | CLONE_VM) - : "memory"); - - return retval; -} - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* nothing to do ... */ -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); - - /* - * Forget coprocessor state.. - */ - clear_fpu(tsk); - tsk->used_math = 0; -} - -void release_thread(struct task_struct *dead_task) -{ - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%08x>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - //release_x86_irqs(dead_task); -} - - -/* - * Save a segment. - */ -#define savesegment(seg,value) \ - asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) - -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) -{ - struct pt_regs * childregs; - - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; - struct_cpy(childregs, regs); - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - - p->thread.eip = (unsigned long) ret_from_fork; - - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); - - unlazy_fpu(current); - struct_cpy(&p->thread.i387, &current->thread.i387); - - p->thread.io_pl = current->thread.io_pl; - - return 0; -} - -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - int i; - -/* changed the size calculations - should hopefully work better. lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -} - -/* - * switch_to(x,yn) should switch tasks from x to y. - * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - */ -void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *next = &next_p->thread; - physdev_op_t op; - multicall_entry_t _mcl[8], *mcl = _mcl; - - /* - * This is basically 'unlazy_fpu', except that we queue a multicall to - * indicate FPU task switch, rather than synchronously trapping to Xen. - */ - if ( prev_p->flags & PF_USEDFPU ) - { - if ( cpu_has_fxsr ) - asm volatile( "fxsave %0 ; fnclex" - : "=m" (prev_p->thread.i387.fxsave) ); - else - asm volatile( "fnsave %0 ; fwait" - : "=m" (prev_p->thread.i387.fsave) ); - prev_p->flags &= ~PF_USEDFPU; - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = 1; - mcl++; - } - - mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = __KERNEL_DS; - mcl->args[1] = next->esp0; - mcl++; - - if ( prev_p->thread.io_pl != next->io_pl ) - { - op.cmd = PHYSDEVOP_SET_IOPL; - op.u.set_iopl.iopl = next->io_pl; - mcl->op = __HYPERVISOR_physdev_op; - mcl->args[0] = (unsigned long)&op; - mcl++; - } - - (void)HYPERVISOR_multicall(_mcl, mcl - _mcl); - - /* - * Restore %fs and %gs. - */ - loadsegment(fs, next->fs); - loadsegment(gs, next->gs); - - /* - * Now maybe reload the debug registers - */ - if ( next->debugreg[7] != 0 ) - { - HYPERVISOR_set_debugreg(0, next->debugreg[0]); - HYPERVISOR_set_debugreg(1, next->debugreg[1]); - HYPERVISOR_set_debugreg(2, next->debugreg[2]); - HYPERVISOR_set_debugreg(3, next->debugreg[3]); - /* no 4 and 5 */ - HYPERVISOR_set_debugreg(6, next->debugreg[6]); - HYPERVISOR_set_debugreg(7, next->debugreg[7]); - } -} - -asmlinkage int sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.esp, &regs, 0); -} - -asmlinkage int sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - - clone_flags = regs.ebx; - newsp = regs.ecx; - if (!newsp) - newsp = regs.esp; - return do_fork(clone_flags, newsp, &regs, 0); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage int sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0); -} - -/* - * sys_execve() executes a new program. - */ -asmlinkage int sys_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname((char *) regs.ebx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs); - if (error == 0) - current->ptrace &= ~PT_DTRACE; - putname(filename); - out: - return error; -} - -/* - * These bracket the sleeping functions.. - */ -extern void scheduling_functions_start_here(void); -extern void scheduling_functions_end_here(void); -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ebp, esp, eip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)p; - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > 8188+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; - do { - if (ebp < stack_page || ebp > 8184+stack_page) - return 0; - eip = *(unsigned long *) (ebp+4); - if (eip < first_sched || eip >= last_sched) - return eip; - ebp = *(unsigned long *) ebp; - } while (count++ < 16); - return 0; -} -#undef last_sched -#undef first_sched diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/setup.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/setup.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1213 +0,0 @@ -/* - * linux/arch/i386/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#define __KERNEL_SYSCALLS__ -static int errno; -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/a.out.h> -#include <linux/tty.h> -#include <linux/ioport.h> -#include <linux/delay.h> -#include <linux/config.h> -#include <linux/init.h> -#include <linux/apm_bios.h> -#ifdef CONFIG_BLK_DEV_RAM -#include <linux/blk.h> -#endif -#include <linux/highmem.h> -#include <linux/bootmem.h> -#include <linux/seq_file.h> -#include <linux/reboot.h> -#include <asm/processor.h> -#include <linux/console.h> -#include <linux/module.h> -#include <asm/mtrr.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/msr.h> -#include <asm/desc.h> -#include <asm/dma.h> -#include <asm/mpspec.h> -#include <asm/mmu_context.h> -#include <asm/ctrl_if.h> -#include <asm/hypervisor.h> -#include <asm-xen/xen-public/physdev.h> -#include <linux/netdevice.h> -#include <linux/rtnetlink.h> -#include <linux/tqueue.h> -#include <net/pkt_sched.h> /* dev_(de)activate */ - -/* - * Point at the empty zero page to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; - -unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list; - -/* - * Machine setup.. - */ - -char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; - -unsigned long mmu_cr4_features; - -unsigned char * vgacon_mmap; - -/* - * Bus types .. - */ -#ifdef CONFIG_EISA -int EISA_bus; -#endif -int MCA_bus; - -/* for MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; -unsigned int mca_pentium_flag; - -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; - -/* - * Setup options - */ -struct drive_info_struct { char dummy[32]; } drive_info; -struct screen_info screen_info; -struct apm_info apm_info; -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; - -unsigned char aux_device_present; - -extern int root_mountflags; -extern char _text, _etext, _edata, _end; - -extern int blk_nohighio; - -int enable_acpi_smp_table; - -/* Raw start-of-day parameters from the hypervisor. */ -union xen_start_info_union xen_start_info_union; - -#define COMMAND_LINE_SIZE MAX_GUEST_CMDLINE -static char command_line[COMMAND_LINE_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; - -/* parse_mem_cmdline() - * returns the value of the mem= boot param converted to pages or 0 - */ -static int __init parse_mem_cmdline (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - unsigned long long bytes; - int mem_param = 0; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, xen_start_info.cmd_line, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to <mem>, overriding the bios size. - * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * <start> to <start>+<mem>, overriding the bios size. - */ - if (c == ' ' && !memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - } else if (!memcmp(from+4, "exactmap", 8)) { - from += 8+4; - } else { - bytes = memparse(from+4, &from); - mem_param = bytes>>PAGE_SHIFT; - if (*from == '@') - (void)memparse(from+1, &from); - } - } - - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - - return mem_param; -} - -/* - * Every exception-fixup table is sorted (i.e., kernel main table, and every - * module table. Some elements may be out of order if they reference text.init, - * for example. - */ -static void sort_exception_table(struct exception_table_entry *start, - struct exception_table_entry *end) -{ - struct exception_table_entry *p, *q, tmp; - - for ( p = start; p < end; p++ ) - { - for ( q = p-1; q > start; q-- ) - if ( p->insn > q->insn ) - break; - if ( ++q != p ) - { - tmp = *p; - memmove(q+1, q, (p-q)*sizeof(*p)); - *q = tmp; - } - } -} - -int xen_module_init(struct module *mod) -{ - sort_exception_table(mod->ex_table_start, mod->ex_table_end); - return 0; -} - -void __init setup_arch(char **cmdline_p) -{ - int i,j; - unsigned long bootmap_size, start_pfn, lmax_low_pfn; - int mem_param; /* user specified memory size in pages */ - int boot_pfn; /* low pages available for bootmem */ - physdev_op_t op; - - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - - extern unsigned long cpu0_pte_quicklist[]; - extern unsigned long cpu0_pgd_quicklist[]; - - extern const struct exception_table_entry __start___ex_table[]; - extern const struct exception_table_entry __stop___ex_table[]; - - extern char _stext; - - /* Force a quick death if the kernel panics. */ - extern int panic_timeout; - if ( panic_timeout == 0 ) - panic_timeout = 1; - - /* Ensure that the kernel exception-fixup table is sorted. */ - sort_exception_table(__start___ex_table, __stop___ex_table); - -#ifndef CONFIG_HIGHIO - blk_nohighio = 1; -#endif - - HYPERVISOR_vm_assist( - VMASST_CMD_enable, VMASST_TYPE_4gb_segments); - HYPERVISOR_vm_assist( - VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); - - boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; - boot_cpu_data.pte_quick = cpu0_pte_quicklist; - - /* This must be initialized to UNNAMED_MAJOR for ipconfig to work - properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. */ - ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - memset(&drive_info, 0, sizeof(drive_info)); - memset(&screen_info, 0, sizeof(screen_info)); - - /* This is drawn from a dump from vgacon:startup in standard Linux. */ - screen_info.orig_video_mode = 3; - screen_info.orig_video_isVGA = 1; - screen_info.orig_video_lines = 25; - screen_info.orig_video_cols = 80; - screen_info.orig_video_ega_bx = 3; - screen_info.orig_video_points = 16; - - memset(&apm_info.bios, 0, sizeof(apm_info.bios)); - aux_device_present = 0; -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = 0; - rd_prompt = 0; - rd_doload = 0; -#endif - - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) &_text; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - /* The mem= kernel command line param overrides the detected amount - * of memory. For xenolinux, if this override is larger than detected - * memory, then boot using only detected memory and make provisions to - * use all of the override value. The hypervisor can give this - * domain more memory later on and it will be added to the free - * lists at that time. See claim_new_pages() in - * arch/xen/drivers/balloon/balloon.c - */ - mem_param = parse_mem_cmdline(cmdline_p); - if (mem_param < xen_start_info.nr_pages) - mem_param = xen_start_info.nr_pages; - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - -/* - * 128MB for vmalloc(), iomap(), kmap(), and fixaddr mappings. - */ -#define VMALLOC_RESERVE (unsigned long)(128 << 20) -#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) -#define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define MAX_NONPAE_PFN (1 << 20) - - /* - * Determine low and high memory ranges: - */ - lmax_low_pfn = max_pfn = mem_param; - if (lmax_low_pfn > MAXMEM_PFN) { - lmax_low_pfn = MAXMEM_PFN; -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - max_pfn = lmax_low_pfn; -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_X86_PAE - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - } -#endif /* !CONFIG_X86_PAE */ -#endif /* !CONFIG_HIGHMEM */ - } - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > MAXMEM_PFN) { - highstart_pfn = MAXMEM_PFN; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - } -#endif - - phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list; - cur_pgd = init_mm.pgd = (pgd_t *)xen_start_info.pt_base; - - start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) + - xen_start_info.nr_pt_frames; - - /* - * Initialize the boot-time allocator, and free up all RAM. Then reserve - * space for OS image, initrd, phys->machine table, bootstrap page table, - * and the bootmem bitmap. - * NB. There is definitely enough room for the bootmem bitmap in the - * bootstrap page table. We are guaranteed to get >=512kB unused 'padding' - * for our own use after all bootstrap elements - * (see asm-xen/xen-public/xen.h). - */ - boot_pfn = min((int)xen_start_info.nr_pages,lmax_low_pfn); - bootmap_size = init_bootmem(start_pfn,boot_pfn); - free_bootmem(0, PFN_PHYS(boot_pfn)); - reserve_bootmem(__pa(&_stext), - PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 - - __pa(&_stext)); - - /* init_bootmem() set the global max_low_pfn to boot_pfn. Now max_low_pfn - * can be set to the override value. - */ - max_low_pfn = lmax_low_pfn; - -#ifdef CONFIG_BLK_DEV_INITRD - if ( xen_start_info.mod_start != 0 ) - { - if ( (__pa(xen_start_info.mod_start) + xen_start_info.mod_len) <= - (max_low_pfn << PAGE_SHIFT) ) - { - initrd_start = xen_start_info.mod_start; - initrd_end = initrd_start + xen_start_info.mod_len; - initrd_below_start_ok = 1; - } - else - { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - __pa(xen_start_info.mod_start) + xen_start_info.mod_len, - max_low_pfn << PAGE_SHIFT); - initrd_start = 0; - } - } -#endif - - paging_init(); - - /* Make sure we have a correctly sized P->M table. */ - if ( max_pfn != xen_start_info.nr_pages ) - { - phys_to_machine_mapping = alloc_bootmem_low_pages( - max_pfn * sizeof(unsigned long)); - if ( max_pfn > xen_start_info.nr_pages ) - { - memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); - memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); - } - else - { - memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - max_pfn * sizeof(unsigned long)); - if (HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, - (unsigned long *)xen_start_info.mfn_list + max_pfn, - xen_start_info.nr_pages - max_pfn, 0) != - (xen_start_info.nr_pages - max_pfn)) - BUG(); - } - free_bootmem(__pa(xen_start_info.mfn_list), - PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); - } - - pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) - { - pfn_to_mfn_frame_list[j] = - virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; - } - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = - virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - - op.cmd = PHYSDEVOP_SET_IOPL; - op.u.set_iopl.iopl = current->thread.io_pl = 1; - HYPERVISOR_physdev_op(&op); - - if (xen_start_info.flags & SIF_INITDOMAIN ) - { - if( !(xen_start_info.flags & SIF_PRIVILEGED) ) - panic("Xen granted us console access but not privileged status"); - -#if defined(CONFIG_VT) -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif - } -} - -static int cachesize_override __initdata = -1; -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init highio_setup(char *str) -{ - printk("i386: disabling HIGHMEM block I/O\n"); - blk_nohighio = 1; - return 1; -} -__setup("nohighio", highio_setup); - -static int __init get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -static void __init display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* AMD errata T13 (order #21922) */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - l2size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - l2size = 256; - } - - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && - (c->x86_model == 11) && (l2size == 0)) - l2size = 256; - - if (c->x86_vendor == X86_VENDOR_CENTAUR) { - /* VIA C3 CPUs (670-68F) need further shifting. */ - if ((c->x86 == 6) && - ((c->x86_model == 7) || (c->x86_model == 8))) { - l2size >>= 8; - } - - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. */ - if ((c->x86==6) && (c->x86_model==9) && - (c->x86_mask==1) && (l2size==65)) - l2size -= 1; - } - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -static void __init init_c3(struct cpuinfo_x86 *c) -{ - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } - - switch (c->x86_model) { - case 9: /* Nehemiah */ - default: - get_model_name(c); - display_cacheinfo(c); - break; - } -} - -static void __init init_centaur(struct cpuinfo_x86 *c) -{ - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*3231) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - switch (c->x86) { - case 6: - init_c3(c); - break; - default: - panic("Unsupported Centaur CPU (%i)\n", c->x86); - } -} - -static int __init init_amd(struct cpuinfo_x86 *c) -{ - int r; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 5: /* We don't like AMD K6 */ - panic("Unsupported AMD processor\n"); - case 6: /* An Athlon/Duron. We can trust the BIOS probably */ - break; - } - - display_cacheinfo(c); - return r; -} - - -static void __init init_intel(struct cpuinfo_x86 *c) -{ - char *p = NULL; - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - - if (c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char dl, dh; - unsigned int cs; - - dh = des >> 4; - dl = des & 0x0F; - - /* Black magic... */ - - switch ( dh ) - { - case 0: - switch ( dl ) { - case 6: - /* L1 I cache */ - l1i += 8; - break; - case 8: - /* L1 I cache */ - l1i += 16; - break; - case 10: - /* L1 D cache */ - l1d += 8; - break; - case 12: - /* L1 D cache */ - l1d += 16; - break; - default:; - /* TLB, or unknown */ - } - break; - case 2: - if ( dl ) { - /* L3 cache */ - cs = (dl-1) << 9; - l3 += cs; - } - break; - case 4: - if ( c->x86 > 6 && dl ) { - /* P4 family */ - /* L3 cache */ - cs = 128 << (dl-1); - l3 += cs; - break; - } - /* else same as 8 - fall through */ - case 8: - if ( dl ) { - /* L2 cache */ - cs = 128 << (dl-1); - l2 += cs; - } - break; - case 6: - if (dl > 5) { - /* L1 D cache */ - cs = 8<<(dl-6); - l1d += cs; - } - break; - case 7: - if ( dl >= 8 ) - { - /* L2 cache */ - cs = 64<<(dl-8); - l2 += cs; - } else { - /* L0 I cache, count as L1 */ - cs = dl ? (16 << (dl-1)) : 12; - l1i += cs; - } - break; - default: - /* TLB, or something else we don't know about */ - break; - } - } - } - if ( l1i || l1d ) - printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", - l1i, l1d); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ - if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) - clear_bit(X86_FEATURE_SEP, &c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (l2 == 0) - p = "Celeron (Covington)"; - if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); -} - -void __init get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* Naming convention should be: <Name> [(<Codename>)] */ -/* This table only is used unless init_<vendor>() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ -static struct cpu_model_info cpu_models[] __initdata = { - { X86_VENDOR_INTEL, 6, - { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", - NULL, "Pentium II (Deschutes)", "Mobile Pentium II", - "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, - "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, - { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ - { "Athlon", "Athlon", - "Athlon", NULL, "Athlon", NULL, - NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL }} -}; - -/* Look up CPU names by table lookup. */ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info = cpu_models; - int i; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { - if ( info->vendor == c->x86_vendor && - info->family == c->x86 ) { - return info->model_names[c->x86_model]; - } - info++; - } - return NULL; /* Not found */ -} - - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - - - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -unsigned char eddnr; -struct edd_info edd[EDDMAXNR]; -unsigned int edd_disk80_sig; -/** - * copy_edd() - Copy the BIOS EDD information - * from empty_zero_page into a safe place. - * - */ -static inline void copy_edd(void) -{ - eddnr = EDD_NR; - memcpy(edd, EDD_BUF, sizeof(edd)); - edd_disk80_sig = DISK80_SIGNATURE_BUFFER; -} -#else -static inline void copy_edd(void) {} -#endif - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __init identify_cpu(struct cpuinfo_x86 *c) -{ - int junk, i; - u32 xlvl, tfms; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->hard_math = 1; - - if ( !have_cpuid_p() ) { - panic("Processor must support CPUID\n"); - } else { - /* CPU does have CPUID */ - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { - c->x86 += (tfms >> 20) & 0xff; - c->x86_model += ((tfms >> 16) & 0xF) << 4; - } - c->x86_mask = tfms & 15; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) - c->x86_capability[1] = cpuid_edx(0x80000001); - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - } - - printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_vendor); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - switch ( c->x86_vendor ) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - default: - printk("Unsupported CPU vendor (%d) -- please report!\n", - c->x86_vendor); - } - - printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", - boot_cpu_data.x86_capability[0], - boot_cpu_data.x86_capability[1], - boot_cpu_data.x86_capability[2], - boot_cpu_data.x86_capability[3]); -} - - -/* These need to match <asm/processor.h> */ -static char *cpu_vendor_names[] __initdata = { - "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; - - -void __init print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) - vendor = cpu_vendor_names[c->x86_vendor]; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - -/* - * Get CPU information for use by the procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in <asm/cpufeature.h>. - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu/<cpu_nr>/cpuid instead. - */ - static char *x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, - NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "tm2", - "est", NULL, "cid", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!(cpu_online_map & (1<<n))) - return 0; -#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", - cpu_khz / 1000, (cpu_khz % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - start: c_start, - next: c_next, - stop: c_stop, - show: show_cpuinfo, -}; - -unsigned long cpu_initialized __initdata = 0; - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __init cpu_init (void) -{ - int nr = smp_processor_id(); - - if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); - for (;;) __sti(); - } - printk(KERN_INFO "Initializing CPU#%d\n", nr); - - /* - * set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if(current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current, nr); - - HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); - - load_LDT(&init_mm.context); - - /* Force FPU initialization. */ - current->flags &= ~PF_USEDFPU; - current->used_math = 0; - stts(); -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/signal.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/signal.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,717 +0,0 @@ -/* - * linux/arch/i386/kernel/signal.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/tty.h> -#include <linux/personality.h> -#include <asm/ucontext.h> -#include <asm/uaccess.h> -#include <asm/i387.h> - -#define DEBUG_SIG 0 - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); - -int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -{ - if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) - return -EFAULT; - if (from->si_code < 0) - return __copy_to_user(to, from, sizeof(siginfo_t)); - else { - int err; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - /* First 32bits of unions are always present. */ - err |= __put_user(from->si_pid, &to->si_pid); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - default: - err |= __put_user(from->si_uid, &to->si_uid); - break; - /* case __SI_RT: This is not generated by the kernel as of now. */ - } - return err; - } -} - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - struct pt_regs * regs = (struct pt_regs *) &history0; - sigset_t saveset; - - mask &= _BLOCKABLE; - spin_lock_irq(&current->sigmask_lock); - saveset = current->blocked; - siginitset(&current->blocked, mask); - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -{ - struct pt_regs * regs = (struct pt_regs *) &unewset; - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(&current->sigmask_lock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - -asmlinkage int -sys_sigaltstack(const stack_t *uss, stack_t *uoss) -{ - struct pt_regs *regs = (struct pt_regs *) &uss; - return do_sigaltstack(uss, uoss, regs->esp); -} - - -/* - * Do a signal return; undo the signal stack. - */ - -struct sigframe -{ - char *pretcode; - int sig; - struct sigcontext sc; - struct _fpstate fpstate; - unsigned long extramask[_NSIG_WORDS-1]; - char retcode[8]; -}; - -struct rt_sigframe -{ - char *pretcode; - int sig; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - struct ucontext uc; - struct _fpstate fpstate; - char retcode[8]; -}; - -static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) -{ - unsigned int err = 0; - -#define COPY(x) err |= __get_user(regs->x, &sc->x) - -#define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp; } - -#define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp|3; } - -#define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg,tmp); } - - GET_SEG(gs); - GET_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); - COPY(edi); - COPY(esi); - COPY(ebp); - COPY(esp); - COPY(ebx); - COPY(edx); - COPY(ecx); - COPY(eip); - COPY_SEG_STRICT(cs); - COPY_SEG_STRICT(ss); - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_eax = -1; /* disable syscall checks */ - } - - { - struct _fpstate * buf; - err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } - } - - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; -} - -asmlinkage int sys_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct sigframe *frame = (struct sigframe *)(regs->esp - 8); - sigset_t set; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask) - || (_NSIG_WORDS > 1 - && __copy_from_user(&set.sig[1], &frame->extramask, - sizeof(frame->extramask)))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - if (restore_sigcontext(regs, &frame->sc, &eax)) - goto badframe; - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); - sigset_t set; - stack_t st; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) - goto badframe; - - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs->esp); - - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -/* - * Set up a signal frame. - */ - -static int -setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, - struct pt_regs *regs, unsigned long mask) -{ - int tmp, err = 0; - - tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); - - tmp = save_i387(fpstate); - if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); - - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); - - return err; -} - -/* - * Determine which stack to use.. - */ -static inline void * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long esp; - - /* Default to using normal stack */ - esp = regs->esp; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(esp) == 0) - esp = current->sas_ss_sp + current->sas_ss_size; - } - - /* This is the legacy signal stack switching. */ - else if ((regs->xss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - esp = (unsigned long) ka->sa.sa_restorer; - } - - return (void *)((esp - frame_size) & -8ul); -} - -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) -{ - struct sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - if (err) - goto give_sigsegv; - - err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); - if (err) - goto give_sigsegv; - - if (_NSIG_WORDS > 1) { - err |= __copy_to_user(frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - } - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is popl %eax ; movl $,%eax ; int $0x80 */ - err |= __put_user(0xb858, (short *)(frame->retcode+0)); - err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); - err |= __put_user(0x80cd, (short *)(frame->retcode+6)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) -{ - struct rt_sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->esp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is movl $,%eax ; int $0x80 */ - err |= __put_user(0xb8, (char *)(frame->retcode+0)); - err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); - err |= __put_user(0x80cd, (short *)(frame->retcode+5)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -/* - * OK, we're invoking a handler - */ - -static void -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) -{ - /* Are we from a system call? */ - if (regs->orig_eax >= 0) { - /* If so, check system call restarting.. */ - switch (regs->eax) { - case -ERESTARTNOHAND: - regs->eax = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->eax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); - else - setup_frame(sig, ka, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; - - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sigmask_lock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); - sigaddset(&current->blocked,sig); - recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); - } -} - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) -{ - siginfo_t info; - struct k_sigaction *ka; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if ((regs->xcs & 2) != 2) - return 1; - - if (!oldset) - oldset = &current->blocked; - - for (;;) { - unsigned long signr; - - spin_lock_irq(&current->sigmask_lock); - signr = dequeue_signal(&current->blocked, &info); - spin_unlock_irq(&current->sigmask_lock); - - if (!signr) - break; - - if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { - /* Let the debugger run. */ - current->exit_code = signr; - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); - - /* We're back. Did the debugger cancel the sig? */ - if (!(signr = current->exit_code)) - continue; - current->exit_code = 0; - - /* The debugger continued. Ignore SIGSTOP. */ - if (signr == SIGSTOP) - continue; - - /* Update the siginfo structure. Is this good? */ - if (signr != info.si_signo) { - info.si_signo = signr; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = current->p_pptr->pid; - info.si_uid = current->p_pptr->uid; - } - - /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(&current->blocked, signr)) { - send_sig_info(signr, &info, current); - continue; - } - } - - ka = &current->sig->action[signr-1]; - if (ka->sa.sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* Check for SIGCHLD: it's special. */ - while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) - /* nothing */; - continue; - } - - if (ka->sa.sa_handler == SIG_DFL) { - int exit_code = signr; - - /* Init gets no signals it doesn't want. */ - if (current->pid == 1) - continue; - - switch (signr) { - case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: - continue; - - case SIGTSTP: case SIGTTIN: case SIGTTOU: - if (is_orphaned_pgrp(current->pgrp)) - continue; - /* FALLTHRU */ - - case SIGSTOP: { - struct signal_struct *sig; - current->state = TASK_STOPPED; - current->exit_code = signr; - sig = current->p_pptr->sig; - if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - notify_parent(current, SIGCHLD); - schedule(); - continue; - } - - case SIGQUIT: case SIGILL: case SIGTRAP: - case SIGABRT: case SIGFPE: case SIGSEGV: - case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: - if (do_coredump(signr, regs)) - exit_code |= 0x80; - /* FALLTHRU */ - - default: - sig_exit(signr, exit_code, &info); - /* NOTREACHED */ - } - } - - /* Reenable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if ( current->thread.debugreg[7] != 0 ) - HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); - - /* Whee! Actually deliver the signal. */ - handle_signal(signr, ka, &info, oldset, regs); - return 1; - } - - /* Did we come from a system call? */ - if (regs->orig_eax >= 0) { - /* Restart the system call - no handlers present */ - if (regs->eax == -ERESTARTNOHAND || - regs->eax == -ERESTARTSYS || - regs->eax == -ERESTARTNOINTR) { - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - return 0; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/time.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/time.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,741 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge - * (C) 2002-2003 - Keir Fraser - University of Cambridge - **************************************************************************** - * - * File: arch/xen/kernel/time.c - * Author: Rolf Neugebauer and Keir Fraser - * - * Description: Interface with Xen to get correct notion of time - */ - -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update - * 1996-05-03 Ingo Molnar - * fixed time warps in do_[slow|fast]_gettimeoffset() - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-09-05 (Various) - * More robust do_fast_gettimeoffset() algorithm implemented - * (works with APM, Cyrix 6x86MX and Centaur C6), - * monotonic gettimeofday() with fast_get_timeoffset(), - * drift-proof precision TSC calibration on boot - * (C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, Andrew D. - * Balsa <andrebalsa@xxxxxxxxxx>, Philip Gladstone <philip@xxxxxxxxxx>; - * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@xxxxxxxxxxxxx>). - * 1998-12-16 Andrea Arcangeli - * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy - * because was not accounting lost_ticks. - * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli - * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). - */ - -#include <asm/smp.h> -#include <asm/irq.h> -#include <asm/msr.h> -#include <asm/delay.h> -#include <asm/mpspec.h> -#include <asm/uaccess.h> -#include <asm/processor.h> - -#include <asm/div64.h> -#include <asm/hypervisor.h> -#include <asm-xen/xen-public/dom0_ops.h> - -#include <linux/mc146818rtc.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/time.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/irq.h> -#include <linux/sysctl.h> -#include <linux/sysrq.h> - -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; - -unsigned long cpu_khz; /* get this from Xen, used elsewhere */ - -static unsigned int rdtsc_bitshift; -static u32 st_scale_f; /* convert ticks -> usecs */ -static u32 st_scale_i; /* convert ticks -> usecs */ - -/* These are peridically updated in shared_info, and then copied here. */ -static u32 shadow_tsc_stamp; -static u64 shadow_system_time; -static u32 shadow_time_version; -static struct timeval shadow_tv; - -/* - * We use this to ensure that gettimeofday() is monotonically increasing. We - * only break this guarantee if the wall clock jumps backwards "a long way". - */ -static struct timeval last_seen_tv = {0,0}; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* Periodically propagate synchronised time base to the RTC and to Xen. */ -static long last_update_to_rtc, last_update_to_xen; -#endif - -/* Periodically take synchronised time base from Xen, if we need it. */ -static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ - -/* Keep track of last time we did processing/updating of jiffies and xtime. */ -static u64 processed_system_time; /* System time (ns) at last processing. */ - -#define NS_PER_TICK (1000000000ULL/HZ) - -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC (1000000000L) -#endif - -#define HANDLE_USEC_UNDERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec < 0 ) \ - { \ - (_tv).tv_usec += 1000000; \ - (_tv).tv_sec--; \ - } \ - } while ( 0 ) -#define HANDLE_USEC_OVERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec >= 1000000 ) \ - { \ - (_tv).tv_usec -= 1000000; \ - (_tv).tv_sec++; \ - } \ - } while ( 0 ) -static inline void __normalize_time(time_t *sec, s64 *nsec) -{ - while (*nsec >= NSEC_PER_SEC) { - (*nsec) -= NSEC_PER_SEC; - (*sec)++; - } - while (*nsec < 0) { - (*nsec) += NSEC_PER_SEC; - (*sec)--; - } -} - -/* Dynamically-mapped IRQs. */ -static int time_irq, debug_irq; - -/* Does this guest OS track Xen time, or set its wall clock independently? */ -static int independent_wallclock = 0; -static int __init __independent_wallclock(char *str) -{ - independent_wallclock = 1; - return 1; -} -__setup("independent_wallclock", __independent_wallclock); -#define INDEPENDENT_WALLCLOCK() \ - (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN)) - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ -static int set_rtc_mmss(unsigned long nowtime) -{ - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - /* gets recalled with irq locally disabled */ - spin_lock(&rtc_lock); - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - BCD_TO_BIN(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, don't interfere with - * hour overflow. This avoids messing with unknown time zones but requires - * your RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 ) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if ( abs(real_minutes - cmos_minutes) < 30 ) - { - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } - else - { - printk(KERN_WARNING - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; -} -#endif - - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called with the xtime_lock held for writing. - */ -static void __get_time_values_from_xen(void) -{ - shared_info_t *s = HYPERVISOR_shared_info; - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - src = &s->vcpu_time[smp_processor_id()]; - dst = &per_cpu(shadow_time, smp_processor_id()); - - do { - dst->version = src->time_version2; - rmb(); - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_timestamp = src->system_time; - dst->tsc_to_nsec_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); - } - while (dst->version != src->time_version1); - - dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; - - if ((shadow_tv.tv_sec != s->wc_sec) || - (shadow_tv.tv_usec != s->wc_usec)) - update_wallclock(); -} - - -/* - * Returns the system time elapsed, in ns, since the current shadow_timestamp - * was calculated. Must be called with the xtime_lock held for reading. - */ -static inline unsigned long __get_time_delta_usecs(void) -{ - s32 delta_tsc; - u32 low; - u64 delta, tsc; - - rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - shadow_tsc_stamp); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); - - return (unsigned long)delta; -} - -static inline int time_values_up_to_date() -{ - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()]; - dst = &per_cpu(shadow_time, smp_processor_id()); - - return (dst->version == src->time_version2); -} - - -/* - * Returns the current time-of-day in UTC timeval format. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags, lost; - struct timeval _tv; - s64 nsec; - - again: - read_lock_irqsave(&xtime_lock, flags); - - _tv.tv_usec = __get_time_delta_usecs(); - if ( (lost = (jiffies - wall_jiffies)) != 0 ) - _tv.tv_usec += lost * (1000000 / HZ); - _tv.tv_sec = xtime.tv_sec; - _tv.tv_usec += xtime.tv_usec; - - nsec = shadow_system_time - processed_system_time; - __normalize_time(&_tv.tv_sec, &nsec); - _tv.tv_usec += (long)nsec / 1000L; - - if ( unlikely(!time_values_up_to_date()) ) - { - /* - * We may have blocked for a long time, rendering our calculations - * invalid (e.g. the time delta may have overflowed). Detect that - * and recalculate with fresh values. - */ - read_unlock_irqrestore(&xtime_lock, flags); - write_lock_irqsave(&xtime_lock, flags); - __get_time_values_from_xen(); - write_unlock_irqrestore(&xtime_lock, flags); - goto again; - } - - HANDLE_USEC_OVERFLOW(_tv); - - /* Ensure that time-of-day is monotonically increasing. */ - if ( (_tv.tv_sec < last_seen_tv.tv_sec) || - ((_tv.tv_sec == last_seen_tv.tv_sec) && - (_tv.tv_usec < last_seen_tv.tv_usec)) ) - _tv = last_seen_tv; - last_seen_tv = _tv; - - read_unlock_irqrestore(&xtime_lock, flags); - - *tv = _tv; -} - - -/* - * Sets the current time-of-day based on passed-in UTC timeval parameter. - */ -void do_settimeofday(struct timeval *tv) -{ - struct timeval newtv; - s64 nsec; - suseconds_t usec; - - if ( !INDEPENDENT_WALLCLOCK() ) - return; - - write_lock_irq(&xtime_lock); - - /* - * Ensure we don't get blocked for a long time so that our time delta - * overflows. If that were to happen then our shadow time values would - * be stale, so we can retry with fresh ones. - */ - again: - usec = tv->tv_usec - __get_time_delta_usecs(); - - nsec = shadow_system_time - processed_system_time; - __normalize_time(&tv->tv_sec, &nsec); - usec -= (long)nsec / 1000L; - - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) - { - __get_time_values_from_xen(); - goto again; - } - tv->tv_usec = usec; - - HANDLE_USEC_UNDERFLOW(*tv); - - newtv = *tv; - - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); - HANDLE_USEC_UNDERFLOW(*tv); - - xtime = *tv; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - - /* Reset all our running time counts. They make no sense now. */ - last_seen_tv.tv_sec = 0; - last_update_from_xen = 0; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ( xen_start_info.flags & SIF_INITDOMAIN ) - { - dom0_op_t op; - last_update_to_rtc = last_update_to_xen = 0; - op.cmd = DOM0_SETTIME; - op.u.settime.secs = newtv.tv_sec; - op.u.settime.usecs = newtv.tv_usec; - op.u.settime.system_time = shadow_system_time; - write_unlock_irq(&xtime_lock); - HYPERVISOR_dom0_op(&op); - } - else -#endif - { - write_unlock_irq(&xtime_lock); - } -} - - -asmlinkage long sys_stime(int *tptr) -{ - int value; - struct timeval tv; - - if ( !capable(CAP_SYS_TIME) ) - return -EPERM; - - if ( get_user(value, tptr) ) - return -EFAULT; - - tv.tv_sec = value; - tv.tv_usec = 0; - - do_settimeofday(&tv); - - return 0; -} - - -/* Convert jiffies to system time. Call with xtime_lock held for reading. */ -static inline u64 __jiffies_to_st(unsigned long j) -{ - return processed_system_time + ((j - jiffies) * NS_PER_TICK); -} - - -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - s64 delta; - unsigned long ticks = 0; - long sec_diff; - - do { - __get_time_values_from_xen(); - - delta = (s64)(shadow_system_time + - ((s64)__get_time_delta_usecs() * 1000LL) - - processed_system_time); - } - while ( !TIME_VALUES_UP_TO_DATE ); - - if ( unlikely(delta < 0) ) - { - printk("Timer ISR: Time went backwards: %lld\n", delta); - return; - } - - /* Process elapsed jiffies since last call. */ - while ( delta >= NS_PER_TICK ) - { - ticks++; - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - } - - if ( ticks != 0 ) - { - do_timer_ticks(ticks); - - if ( user_mode(regs) ) - update_process_times_us(ticks, 0); - else - update_process_times_us(0, ticks); - } - - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - if ( !INDEPENDENT_WALLCLOCK() && - ((time_status & STA_UNSYNC) != 0) && - (xtime.tv_sec > (last_update_from_xen + 60)) ) - { - /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */ - shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_UNDERFLOW(shadow_tv); - - /* - * Reset our running time counts if they are invalidated by a warp - * backwards of more than 500ms. - */ - sec_diff = xtime.tv_sec - shadow_tv.tv_sec; - if ( unlikely(abs(sec_diff) > 1) || - unlikely(((sec_diff * 1000000) + - xtime.tv_usec - shadow_tv.tv_usec) > 500000) ) - { -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - last_update_to_rtc = last_update_to_xen = 0; -#endif - last_seen_tv.tv_sec = 0; - } - - /* Update our unsynchronised xtime appropriately. */ - xtime = shadow_tv; - - last_update_from_xen = xtime.tv_sec; - } - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ( (xen_start_info.flags & SIF_INITDOMAIN) && - ((time_status & STA_UNSYNC) == 0) ) - { - /* Send synchronised time to Xen approximately every minute. */ - if ( xtime.tv_sec > (last_update_to_xen + 60) ) - { - dom0_op_t op; - struct timeval tv = xtime; - - tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_OVERFLOW(tv); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tv.tv_sec; - op.u.settime.usecs = tv.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - last_update_to_xen = xtime.tv_sec; - } - - /* - * If we have an externally synchronized Linux clock, then update CMOS - * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called - * as close as possible to 500 ms before the new second starts. - */ - if ( (xtime.tv_sec > (last_update_to_rtc + 660)) && - (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) && - (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) ) - { - if ( set_rtc_mmss(xtime.tv_sec) == 0 ) - last_update_to_rtc = xtime.tv_sec; - else - last_update_to_rtc = xtime.tv_sec - 600; - } - } -#endif -} - - -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - write_lock(&xtime_lock); - do_timer_interrupt(irq, NULL, regs); - write_unlock(&xtime_lock); -} - -static struct irqaction irq_timer = { - timer_interrupt, - SA_INTERRUPT, - 0, - "timer", - NULL, - NULL -}; - - -/* - * This function works out when the the next timer function has to be - * executed (by looking at the timer list) and sets the Xen one-shot - * domain timer to the appropriate value. This is typically called in - * cpu_idle() before the domain blocks. - * - * The function returns a non-0 value on error conditions. - * - * It must be called with interrupts disabled. - */ -extern spinlock_t timerlist_lock; -int set_timeout_timer(void) -{ - struct timer_list *timer; - u64 alarm = 0; - int ret = 0; - - spin_lock(&timerlist_lock); - - /* - * This is safe against long blocking (since calculations are not based on - * TSC deltas). It is also safe against warped system time since - * suspend-resume is cooperative and we would first get locked out. It is - * safe against normal updates of jiffies since interrupts are off. - */ - if ( (timer = next_timer_event()) != NULL ) - alarm = __jiffies_to_st(timer->expires); - - /* Tasks on the timer task queue expect to be executed on the next tick. */ - if ( TQ_ACTIVE(tq_timer) ) - alarm = __jiffies_to_st(jiffies + 1); - - /* Failure is pretty bad, but we'd best soldier on. */ - if ( HYPERVISOR_set_timer_op(alarm) != 0 ) - ret = -1; - - spin_unlock(&timerlist_lock); - - return ret; -} - - -/* Time debugging. */ -static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - unsigned long flags, j; - u64 s_now, j_st; - struct timeval s_tv, tv; - - struct timer_list *timer; - u64 t_st; - - read_lock_irqsave(&xtime_lock, flags); - s_tv.tv_sec = shadow_tv.tv_sec; - s_tv.tv_usec = shadow_tv.tv_usec; - s_now = shadow_system_time; - read_unlock_irqrestore(&xtime_lock, flags); - - do_gettimeofday(&tv); - - j = jiffies; - j_st = __jiffies_to_st(j); - - timer = next_timer_event(); - t_st = __jiffies_to_st(timer->expires); - - printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n", - (u32)(s_now>>32), (u32)s_now); - printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n", - tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec); - printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n", - jiffies,(u32)(j_st>>32), (u32)j_st, - timer->expires,(u32)(t_st>>32), (u32)t_st); - printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n", - (u32)(processed_system_time>>32), (u32)processed_system_time); - -#ifdef CONFIG_MAGIC_SYSRQ - handle_sysrq('t',NULL,NULL,NULL); -#endif -} - -static struct irqaction dbg_time = { - dbg_time_int, - SA_SHIRQ, - 0, - "timer_dbg", - &dbg_time_int, - NULL -}; - -void __init time_init(void) -{ - unsigned long long alarm; - u64 __cpu_khz, __cpu_ghz, cpu_freq, scale, scale2; - unsigned int cpu_ghz; - - __cpu_khz = __cpu_ghz = cpu_freq = HYPERVISOR_shared_info->cpu_freq; - do_div(__cpu_khz, 1000UL); - cpu_khz = (u32)__cpu_khz; - do_div(__cpu_ghz, 1000000000UL); - cpu_ghz = (unsigned int)__cpu_ghz; - - printk("Xen reported: %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - xtime.tv_sec = HYPERVISOR_shared_info->wc_sec; - xtime.tv_usec = HYPERVISOR_shared_info->wc_usec; - processed_system_time = shadow_system_time; - - for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) - continue; - - scale = 1000000LL << (32 + rdtsc_bitshift); - do_div(scale, (u32)cpu_freq); - - if ( (cpu_freq >> 32) != 0 ) - { - scale2 = 1000000LL << rdtsc_bitshift; - do_div(scale2, (u32)(cpu_freq>>32)); - scale += scale2; - } - - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; - - __get_time_values_from_xen(); - processed_system_time = shadow_system_time; - - time_irq = bind_virq_to_irq(VIRQ_TIMER); - debug_irq = bind_virq_to_irq(VIRQ_DEBUG); - - (void)setup_irq(time_irq, &irq_timer); - (void)setup_irq(debug_irq, &dbg_time); - - rdtscll(alarm); -} - -void time_suspend(void) -{ -} - -void time_resume(void) -{ - unsigned long flags; - write_lock_irqsave(&xtime_lock, flags); - /* Get timebases for new environment. */ - __get_time_values_from_xen(); - /* Reset our own concept of passage of system time. */ - processed_system_time = shadow_system_time; - /* Accept a warp in UTC (wall-clock) time. */ - last_seen_tv.tv_sec = 0; - /* Make sure we resync UTC time with Xen on next timer interrupt. */ - last_update_from_xen = 0; - write_unlock_irqrestore(&xtime_lock, flags); -} - -/* - * /proc/sys/xen: This really belongs in another file. It can stay here for - * now however. - */ -static ctl_table xen_subtable[] = { - {1, "independent_wallclock", &independent_wallclock, - sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, - {0} -}; -static ctl_table xen_table[] = { - {123, "xen", NULL, 0, 0555, xen_subtable}, - {0} -}; -static int __init xen_sysctl_init(void) -{ - (void)register_sysctl_table(xen_table, 0); - return 0; -} -__initcall(xen_sysctl_init); diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/kernel/traps.c --- a/linux-2.4-xen-sparse/arch/xen/kernel/traps.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,619 +0,0 @@ -/* - * linux/arch/i386/traps.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ptrace.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <linux/highmem.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/debugreg.h> -#include <asm/desc.h> -#include <asm/i387.h> - -#include <asm/smp.h> -#include <asm/pgalloc.h> - -#include <asm/hypervisor.h> - -#include <linux/irq.h> -#include <linux/module.h> - -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -asmlinkage void double_fault(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void fixup_4gb_segment(void); -asmlinkage void machine_check(void); - -int kstack_depth_to_print = 24; - - -/* - * If the address is either in the .text section of the - * kernel, or in the vmalloc'ed module regions, it *may* - * be the address of a calling routine - */ - -#ifdef CONFIG_MODULES - -extern struct module *module_list; -extern struct module kernel_module; - -static inline int kernel_text_address(unsigned long addr) -{ - int retval = 0; - struct module *mod; - - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - - for (mod = module_list; mod != &kernel_module; mod = mod->next) { - /* mod_bound tests for addr being inside the vmalloc'ed - * module area. Of course it'd be better to test only - * for the .text subset... */ - if (mod_bound(addr, 0, mod)) { - retval = 1; - break; - } - } - - return retval; -} - -#else - -static inline int kernel_text_address(unsigned long addr) -{ - return (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext); -} - -#endif - -void show_trace(unsigned long * stack) -{ - int i; - unsigned long addr; - - if (!stack) - stack = (unsigned long*)&stack; - - printk("Call Trace: "); - i = 1; - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_trace_task(struct task_struct *tsk) -{ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) - return; - show_trace((unsigned long *)esp); -} - -void show_stack(unsigned long * esp) -{ - unsigned long *stack; - int i; - - // debugging aid: "show_stack(NULL);" prints the - // back trace for this cpu. - - if(esp==NULL) - esp=(unsigned long*)&esp; - - stack = esp; - for(i=0; i < kstack_depth_to_print; i++) { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *stack++); - } - printk("\n"); - show_trace(esp); -} - -void show_registers(struct pt_regs *regs) -{ - int in_kernel = 1; - unsigned long esp; - unsigned short ss; - - esp = (unsigned long) (&regs->esp); - ss = __KERNEL_DS; - if (regs->xcs & 2) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } - printk(KERN_ALERT "CPU: %d\n", smp_processor_id() ); - printk(KERN_ALERT "EIP: %04x:[<%08lx>] %s\n", - 0xffff & regs->xcs, regs->eip, print_tainted()); - printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags); - printk(KERN_ALERT "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_ALERT "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_ALERT "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)", - current->comm, current->pid, 4096+(unsigned long)current); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (in_kernel) { - - printk(KERN_ALERT "\nStack: "); - show_stack((unsigned long*)esp); - -#if 0 - { - int i; - printk(KERN_ALERT "\nCode: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: - printk(KERN_ALERT " Bad EIP value."); - break; - } - printk("%02x ", c); - } - } -#endif - } - printk(KERN_ALERT "\n"); -} - -spinlock_t die_lock = SPIN_LOCK_UNLOCKED; - -void die(const char * str, struct pt_regs * regs, long err) -{ - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx\n", str, err & 0xffff); - show_registers(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); -} - -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) -{ - if (!(2 & regs->xcs)) - die(str, regs, err); -} - - -static void inline do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, - siginfo_t *info) -{ - if (!(regs->xcs & 2)) - goto kernel_trap; - - /*trap_signal:*/ { - struct task_struct *tsk = current; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - } - - kernel_trap: { - unsigned long fixup = search_exception_table(regs->eip); - if (fixup) - regs->eip = fixup; - else - die(str, regs, error_code); - return; - } -} - -#define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) -DO_ERROR( 3, SIGTRAP, "int3", int3) -DO_ERROR( 4, SIGSEGV, "overflow", overflow) -DO_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) -DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGBUS, "machine check", machine_check) - -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) -{ - /* - * If we trapped on an LDT access then ensure that the default_ldt is - * loaded, if nothing else. We load default_ldt lazily because LDT - * switching costs time and many applications don't need it. - */ - if ( unlikely((error_code & 6) == 4) ) - { - unsigned long ldt; - __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); - if ( ldt == 0 ) - { - xen_set_ldt((unsigned long)&default_ldt[0], 5); - return; - } - } - - if (!(regs->xcs & 2)) - goto gp_in_kernel; - - current->thread.error_code = error_code; - current->thread.trap_no = 13; - force_sig(SIGSEGV, current); - return; - -gp_in_kernel: - { - unsigned long fixup; - fixup = search_exception_table(regs->eip); - if (fixup) { - regs->eip = fixup; - return; - } - die("general protection fault", regs, error_code); - } -} - - -asmlinkage void do_debug(struct pt_regs * regs, long error_code) -{ - unsigned int condition; - struct task_struct *tsk = current; - siginfo_t info; - - condition = HYPERVISOR_get_debugreg(6); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg[7]) - goto clear_dr7; - } - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg[6] = condition; - - /* Mask out spurious TF errors due to lazy TF clearing */ - if (condition & DR_STEP) { - /* - * The TF error should be masked out only if the current - * process is not traced and if the TRAP flag has been set - * previously by a tracing process (condition detected by - * the PT_DTRACE flag); remember that the i386 TRAP flag - * can be modified by the process itself in user mode, - * allowing programs to debug themselves without the ptrace() - * interface. - */ - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } - - /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; - force_sig_info(SIGTRAP, &info, tsk); - - /* Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ - clear_dr7: - HYPERVISOR_set_debugreg(7, 0); - return; - - clear_TF: - regs->eflags &= ~TF_MASK; - return; -} - - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(void *eip) -{ - struct task_struct * task; - siginfo_t info; - unsigned short cwd, swd; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = eip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't syncronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - case 0x041: /* Stack Fault */ - case 0x241: /* Stack Fault | Direction */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) -{ - ignore_irq13 = 1; - math_error((void *)regs->eip); -} - -void simd_math_error(void *eip) -{ - struct task_struct * task; - siginfo_t info; - unsigned short mxcsr; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 19; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = eip; - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - mxcsr = get_fpu_mxcsr(task); - switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, - long error_code) -{ - if (cpu_has_xmm) { - /* Handle SIMD FPU exceptions on PIII+ processors. */ - ignore_irq13 = 1; - simd_math_error((void *)regs->eip); - } else { - die_if_kernel("cache flush denied", regs, error_code); - current->thread.trap_no = 19; - current->thread.error_code = error_code; - force_sig(SIGSEGV, current); - } -} - -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - */ -asmlinkage void math_state_restore(struct pt_regs regs) -{ - /* - * A trap in kernel mode can be ignored. It'll be the fast XOR or - * copying libraries, which will correctly save/restore state and - * reset the TS bit in CR0. - */ - if ( (regs.xcs & 2) == 0 ) - return; - - if (current->used_math) { - restore_fpu(current); - } else { - init_fpu(); - } - current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ -} - - -#define _set_gate(gate_addr,type,dpl,addr) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ -} while (0) - -static void __init set_call_gate(void *a, void *addr) -{ - _set_gate(a,12,3,addr); -} - - -/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ -static trap_info_t trap_table[] = { - { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, - { 1, 0, __KERNEL_CS, (unsigned long)debug }, - { 3, 3, __KERNEL_CS, (unsigned long)int3 }, - { 4, 3, __KERNEL_CS, (unsigned long)overflow }, - { 5, 3, __KERNEL_CS, (unsigned long)bounds }, - { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, - { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, - { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, - { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, - { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, - { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, - { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, - { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, - { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, - { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment }, - { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, - { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, - { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, - { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, - { SYSCALL_VECTOR, - 3, __KERNEL_CS, (unsigned long)system_call }, - { 0, 0, 0, 0 } -}; - - -void __init trap_init(void) -{ - HYPERVISOR_set_trap_table(trap_table); - - /* - * The default LDT is a single-entry callgate to lcall7 for iBCS and a - * callgate to lcall27 for Solaris/x86 binaries. - */ - clear_page(&default_ldt[0]); - set_call_gate(&default_ldt[0],lcall7); - set_call_gate(&default_ldt[4],lcall27); - __make_page_readonly(&default_ldt[0]); - - cpu_init(); -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/lib/Makefile --- a/linux-2.4-xen-sparse/arch/xen/lib/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,15 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -c $< -o $*.o - -L_TARGET = lib.a - -obj-y = checksum.o old-checksum.o delay.o \ - usercopy.o getuser.o \ - memcpy.o strstr.o xen_proc.o - -obj-$(CONFIG_X86_USE_3DNOW) += mmx.o -obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o - -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/lib/delay.c --- a/linux-2.4-xen-sparse/arch/xen/lib/delay.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,52 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. - */ - -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/delay.h> -#include <asm/processor.h> -#include <asm/delay.h> - -#ifdef CONFIG_SMP -#include <asm/smp.h> -#endif - -void __delay(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - __asm__("mull %0" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); - __delay(xloops * HZ); -} - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ -} - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/mm/Makefile --- a/linux-2.4-xen-sparse/arch/xen/mm/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,16 +0,0 @@ -# -# Makefile for the linux i386-specific parts of the memory manager. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... - -O_TARGET := mm.o - -obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o - -export-objs := pageattr.o - -include $(TOPDIR)/Rules.make diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/mm/fault.c --- a/linux-2.4-xen-sparse/arch/xen/mm/fault.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,302 +0,0 @@ -/* - * linux/arch/i386/mm/fault.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/hardirq.h> - -extern void die(const char *,struct pt_regs *,long); - -pgd_t *cur_pgd; - -extern spinlock_t timerlist_lock; - -/* - * Unlock any spinlocks which will prevent us from getting the - * message out (timerlist_lock is acquired through the - * console unblank code) - */ -void bust_spinlocks(int yes) -{ - spin_lock_init(&timerlist_lock); - if (yes) { - oops_in_progress = 1; - } else { - int loglevel_save = console_loglevel; -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; - } -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - * - * error_code: - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - */ -asmlinkage void do_page_fault(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) -{ - struct task_struct *tsk = current; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long page; - unsigned long fixup; - int write; - siginfo_t info; - - /* Set the "privileged fault" bit to something sane. */ - error_code &= 3; - error_code |= (regs->xcs & 2) << 1; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. - */ - if (address >= TASK_SIZE && !(error_code & 5)) - goto vmalloc_fault; - - mm = tsk->mm; - info.si_code = SEGV_MAPERR; - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_interrupt() || !mm) - goto no_context; - - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (error_code & 4) { - /* - * accessing the stack below %esp is always a bug. - * The "+ 32" is there due to some instructions (like - * pusha) doing post-decrement on the stack and that - * doesn't show up until later.. - */ - if (address + 32 < regs->esp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - info.si_code = SEGV_ACCERR; - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ - /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - - survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - switch (handle_mm_fault(mm, vma, address, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - goto do_sigbus; - default: - goto out_of_memory; - } - - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if ((fixup = search_exception_table(regs->eip)) != 0) { - regs->eip = fixup; - return; - } - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - - bust_spinlocks(1); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n",address); - printk(" printing eip:\n"); - printk("%08lx\n", regs->eip); - page = ((unsigned long *) cur_pgd)[address >> 22]; - printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = machine_to_phys(page); - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, - machine_to_phys(page)); - } - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - if (tsk->pid == 1) { - yield(); - goto survive; - } - up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) - do_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & 4)) - goto no_context; - return; - -vmalloc_fault: - { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - int offset = __pgd_offset(address); - pgd_t *pgd, *pgd_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - - pgd = offset + cur_pgd; - pgd_k = init_mm.pgd + offset; - - if (!pgd_present(*pgd_k)) - goto no_context; - set_pgd(pgd, *pgd_k); - - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); - - pte_k = pte_offset(pmd_k, address); - if (!pte_present(*pte_k)) - goto no_context; - return; - } -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/mm/init.c --- a/linux-2.4-xen-sparse/arch/xen/mm/init.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,482 +0,0 @@ -/* - * linux/arch/i386/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#include <linux/config.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#ifdef CONFIG_BLK_DEV_INITRD -#include <linux/blk.h> -#endif -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/slab.h> - -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/dma.h> -#include <asm/apic.h> -#include <asm/tlb.h> - -/* XEN: We *cannot* use mmx_clear_page() this early. Force dumb memset(). */ -#undef clear_page -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) - -mmu_gather_t mmu_gathers[NR_CPUS]; -unsigned long highstart_pfn, highend_pfn; -static unsigned long totalram_pages; -static unsigned long totalhigh_pages; - -int do_check_pgt_cache(int low, int high) -{ - int freed = 0; - if(pgtable_cache_size > high) { - do { - if (!QUICKLIST_EMPTY(pgd_quicklist)) { - free_pgd_slow(get_pgd_fast()); - freed++; - } - if (!QUICKLIST_EMPTY(pte_quicklist)) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); - freed++; - } - } while(pgtable_cache_size > low); - } - return freed; -} - -/* - * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the - * physical space so we can cache the place of the first one and move - * around without checking the pgd every time. - */ - -#if CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} -#endif /* CONFIG_HIGHMEM */ - -void show_mem(void) -{ - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - i = max_mapnr; - while (i-- > 0) { - total++; - if (PageHighMem(mem_map+i)) - highmem++; - if (PageReserved(mem_map+i)) - reserved++; - else if (PageSwapCache(mem_map+i)) - cached++; - else if (page_count(mem_map+i)) - shared += page_count(mem_map+i) - 1; - } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n",highmem); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); - printk("%ld pages in page table cache\n",pgtable_cache_size); - show_buffers(); -} - -/* References to section boundaries */ - -extern char _text, _etext, _edata, __bss_start, _end; -extern char __init_begin, __init_end; - -static inline void set_pte_phys (unsigned long vaddr, - unsigned long phys, pgprot_t prot) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - pgd = init_mm.pgd + __pgd_offset(vaddr); - if (pgd_none(*pgd)) { - printk("PAE BUG #00!\n"); - return; - } - pmd = pmd_offset(pgd, vaddr); - if (pmd_none(*pmd)) { - printk("PAE BUG #01!\n"); - return; - } - pte = pte_offset(pmd, vaddr); - - set_pte(pte, (pte_t) { phys | pgprot_val(prot) }); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, - pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - printk("Invalid __set_fixmap\n"); - return; - } - set_pte_phys(address, phys, flags); -} - -void clear_fixmap(enum fixed_addresses idx) -{ - set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0)); -} - -static void __init fixrange_init (unsigned long start, - unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd, *kpgd; - pmd_t *pmd, *kpmd; - pte_t *pte, *kpte; - int i, j; - unsigned long vaddr; - - vaddr = start; - i = __pgd_offset(vaddr); - j = __pmd_offset(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } - pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - clear_page(pte); - kpgd = pgd_offset_k((unsigned long)pte); - kpmd = pmd_offset(kpgd, (unsigned long)pte); - kpte = pte_offset(kpmd, (unsigned long)pte); - set_pte(kpte, pte_wrprotect(*kpte)); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - } - vaddr += PMD_SIZE; - } - j = 0; - } -} - - -static void __init pagetable_init (void) -{ - unsigned long vaddr, end, ram_end; - pgd_t *kpgd, *pgd, *pgd_base; - int i, j, k; - pmd_t *kpmd, *pmd; - pte_t *kpte, *pte, *pte_base; - - ram_end = end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - if ( xen_start_info.nr_pages < max_low_pfn ) - ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE); - - pgd_base = init_mm.pgd; - i = __pgd_offset(PAGE_OFFSET); - pgd = pgd_base + i; - - for (; i < PTRS_PER_PGD; pgd++, i++) { - vaddr = i*PGDIR_SIZE; - if (vaddr >= end) - break; - pmd = (pmd_t *)pgd; - for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE; - if (vaddr >= end) - break; - - /* Filled in for us already? */ - if ( pmd_val(*pmd) & _PAGE_PRESENT ) - continue; - - pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - clear_page(pte_base); - - for (k = 0; k < PTRS_PER_PTE; pte++, k++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; - if (vaddr >= ram_end) - break; - *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); - } - kpgd = pgd_offset_k((unsigned long)pte_base); - kpmd = pmd_offset(kpgd, (unsigned long)pte_base); - kpte = pte_offset(kpmd, (unsigned long)pte_base); - set_pte(kpte, pte_wrprotect(*kpte)); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); - } - } - - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); - -#if CONFIG_HIGHMEM - /* - * Permanent kmaps: - */ - vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, init_mm.pgd); - - pgd = init_mm.pgd + __pgd_offset(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); - pkmap_page_table = pte; -#endif -} - -static void __init zone_sizes_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; - unsigned int max_dma, high, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - high = highend_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - free_area_init(zones_size); -} - -void __init paging_init(void) -{ - pagetable_init(); - - zone_sizes_init(); - - /* Switch to the real shared_info page, and clear the dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - -#ifdef CONFIG_HIGHMEM - kmap_init(); -#endif -} - -static inline int page_is_ram (unsigned long pagenr) -{ - return 1; -} - -#ifdef CONFIG_HIGHMEM -void __init one_highpage_init(struct page *page, int free_page) -{ - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - if ( free_page ) - __free_page(page); - totalhigh_pages++; -} -#endif /* CONFIG_HIGHMEM */ - -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - highmem_start_page = mem_map + highstart_pfn; - max_mapnr = num_physpages = highend_pfn; - num_mappedpages = max_low_pfn; -#else - max_mapnr = num_mappedpages = num_physpages = max_low_pfn; -#endif -} - -static int __init free_pages_init(void) -{ -#ifdef CONFIG_HIGHMEM - int bad_ppro = 0; -#endif - int reservedpages, pfn; - - /* add only boot_pfn pages of low memory to free list. - * max_low_pfn may be sized for - * pages yet to be allocated from the hypervisor, or it may be set - * to override the xen_start_info amount of memory - */ - int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn); - - /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - /* XEN: init and count low-mem pages outside initial allocation. */ - for (pfn = boot_pfn; pfn < max_low_pfn; pfn++) { - ClearPageReserved(&mem_map[pfn]); - atomic_set(&mem_map[pfn].count, 1); - totalram_pages++; - } - - reservedpages = 0; - for (pfn = 0; pfn < boot_pfn ; pfn++) { - /* - * Only count reserved RAM pages - */ - if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) - reservedpages++; - } -#ifdef CONFIG_HIGHMEM - for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) - one_highpage_init((struct page *) (mem_map + pfn), - (pfn < xen_start_info.nr_pages)); - totalram_pages += totalhigh_pages; -#endif - return reservedpages; -} - -void __init mem_init(void) -{ - int codesize, reservedpages, datasize, initsize; - - if (!mem_map) - BUG(); - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); - BUG(); - } -#endif - - set_max_mapnr_init(); - - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - - reservedpages = free_pages_init(); - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); - - boot_cpu_data.wp_works_ok = 1; -} - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -void si_meminfo(struct sysinfo *val) -{ - val->totalram = max_pfn; - val->sharedram = 0; - val->freeram = nr_free_pages(); - val->bufferram = atomic_read(&buffermem_pages); - val->totalhigh = max_pfn-max_low_pfn; - val->freehigh = nr_free_highpages(); - val->mem_unit = PAGE_SIZE; - return; -} - -#if defined(CONFIG_X86_PAE) -struct kmem_cache_s *pae_pgd_cachep; -void __init pgtable_cache_init(void) -{ - /* - * PAE pgds must be 16-byte aligned: - */ - pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); - if (!pae_pgd_cachep) - panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); -} -#endif /* CONFIG_X86_PAE */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/mm/ioremap.c --- a/linux-2.4-xen-sparse/arch/xen/mm/ioremap.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,266 +0,0 @@ -/* - * arch/xen/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. - * - * (C) Copyright 1995 1996 Linus Torvalds - * - * Modifications for Xenolinux (c) 2003-2004 Keir Fraser - */ - -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/vmalloc.h> -#include <asm/io.h> -#include <asm/pgalloc.h> -#include <asm/uaccess.h> -#include <asm/tlb.h> -#include <asm/mmu.h> - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - - do { - (*v)->ptr = virt_to_machine(pte); - (*v)++; - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int direct_remap_area_pmd(struct mm_struct *mm, - pmd_t *pmd, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - if (address >= end) - BUG(); - do { - pte_t *pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, v); - - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v) -{ - pgd_t * dir; - unsigned long end = address + size; - - dir = pgd_offset(mm, address); - flush_cache_all(); - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - if (!pmd) - return -ENOMEM; - direct_remap_area_pmd(mm, pmd, address, end - address, &v); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_all(); - return 0; -} - - -int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid) -{ - int i; - unsigned long start_address; -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; - - start_address = address; - - for( i = 0; i < size; i += PAGE_SIZE ) - { - if ( (v - u) == MAX_DIRECTMAP_MMU_QUEUE ) - { - /* Fill in the PTE pointers. */ - __direct_remap_area_pages( mm, - start_address, - address-start_address, - u); - - if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 ) - return -EFAULT; - v = u; - start_address = address; - } - - /* - * Fill in the machine address: PTE ptr is done later by - * __direct_remap_area_pages(). - */ - v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); - - machine_addr += PAGE_SIZE; - address += PAGE_SIZE; - v++; - } - - if ( v != u ) - { - /* get the ptep's filled in */ - __direct_remap_area_pages(mm, - start_address, - address-start_address, - u); - if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) ) - return -EFAULT; - } - - return 0; -} - - -#endif /* CONFIG_XEN_PRIVILEGED_GUEST */ - - -/* - * Remap an arbitrary machine address space into the kernel virtual - * address space. Needed when a privileged instance of Xenolinux wants - * to access space outside its world directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void * __ioremap(unsigned long machine_addr, - unsigned long size, - unsigned long flags) -{ -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - void * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - pgprot_t prot; - - /* Don't allow wraparound or zero size */ - last_addr = machine_addr + size - 1; - if (!size || last_addr < machine_addr) - return NULL; - - /* Mappings have to be page-aligned */ - offset = machine_addr & ~PAGE_MASK; - machine_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - machine_addr; - - /* Ok, go for it */ - area = get_vm_area(size, VM_IOREMAP); - if (!area) - return NULL; - addr = area->addr; - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | - _PAGE_ACCESSED | flags); - if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), - machine_addr, size, prot, 0)) { - vfree(addr); - return NULL; - } - return (void *) (offset + (char *)addr); -#else - return NULL; -#endif -} - -void iounmap(void *addr) -{ - vfree((void *)((unsigned long)addr & PAGE_MASK)); -} - -/* implementation of boot time ioremap for purpose of provising access -to the vga console for privileged domains. Unlike boot time ioremap on -other architectures, ours is permanent and not reclaimed when then vmalloc -infrastructure is started */ - -void __init *bt_ioremap(unsigned long machine_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = machine_addr + size - 1; - if (!size || last_addr < machine_addr) - return NULL; - - /* - * Mappings have to be page-aligned - */ - offset = machine_addr & ~PAGE_MASK; - machine_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - machine_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. - */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - __set_fixmap(idx, machine_addr, PAGE_KERNEL); - machine_addr += PAGE_SIZE; - --idx; - --nrpages; - } - - flush_tlb_all(); - - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - - -#if 0 /* We don't support these functions. They shouldn't be required. */ -void __init bt_iounmap(void *addr, unsigned long size) {} -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/arch/xen/vmlinux.lds --- a/linux-2.4-xen-sparse/arch/xen/vmlinux.lds Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,75 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>; - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) -SECTIONS -{ - . = 0xC0000000 + 0x100000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c --- a/linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1663 +0,0 @@ -/* - * linux/drivers/block/ll_rw_blk.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 1994, Karl Keyte: Added support for disk statistics - * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@xxxxxxx> SuSE - * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@xxxxxxx> - * kernel-doc documentation started by NeilBrown <neilb@xxxxxxxxxxxxxxx> - July2000 - */ - -/* - * This handles all read/write requests to block devices - */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/config.h> -#include <linux/locks.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/init.h> -#include <linux/smp_lock.h> -#include <linux/completion.h> -#include <linux/bootmem.h> - -#include <asm/system.h> -#include <asm/io.h> -#include <linux/blk.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/module.h> - -/* - * MAC Floppy IWM hooks - */ - -#ifdef CONFIG_MAC_FLOPPY_IWM -extern int mac_floppy_init(void); -#endif - -/* - * For the allocated request tables - */ -static kmem_cache_t *request_cachep; - -/* - * The "disk" task queue is used to start the actual requests - * after a plug - */ -DECLARE_TASK_QUEUE(tq_disk); - -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... - */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - -/* This specifies how many sectors to read ahead on the disk. */ - -int read_ahead[MAX_BLKDEV]; - -/* blk_dev_struct is: - * *request_fn - * *current_request - */ -struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ - -/* - * blk_size contains the size of all block-devices in units of 1024 byte - * sectors: - * - * blk_size[MAJOR][MINOR] - * - * if (!blk_size[MAJOR]) then no minor size checking is done. - */ -int * blk_size[MAX_BLKDEV]; - -/* - * blksize_size contains the size of all block-devices: - * - * blksize_size[MAJOR][MINOR] - * - * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. - */ -int * blksize_size[MAX_BLKDEV]; - -/* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. - */ -int * hardsect_size[MAX_BLKDEV]; - -/* - * The following tunes the read-ahead algorithm in mm/filemap.c - */ -int * max_readahead[MAX_BLKDEV]; - -/* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -unsigned long blk_max_low_pfn, blk_max_pfn; -int blk_nohighio = 0; - -int block_dump = 0; - -static struct timer_list writeback_timer; - -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} - -static inline request_queue_t *__blk_get_queue(kdev_t dev) -{ - struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); - - if (bdev->queue) - return bdev->queue(dev); - else - return &blk_dev[MAJOR(dev)].request_queue; -} - -request_queue_t *blk_get_queue(kdev_t dev) -{ - return __blk_get_queue(dev); -} - -static int __blk_cleanup_queue(struct request_list *list) -{ - struct list_head *head = &list->free; - struct request *rq; - int i = 0; - - while (!list_empty(head)) { - rq = list_entry(head->next, struct request, queue); - list_del(&rq->queue); - kmem_cache_free(request_cachep, rq); - i++; - }; - - if (i != list->count) - printk("request list leak!\n"); - - list->count = 0; - return i; -} - -/** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released - * - * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... - **/ -void blk_cleanup_queue(request_queue_t * q) -{ - int count = q->nr_requests; - - count -= __blk_cleanup_queue(&q->rq); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); - if (atomic_read(&q->nr_sectors)) - printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors)); - - memset(q, 0, sizeof(*q)); -} - -/** - * blk_queue_headactive - indicate whether head of request queue may be active - * @q: The queue which this applies to. - * @active: A flag indication where the head of the queue is active. - * - * Description: - * The driver for a block device may choose to leave the currently active - * request on the request queue, removing it only when it has completed. - * The queue handling routines assume this by default for safety reasons - * and will not involve the head of the request queue in any merging or - * reordering of requests when the queue is unplugged (and thus may be - * working on this particular request). - * - * If a driver removes requests from the queue before processing them, then - * it may indicate that it does so, there by allowing the head of the queue - * to be involved in merging and reordering. This is done be calling - * blk_queue_headactive() with an @active flag of %0. - * - * If a driver processes several requests at once, it must remove them (or - * at least all but one of them) from the request queue. - * - * When a queue is plugged the head will be assumed to be inactive. - **/ - -void blk_queue_headactive(request_queue_t * q, int active) -{ - q->head_active = active; -} - -/** - * blk_queue_throttle_sectors - indicates you will call sector throttling funcs - * @q: The queue which this applies to. - * @active: A flag indication if you want sector throttling on - * - * Description: - * The sector throttling code allows us to put a limit on the number of - * sectors pending io to the disk at a given time, sending @active nonzero - * indicates you will call blk_started_sectors and blk_finished_sectors in - * addition to calling blk_started_io and blk_finished_io in order to - * keep track of the number of sectors in flight. - **/ - -void blk_queue_throttle_sectors(request_queue_t * q, int active) -{ - q->can_throttle = active; -} - -/** - * blk_queue_make_request - define an alternate make_request function for a device - * @q: the request queue for the device to be affected - * @mfn: the alternate make_request function - * - * Description: - * The normal way for &struct buffer_heads to be passed to a device - * driver is for them to be collected into requests on a request - * queue, and then to allow the device driver to select requests - * off that queue when it is ready. This works well for many block - * devices. However some block devices (typically virtual devices - * such as md or lvm) do not benefit from the processing on the - * request queue, and are served best by having the requests passed - * directly to them. This can be achieved by providing a function - * to blk_queue_make_request(). - * - * Caveat: - * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. - **/ - -void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) -{ - q->make_request_fn = mfn; -} - -/** - * blk_queue_bounce_limit - set bounce buffer limit for queue - * @q: the request queue for the device - * @dma_addr: bus address limit - * - * Description: - * Different hardware can have different requirements as to what pages - * it can do I/O directly to. A low level driver can call - * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @page. By default - * the block layer sets this to the highest numbered "low" memory page. - **/ -void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) -{ - unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; - unsigned long mb = dma_addr >> 20; - static request_queue_t *old_q; - - /* - * keep this for debugging for now... - */ - if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) { - old_q = q; - printk("blk: queue %p, ", q); - if (dma_addr == BLK_BOUNCE_ANY) - printk("no I/O memory limit\n"); - else - printk("I/O limit %luMb (mask 0x%Lx)\n", mb, - (long long) dma_addr); - } - - q->bounce_pfn = bounce_pfn; -} - - -/* - * can we merge the two segments, or do we need to start a new one? - */ -static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) -{ - /* - * if bh and nxt are contigous and don't cross a 4g boundary, it's ok - */ - if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt)) - return 1; - - return 0; -} - -int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) -{ - return __blk_seg_merge_ok(bh, nxt); -} - -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) -{ - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) -{ - if (__blk_seg_merge_ok(req->bhtail, bh)) - return 1; - - return ll_new_segment(q, req, max_segments); -} - -static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) -{ - if (__blk_seg_merge_ok(bh, req->bh)) - return 1; - - return ll_new_segment(q, req, max_segments); -} - -static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) -{ - int total_segments = req->nr_segments + next->nr_segments; - - if (__blk_seg_merge_ok(req->bhtail, next->bh)) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - -/* - * "plug" the device if there are no outstanding requests: this will - * force the transfer to start only after we have put all the requests - * on the list. - * - * This is called with interrupts off and no requests on the queue. - * (and with the request spinlock acquired) - */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) -{ - /* - * no need to replug device - */ - if (!list_empty(&q->queue_head) || q->plugged) - return; - - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); -} - -/* - * remove the plug and let it rip.. - */ -static inline void __generic_unplug_device(request_queue_t *q) -{ - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) - q->request_fn(q); - } -} - -void generic_unplug_device(void *data) -{ - request_queue_t *q = (request_queue_t *) data; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); -} - -/** blk_grow_request_list - * @q: The &request_queue_t - * @nr_requests: how many requests are desired - * - * More free requests are added to the queue's free lists, bringing - * the total number of requests to @nr_requests. - * - * The requests are added equally to the request queue's read - * and write freelists. - * - * This function can sleep. - * - * Returns the (new) number of requests which the queue has available. - */ -int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors) -{ - unsigned long flags; - /* Several broken drivers assume that this function doesn't sleep, - * this causes system hangs during boot. - * As a temporary fix, make the function non-blocking. - */ - spin_lock_irqsave(&io_request_lock, flags); - while (q->nr_requests < nr_requests) { - struct request *rq; - - rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC); - if (rq == NULL) - break; - memset(rq, 0, sizeof(*rq)); - rq->rq_status = RQ_INACTIVE; - list_add(&rq->queue, &q->rq.free); - q->rq.count++; - - q->nr_requests++; - } - - /* - * Wakeup waiters after both one quarter of the - * max-in-fligh queue and one quarter of the requests - * are available again. - */ - - q->batch_requests = q->nr_requests / 4; - if (q->batch_requests > 32) - q->batch_requests = 32; - q->batch_sectors = max_queue_sectors / 4; - - q->max_queue_sectors = max_queue_sectors; - - BUG_ON(!q->batch_sectors); - atomic_set(&q->nr_sectors, 0); - - spin_unlock_irqrestore(&io_request_lock, flags); - return q->nr_requests; -} - -static void blk_init_free_list(request_queue_t *q) -{ - struct sysinfo si; - int megs; /* Total memory, in megabytes */ - int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS; - - INIT_LIST_HEAD(&q->rq.free); - q->rq.count = 0; - q->rq.pending[READ] = q->rq.pending[WRITE] = 0; - q->nr_requests = 0; - - si_meminfo(&si); - megs = si.totalram >> (20 - PAGE_SHIFT); - nr_requests = MAX_NR_REQUESTS; - if (megs < 30) { - nr_requests /= 2; - max_queue_sectors /= 2; - } - /* notice early if anybody screwed the defaults */ - BUG_ON(!nr_requests); - BUG_ON(!max_queue_sectors); - - blk_grow_request_list(q, nr_requests, max_queue_sectors); - - init_waitqueue_head(&q->wait_for_requests); - - spin_lock_init(&q->queue_lock); -} - -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); - -/** - * blk_init_queue - prepare a request queue for use with a block device - * @q: The &request_queue_t to be initialised - * @rfn: The function to be called to process requests that have been - * placed on the queue. - * - * Description: - * If a block device wishes to use the standard request handling procedures, - * which sorts requests and coalesces adjacent requests, then it must - * call blk_init_queue(). The function @rfn will be called when there - * are requests on the queue that need to be processed. If the device - * supports plugging, then @rfn may not be called immediately when requests - * are available on the queue, but may be called at some time later instead. - * Plugged queues are generally unplugged when a buffer belonging to one - * of the requests on the queue is needed, or due to memory pressure. - * - * @rfn is not required, or even expected, to remove all requests off the - * queue, but only as many as it can handle at a time. If it does leave - * requests on the queue, it is responsible for arranging that the requests - * get dealt with eventually. - * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. - * - * The request on the head of the queue is by default assumed to be - * potentially active, and it is not considered for re-ordering or merging - * whenever the given queue is unplugged. This behaviour can be changed with - * blk_queue_headactive(). - * - * Note: - * blk_init_queue() must be paired with a blk_cleanup_queue() call - * when the block device is deactivated (such as at module unload). - **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) -{ - INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); - q->request_fn = rfn; - q->back_merge_fn = ll_back_merge_fn; - q->front_merge_fn = ll_front_merge_fn; - q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; - q->plug_tq.sync = 0; - q->plug_tq.routine = &generic_unplug_device; - q->plug_tq.data = q; - q->plugged = 0; - q->can_throttle = 0; - - /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. - */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; - - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); -} - -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); -/* - * Get a free request. io_request_lock must be held and interrupts - * disabled on the way in. Returns NULL if there are no free requests. - */ -static struct request *get_request(request_queue_t *q, int rw) -{ - struct request *rq = NULL; - struct request_list *rl = &q->rq; - - if (blk_oversized_queue(q)) { - int rlim = q->nr_requests >> 5; - - if (rlim < 4) - rlim = 4; - - /* - * if its a write, or we have more than a handful of reads - * pending, bail out - */ - if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim)) - return NULL; - if (blk_oversized_queue_reads(q)) - return NULL; - } - - if (!list_empty(&rl->free)) { - rq = blkdev_free_rq(&rl->free); - list_del(&rq->queue); - rl->count--; - rl->pending[rw]++; - rq->rq_status = RQ_ACTIVE; - rq->cmd = rw; - rq->special = NULL; - rq->q = q; - } - - return rq; -} - -/* - * Here's the request allocation design, low latency version: - * - * 1: Blocking on request exhaustion is a key part of I/O throttling. - * - * 2: We want to be `fair' to all requesters. We must avoid starvation, and - * attempt to ensure that all requesters sleep for a similar duration. Hence - * no stealing requests when there are other processes waiting. - * - * There used to be more here, attempting to allow a process to send in a - * number of requests once it has woken up. But, there's no way to - * tell if a process has just been woken up, or if it is a new process - * coming in to steal requests from the waiters. So, we give up and force - * everyone to wait fairly. - * - * So here's what we do: - * - * a) A READA requester fails if free_requests < batch_requests - * - * We don't want READA requests to prevent sleepers from ever - * waking. Note that READA is used extremely rarely - a few - * filesystems use it for directory readahead. - * - * When a process wants a new request: - * - * b) If free_requests == 0, the requester sleeps in FIFO manner, and - * the queue full condition is set. The full condition is not - * cleared until there are no longer any waiters. Once the full - * condition is set, all new io must wait, hopefully for a very - * short period of time. - * - * When a request is released: - * - * c) If free_requests < batch_requests, do nothing. - * - * d) If free_requests >= batch_requests, wake up a single waiter. - * - * As each waiter gets a request, he wakes another waiter. We do this - * to prevent a race where an unplug might get run before a request makes - * it's way onto the queue. The result is a cascade of wakeups, so delaying - * the initial wakeup until we've got batch_requests available helps avoid - * wakeups where there aren't any requests available yet. - */ - -static struct request *__get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&q->wait_for_requests, &wait); - - do { - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock_irq(&io_request_lock); - if (blk_oversized_queue(q) || q->rq.count == 0) { - __generic_unplug_device(q); - spin_unlock_irq(&io_request_lock); - schedule(); - spin_lock_irq(&io_request_lock); - } - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - } while (rq == NULL); - remove_wait_queue(&q->wait_for_requests, &wait); - current->state = TASK_RUNNING; - - return rq; -} - -static void get_request_wait_wakeup(request_queue_t *q, int rw) -{ - /* - * avoid losing an unplug if a second __get_request_wait did the - * generic_unplug_device while our __get_request_wait was running - * w/o the queue_lock held and w/ our request out of the queue. - */ - if (waitqueue_active(&q->wait_for_requests)) - wake_up(&q->wait_for_requests); -} - -/* RO fail safe mechanism */ - -static long ro_bits[MAX_BLKDEV][8]; - -int is_read_only(kdev_t dev) -{ - int minor,major; - - major = MAJOR(dev); - minor = MINOR(dev); - if (major < 0 || major >= MAX_BLKDEV) return 0; - return ro_bits[major][minor >> 5] & (1 << (minor & 31)); -} - -void set_device_ro(kdev_t dev,int flag) -{ - int minor,major; - - major = MAJOR(dev); - minor = MINOR(dev); - if (major < 0 || major >= MAX_BLKDEV) return; - if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); - else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); -} - -inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) -{ - unsigned int major = MAJOR(dev); - unsigned int index; - - index = disk_index(dev); - if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) - return; - - kstat.dk_drive[major][index] += new_io; - if (rw == READ) { - kstat.dk_drive_rio[major][index] += new_io; - kstat.dk_drive_rblk[major][index] += nr_sectors; - } else if (rw == WRITE) { - kstat.dk_drive_wio[major][index] += new_io; - kstat.dk_drive_wblk[major][index] += nr_sectors; - } else - printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); -} - -#ifdef CONFIG_BLK_STATS -/* - * Return up to two hd_structs on which to do IO accounting for a given - * request. - * - * On a partitioned device, we want to account both against the partition - * and against the whole disk. - */ -static void locate_hd_struct(struct request *req, - struct hd_struct **hd1, - struct hd_struct **hd2) -{ - struct gendisk *gd; - - *hd1 = NULL; - *hd2 = NULL; - - gd = get_gendisk(req->rq_dev); - if (gd && gd->part) { - /* Mask out the partition bits: account for the entire disk */ - int devnr = MINOR(req->rq_dev) >> gd->minor_shift; - int whole_minor = devnr << gd->minor_shift; - - *hd1 = &gd->part[whole_minor]; - if (whole_minor != MINOR(req->rq_dev)) - *hd2= &gd->part[MINOR(req->rq_dev)]; - } -} - -/* - * Round off the performance stats on an hd_struct. - * - * The average IO queue length and utilisation statistics are maintained - * by observing the current state of the queue length and the amount of - * time it has been in this state for. - * Normally, that accounting is done on IO completion, but that can result - * in more than a second's worth of IO being accounted for within any one - * second, leading to >100% utilisation. To deal with that, we do a - * round-off before returning the results when reading /proc/partitions, - * accounting immediately for all queue usage up to the current jiffies and - * restarting the counters again. - */ -void disk_round_stats(struct hd_struct *hd) -{ - unsigned long now = jiffies; - - hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change)); - hd->last_queue_change = now; - - if (hd->ios_in_flight) - hd->io_ticks += (now - hd->last_idle_time); - hd->last_idle_time = now; -} - -static inline void down_ios(struct hd_struct *hd) -{ - disk_round_stats(hd); - --hd->ios_in_flight; -} - -static inline void up_ios(struct hd_struct *hd) -{ - disk_round_stats(hd); - ++hd->ios_in_flight; -} - -static void account_io_start(struct hd_struct *hd, struct request *req, - int merge, int sectors) -{ - switch (req->cmd) { - case READ: - if (merge) - hd->rd_merges++; - hd->rd_sectors += sectors; - break; - case WRITE: - if (merge) - hd->wr_merges++; - hd->wr_sectors += sectors; - break; - } - if (!merge) - up_ios(hd); -} - -static void account_io_end(struct hd_struct *hd, struct request *req) -{ - unsigned long duration = jiffies - req->start_time; - switch (req->cmd) { - case READ: - hd->rd_ticks += duration; - hd->rd_ios++; - break; - case WRITE: - hd->wr_ticks += duration; - hd->wr_ios++; - break; - } - down_ios(hd); -} - -void req_new_io(struct request *req, int merge, int sectors) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - account_io_start(hd1, req, merge, sectors); - if (hd2) - account_io_start(hd2, req, merge, sectors); -} - -void req_merged_io(struct request *req) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - down_ios(hd1); - if (hd2) - down_ios(hd2); -} - -void req_finished_io(struct request *req) -{ - struct hd_struct *hd1, *hd2; - - locate_hd_struct(req, &hd1, &hd2); - if (hd1) - account_io_end(hd1, req); - if (hd2) - account_io_end(hd2, req); -} -EXPORT_SYMBOL(req_finished_io); -#endif /* CONFIG_BLK_STATS */ - -/* - * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the - * request queue list. - * - * By this point, req->cmd is always either READ/WRITE, never READA, - * which is important for drive_stat_acct() above. - */ -static inline void add_request(request_queue_t * q, struct request * req, - struct list_head *insert_here) -{ - drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); - BUG(); - } - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - list_add(&req->queue, insert_here); -} - -/* - * Must be called with io_request_lock held and interrupts disabled - */ -void blkdev_release_request(struct request *req) -{ - request_queue_t *q = req->q; - - req->rq_status = RQ_INACTIVE; - req->q = NULL; - - /* - * Request may not have originated from ll_rw_blk. if not, - * assume it has free buffers and check waiters - */ - if (q) { - struct request_list *rl = &q->rq; - int oversized_batch = 0; - - if (q->can_throttle) - oversized_batch = blk_oversized_queue_batch(q); - rl->count++; - /* - * paranoia check - */ - if (req->cmd == READ || req->cmd == WRITE) - rl->pending[req->cmd]--; - if (rl->pending[READ] > q->nr_requests) - printk("blk: reads: %u\n", rl->pending[READ]); - if (rl->pending[WRITE] > q->nr_requests) - printk("blk: writes: %u\n", rl->pending[WRITE]); - if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests) - printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests); - list_add(&req->queue, &rl->free); - if (rl->count >= q->batch_requests && !oversized_batch) { - smp_mb(); - if (waitqueue_active(&q->wait_for_requests)) - wake_up(&q->wait_for_requests); - } - } -} - -/* - * Has to be called with the request spinlock acquired - */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - struct request *next; - - next = blkdev_next_request(req); - if (req->sector + req->nr_sectors != next->sector) - return; - if (req->cmd != next->cmd - || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) - return; - /* - * If we are not allowed to merge these requests, then - * return. If we are allowed to merge, then the count - * will have been updated to the appropriate number, - * and we shouldn't do it here too. - */ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; - - q->elevator.elevator_merge_req_fn(req, next); - - /* At this point we have either done a back merge - * or front merge. We need the smaller start_time of - * the merged requests to be the current request - * for accounting purposes. - */ - if (time_after(req->start_time, next->start_time)) - req->start_time = next->start_time; - - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - - /* One last thing: we have removed a request, so we now have one - less expected IO to complete for accounting purposes. */ - req_merged_io(req); - - blkdev_release_request(next); -} - -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); -} - -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) -{ - struct list_head * prev; - - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); -} - -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) -{ - unsigned int sector, count, sync; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; - struct list_head *head, *insert_here; - int latency; - elevator_t *elevator = &q->elevator; - int should_wake = 0; - - count = bh->b_size >> 9; - sector = bh->b_rsector; - sync = test_and_clear_bit(BH_Sync, &bh->b_state); - - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: -#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */ - rw_ahead = 1; -#endif - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } - - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); - - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ - bh = blk_queue_bounce(q, rw, bh); - -/* look for a free request. */ - /* - * Try to coalesce the new request with old requests - */ - max_sectors = get_max_sectors(bh->b_rdev); - - req = NULL; - head = &q->queue_head; - /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic - */ - spin_lock_irq(&io_request_lock); - -again: - insert_here = head->prev; - - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ - goto get_rq; - } else if (q->head_active && !q->plugged) - head = head->next; - - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); - switch (el_ret) { - - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) { - insert_here = &req->queue; - break; - } - req->bhtail->b_reqnext = bh; - req->bhtail = bh; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - blk_started_sectors(req, count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - req_new_io(req, 1, count); - attempt_back_merge(q, req, max_sectors, max_segments); - goto out; - - case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) { - insert_here = req->queue.prev; - break; - } - bh->b_reqnext = req->bh; - req->bh = bh; - /* - * may not be valid, but queues not having bounce - * enabled for highmem pages must not look at - * ->buffer anyway - */ - req->buffer = bh->b_data; - req->current_nr_sectors = req->hard_cur_sectors = count; - req->sector = req->hard_sector = sector; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - blk_started_sectors(req, count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - req_new_io(req, 1, count); - attempt_front_merge(q, head, req, max_sectors, max_segments); - goto out; - - /* - * elevator says don't/can't merge. get new request - */ - case ELEVATOR_NO_MERGE: - /* - * use elevator hints as to where to insert the - * request. if no hints, just add it to the back - * of the queue - */ - if (req) - insert_here = &req->queue; - break; - - default: - printk("elevator returned crap (%d)\n", el_ret); - BUG(); - } - -get_rq: - if (freereq) { - req = freereq; - freereq = NULL; - } else { - /* - * See description above __get_request_wait() - */ - if (rw_ahead) { - if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) { - spin_unlock_irq(&io_request_lock); - goto end_io; - } - req = get_request(q, rw); - if (req == NULL) - BUG(); - } else { - req = get_request(q, rw); - if (req == NULL) { - spin_unlock_irq(&io_request_lock); - freereq = __get_request_wait(q, rw); - head = &q->queue_head; - spin_lock_irq(&io_request_lock); - should_wake = 1; - goto again; - } - } - } - -/* fill up the request-info, and add it to the queue */ - req->elevator_sequence = latency; - req->cmd = rw; - req->errors = 0; - req->hard_sector = req->sector = sector; - req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = req->hard_cur_sectors = count; - req->nr_segments = 1; /* Always 1 for a new request. */ - req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; - req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; - req->start_time = jiffies; - req_new_io(req, 0, count); - blk_started_io(count); - blk_started_sectors(req, count); - add_request(q, req, insert_here); -out: - if (freereq) - blkdev_release_request(freereq); - if (should_wake) - get_request_wait_wakeup(q, rw); - if (sync) - __generic_unplug_device(q); - spin_unlock_irq(&io_request_lock); - return 0; -end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - return 0; -} - -/** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. - * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. - * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io - * function described (one day) else where. - * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields - * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. - * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. - * */ -void generic_make_request (int rw, struct buffer_head * bh) -{ - int major = MAJOR(bh->b_rdev); - int minorsize = 0; - request_queue_t *q; - - if (!bh->b_end_io) - BUG(); - - /* Test device size, when known. */ - if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; - if (minorsize) { - unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; - - if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= ~(1 << BH_Dirty); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - bh->b_end_io(bh, 0); - return; - } - } - - /* - * Resolve the mapping until finished. (drivers are - * still free to implement/resolve their own stacking - * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. - * Stacking drivers are expected to know what they are doing. - */ - do { - q = __blk_get_queue(bh->b_rdev); - if (!q) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); - break; - } - } while (q->make_request_fn(q, rw, bh)); -} - - -/** - * submit_bh: submit a buffer_head to the block device later for I/O - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O - * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. - * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. - */ -void submit_bh(int rw, struct buffer_head * bh) -{ - int count = bh->b_size >> 9; - - if (!test_bit(BH_Lock, &bh->b_state)) - BUG(); - - set_bit(BH_Req, &bh->b_state); - set_bit(BH_Launder, &bh->b_state); - - /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. - */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; - - get_bh(bh); - generic_make_request(rw, bh); - - /* fix race condition with wait_on_buffer() */ - smp_mb(); /* spin_unlock may have inclusive semantics */ - if (waitqueue_active(&bh->b_wait)) - wake_up(&bh->b_wait); - - if (block_dump) - printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev)); - - put_bh(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } -} - -/** - * ll_rw_block: low-level access to block devices - * @rw: whether to %READ or %WRITE or maybe %READA (readahead) - * @nr: number of &struct buffer_heads in the array - * @bhs: array of pointers to &struct buffer_head - * - * ll_rw_block() takes an array of pointers to &struct buffer_heads, - * and requests an I/O operation on them, either a %READ or a %WRITE. - * The third %READA option is described in the documentation for - * generic_make_request() which ll_rw_block() calls. - * - * This function provides extra functionality that is not in - * generic_make_request() that is relevant to buffers in the buffer - * cache or page cache. In particular it drops any buffer that it - * cannot get a lock on (with the BH_Lock state bit), any buffer that - * appears to be clean when doing a write request, and any buffer that - * appears to be up-to-date when doing read request. Further it marks - * as clean buffers that are processed for writing (the buffer cache - * wont assume that they are actually clean until the buffer gets - * unlocked). - * - * ll_rw_block sets b_end_io to simple completion handler that marks - * the buffer up-to-date (if approriate), unlocks the buffer and wakes - * any waiters. As client that needs a more interesting completion - * routine should call submit_bh() (or generic_make_request()) - * directly. - * - * Caveat: - * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - -void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) -{ - unsigned int major; - int correct_size; - int i; - - if (!nr) - return; - - major = MAJOR(bhs[0]->b_dev); - - /* Determine correct block size for this device. */ - correct_size = get_hardsect_size(bhs[0]->b_dev); - - /* Verify requested block sizes. */ - for (i = 0; i < nr; i++) { - struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { - printk(KERN_NOTICE "ll_rw_block: device %s: " - "only %d-char blocks implemented (%u)\n", - kdevname(bhs[0]->b_dev), - correct_size, bh->b_size); - goto sorry; - } - } - - if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) { - printk(KERN_NOTICE "Can't write to read-only device %s\n", - kdevname(bhs[0]->b_dev)); - goto sorry; - } - - for (i = 0; i < nr; i++) { - struct buffer_head *bh = bhs[i]; - - lock_buffer(bh); - - /* We have the buffer lock */ - atomic_inc(&bh->b_count); - bh->b_end_io = end_buffer_io_sync; - - switch(rw) { - case WRITE: - if (!atomic_set_buffer_clean(bh)) - /* Hmmph! Nothing to write */ - goto end_io; - __mark_buffer_clean(bh); - break; - - case READA: - case READ: - if (buffer_uptodate(bh)) - /* Hmmph! Already have it */ - goto end_io; - break; - default: - BUG(); - end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); - continue; - } - - submit_bh(rw, bh); - } - return; - -sorry: - /* Make sure we don't get infinite dirty retries.. */ - for (i = 0; i < nr; i++) - mark_buffer_clean(bhs[i]); -} - -#ifdef CONFIG_STRAM_SWAP -extern int stram_device_init (void); -#endif - -static void blk_writeback_timer(unsigned long data) -{ - wakeup_bdflush(); - wakeup_kupdate(); -} - -/** - * end_that_request_first - end I/O on one buffer. - * @req: the request being processed - * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error - * - * Description: - * Ends I/O on the first buffer attached to @req, and sets it up - * for the next buffer_head (if any) in the cluster. - * - * Return: - * 0 - we are done with this request, call end_that_request_last() - * 1 - still buffers pending for this request - * - * Caveat: - * Drivers implementing their own end_request handling must call - * blk_finished_io() appropriately. - **/ - -int end_that_request_first (struct request *req, int uptodate, char *name) -{ - struct buffer_head * bh; - int nsect; - - req->errors = 0; - if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); - - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - blk_finished_sectors(req, nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - - req->current_nr_sectors = bh->b_size >> 9; - req->hard_cur_sectors = req->current_nr_sectors; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("end_request: buffer-list destroyed\n"); - } - req->buffer = bh->b_data; - return 1; - } - } - return 0; -} - -extern int laptop_mode; - -void end_that_request_last(struct request *req) -{ - struct completion *waiting = req->waiting; - - /* - * schedule the writeout of pending dirty data when the disk is idle - */ - if (laptop_mode && req->cmd == READ) - mod_timer(&writeback_timer, jiffies + 5 * HZ); - - req_finished_io(req); - blkdev_release_request(req); - if (waiting) - complete(waiting); -} - -int __init blk_dev_init(void) -{ - struct blk_dev_struct *dev; - - request_cachep = kmem_cache_create("blkdev_requests", - sizeof(struct request), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - - if (!request_cachep) - panic("Can't create request pool slab cache\n"); - - for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) - dev->queue = NULL; - - memset(ro_bits,0,sizeof(ro_bits)); - memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); - - blk_max_low_pfn = max_low_pfn - 1; - blk_max_pfn = max_pfn - 1; - - init_timer(&writeback_timer); - writeback_timer.function = blk_writeback_timer; - -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else -#if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? */ - outb_p(0xc, 0x3f2); -#endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif - -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif - -#if defined(CONFIG_XEN_BLKDEV_FRONTEND) - xlblk_init(); -#endif - - return 0; -}; - -EXPORT_SYMBOL(io_request_lock); -EXPORT_SYMBOL(end_that_request_first); -EXPORT_SYMBOL(end_that_request_last); -EXPORT_SYMBOL(blk_grow_request_list); -EXPORT_SYMBOL(blk_init_queue); -EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(blk_cleanup_queue); -EXPORT_SYMBOL(blk_queue_headactive); -EXPORT_SYMBOL(blk_queue_throttle_sectors); -EXPORT_SYMBOL(blk_queue_make_request); -EXPORT_SYMBOL(generic_make_request); -EXPORT_SYMBOL(blkdev_release_request); -EXPORT_SYMBOL(generic_unplug_device); -EXPORT_SYMBOL(blk_queue_bounce_limit); -EXPORT_SYMBOL(blk_max_low_pfn); -EXPORT_SYMBOL(blk_max_pfn); -EXPORT_SYMBOL(blk_seg_merge_ok); -EXPORT_SYMBOL(blk_nohighio); diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/drivers/char/Makefile --- a/linux-2.4-xen-sparse/drivers/char/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,361 +0,0 @@ -# -# Makefile for the kernel character device drivers. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definitions are now inherited from the -# parent makes.. -# - -# -# This file contains the font map for the default (hardware) font -# -FONTMAPFILE = cp437.uni - -O_TARGET := char.o - -obj-y += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o - -# All of the (potential) objects that export symbols. -# This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'. - -export-objs := busmouse.o console.o keyboard.o sysrq.o \ - misc.o pty.o random.o selection.o serial.o \ - sonypi.o tty_io.o tty_ioctl.o generic_serial.o \ - au1000_gpio.o vac-serial.o hp_psaux.o nvram.o \ - scx200.o fetchop.o - -mod-subdirs := joystick ftape drm drm-4.0 pcmcia - -list-multi := - -KEYMAP =defkeymap.o -KEYBD =pc_keyb.o -CONSOLE =console.o -SERIAL =serial.o - -ifeq ($(ARCH),xen) - ifneq ($(CONFIG_XEN_PHYSDEV_ACCESS),y) - KEYBD = - endif -endif - -ifeq ($(ARCH),s390) - KEYMAP = - KEYBD = - CONSOLE = - SERIAL = -endif - -ifeq ($(ARCH),mips) - ifneq ($(CONFIG_PC_KEYB),y) - KEYBD = - endif - ifeq ($(CONFIG_VR41XX_KIU),y) - KEYMAP = - KEYBD = vr41xx_keyb.o - endif -endif - -ifeq ($(ARCH),s390x) - KEYMAP = - KEYBD = - CONSOLE = - SERIAL = -endif - -ifeq ($(ARCH),m68k) - ifdef CONFIG_AMIGA - KEYBD = amikeyb.o - else - ifndef CONFIG_MAC - KEYBD = - endif - endif - SERIAL = -endif - -ifeq ($(ARCH),parisc) - ifdef CONFIG_GSC_PS2 - KEYBD = hp_psaux.o hp_keyb.o - else - KEYBD = - endif - ifdef CONFIG_SERIAL_MUX - CONSOLE += mux.o - endif - ifdef CONFIG_PDC_CONSOLE - CONSOLE += pdc_console.o - endif -endif - -ifdef CONFIG_Q40 - KEYBD += q40_keyb.o - SERIAL = serial.o -endif - -ifdef CONFIG_APOLLO - KEYBD += dn_keyb.o -endif - -ifeq ($(ARCH),parisc) - ifdef CONFIG_GSC_PS2 - KEYBD = hp_psaux.o hp_keyb.o - else - KEYBD = - endif - ifdef CONFIG_PDC_CONSOLE - CONSOLE += pdc_console.o - endif -endif - -ifeq ($(ARCH),arm) - ifneq ($(CONFIG_PC_KEYMAP),y) - KEYMAP = - endif - ifneq ($(CONFIG_PC_KEYB),y) - KEYBD = - endif -endif - -ifeq ($(ARCH),sh) - KEYMAP = - KEYBD = - CONSOLE = - ifeq ($(CONFIG_SH_HP600),y) - KEYMAP = defkeymap.o - KEYBD = scan_keyb.o hp600_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_DMIDA),y) - # DMIDA does not connect the HD64465 PS/2 keyboard port - # but we allow for USB keyboards to be plugged in. - KEYMAP = defkeymap.o - KEYBD = # hd64465_keyb.o pc_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_EC3104),y) - KEYMAP = defkeymap.o - KEYBD = ec3104_keyb.o - CONSOLE = console.o - endif - ifeq ($(CONFIG_SH_DREAMCAST),y) - KEYMAP = defkeymap.o - KEYBD = - CONSOLE = console.o - endif -endif - -ifeq ($(CONFIG_DECSTATION),y) - KEYMAP = - KEYBD = -endif - -ifeq ($(CONFIG_BAGET_MIPS),y) - KEYBD = - SERIAL = vac-serial.o -endif - -ifeq ($(CONFIG_NINO),y) - SERIAL = -endif - -ifneq ($(CONFIG_SUN_SERIAL),) - SERIAL = -endif - -ifeq ($(CONFIG_QTRONIX_KEYBOARD),y) - KEYBD = qtronix.o - KEYMAP = qtronixmap.o -endif - -ifeq ($(CONFIG_DUMMY_KEYB),y) - KEYBD = dummy_keyb.o -endif - -obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o -obj-$(CONFIG_SERIAL) += $(SERIAL) -obj-$(CONFIG_PARPORT_SERIAL) += parport_serial.o -obj-$(CONFIG_SERIAL_HCDP) += hcdp_serial.o -obj-$(CONFIG_SERIAL_21285) += serial_21285.o -obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o -obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o -obj-$(CONFIG_TS_AU1X00_ADS7846) += au1000_ts.o -obj-$(CONFIG_SERIAL_DEC) += decserial.o - -ifndef CONFIG_SUN_KEYBOARD - obj-$(CONFIG_VT) += keyboard.o $(KEYMAP) $(KEYBD) -else - obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP) -endif - -obj-$(CONFIG_HIL) += hp_keyb.o -obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o -obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o -obj-$(CONFIG_ROCKETPORT) += rocket.o -obj-$(CONFIG_MOXA_SMARTIO) += mxser.o -obj-$(CONFIG_MOXA_INTELLIO) += moxa.o -obj-$(CONFIG_DIGI) += pcxx.o -obj-$(CONFIG_DIGIEPCA) += epca.o -obj-$(CONFIG_CYCLADES) += cyclades.o -obj-$(CONFIG_STALLION) += stallion.o -obj-$(CONFIG_ISTALLION) += istallion.o -obj-$(CONFIG_SIBYTE_SB1250_DUART) += sb1250_duart.o -obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o -obj-$(CONFIG_RISCOM8) += riscom8.o -obj-$(CONFIG_ISI) += isicom.o -obj-$(CONFIG_ESPSERIAL) += esp.o -obj-$(CONFIG_SYNCLINK) += synclink.o -obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o -obj-$(CONFIG_N_HDLC) += n_hdlc.o -obj-$(CONFIG_SPECIALIX) += specialix.o -obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o -obj-$(CONFIG_A2232) += ser_a2232.o generic_serial.o -obj-$(CONFIG_SX) += sx.o generic_serial.o -obj-$(CONFIG_RIO) += rio/rio.o generic_serial.o -obj-$(CONFIG_SH_SCI) += sh-sci.o generic_serial.o -obj-$(CONFIG_SERIAL167) += serial167.o -obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o -obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o -obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o -obj-$(CONFIG_TXX927_SERIAL) += serial_txx927.o -obj-$(CONFIG_SERIAL_TXX9) += generic_serial.o serial_txx9.o -obj-$(CONFIG_IP22_SERIAL) += sgiserial.o -obj-$(CONFIG_AU1X00_UART) += au1x00-serial.o -obj-$(CONFIG_SGI_L1_SERIAL) += sn_serial.o - -subdir-$(CONFIG_RIO) += rio -subdir-$(CONFIG_INPUT) += joystick - -obj-$(CONFIG_ATIXL_BUSMOUSE) += atixlmouse.o -obj-$(CONFIG_LOGIBUSMOUSE) += logibusmouse.o -obj-$(CONFIG_PRINTER) += lp.o -obj-$(CONFIG_TIPAR) += tipar.o -obj-$(CONFIG_OBMOUSE) += obmouse.o - -ifeq ($(CONFIG_INPUT),y) -obj-y += joystick/js.o -endif - -obj-$(CONFIG_FETCHOP) += fetchop.o -obj-$(CONFIG_BUSMOUSE) += busmouse.o -obj-$(CONFIG_DTLK) += dtlk.o -obj-$(CONFIG_R3964) += n_r3964.o -obj-$(CONFIG_APPLICOM) += applicom.o -obj-$(CONFIG_SONYPI) += sonypi.o -obj-$(CONFIG_MS_BUSMOUSE) += msbusmouse.o -obj-$(CONFIG_82C710_MOUSE) += qpmouse.o -obj-$(CONFIG_AMIGAMOUSE) += amigamouse.o -obj-$(CONFIG_ATARIMOUSE) += atarimouse.o -obj-$(CONFIG_ADBMOUSE) += adbmouse.o -obj-$(CONFIG_PC110_PAD) += pc110pad.o -obj-$(CONFIG_MK712_MOUSE) += mk712.o -obj-$(CONFIG_RTC) += rtc.o -obj-$(CONFIG_GEN_RTC) += genrtc.o -obj-$(CONFIG_EFI_RTC) += efirtc.o -obj-$(CONFIG_SGI_DS1286) += ds1286.o -obj-$(CONFIG_MIPS_RTC) += mips_rtc.o -obj-$(CONFIG_SGI_IP27_RTC) += ip27-rtc.o -ifeq ($(CONFIG_PPC),) - obj-$(CONFIG_NVRAM) += nvram.o -endif -obj-$(CONFIG_TOSHIBA) += toshiba.o -obj-$(CONFIG_I8K) += i8k.o -obj-$(CONFIG_DS1620) += ds1620.o -obj-$(CONFIG_DS1742) += ds1742.o -obj-$(CONFIG_INTEL_RNG) += i810_rng.o -obj-$(CONFIG_AMD_RNG) += amd768_rng.o -obj-$(CONFIG_HW_RANDOM) += hw_random.o -obj-$(CONFIG_AMD_PM768) += amd76x_pm.o -obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o - -obj-$(CONFIG_ITE_GPIO) += ite_gpio.o -obj-$(CONFIG_AU1X00_GPIO) += au1000_gpio.o -obj-$(CONFIG_AU1X00_USB_TTY) += au1000_usbtty.o -obj-$(CONFIG_AU1X00_USB_RAW) += au1000_usbraw.o -obj-$(CONFIG_COBALT_LCD) += lcd.o - -obj-$(CONFIG_QIC02_TAPE) += tpqic02.o - -subdir-$(CONFIG_FTAPE) += ftape -subdir-$(CONFIG_DRM_OLD) += drm-4.0 -subdir-$(CONFIG_DRM_NEW) += drm -subdir-$(CONFIG_PCMCIA) += pcmcia -subdir-$(CONFIG_AGP) += agp - -ifeq ($(CONFIG_FTAPE),y) -obj-y += ftape/ftape.o -endif - -obj-$(CONFIG_H8) += h8.o -obj-$(CONFIG_PPDEV) += ppdev.o -obj-$(CONFIG_DZ) += dz.o -obj-$(CONFIG_NWBUTTON) += nwbutton.o -obj-$(CONFIG_NWFLASH) += nwflash.o -obj-$(CONFIG_SCx200) += scx200.o -obj-$(CONFIG_SCx200_GPIO) += scx200_gpio.o - -# Only one watchdog can succeed. We probe the hardware watchdog -# drivers first, then the softdog driver. This means if your hardware -# watchdog dies or is 'borrowed' for some reason the software watchdog -# still gives you some cover. - -obj-$(CONFIG_PCWATCHDOG) += pcwd.o -obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o -obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o -obj-$(CONFIG_IB700_WDT) += ib700wdt.o -obj-$(CONFIG_MIXCOMWD) += mixcomwd.o -obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o -obj-$(CONFIG_W83877F_WDT) += w83877f_wdt.o -obj-$(CONFIG_SC520_WDT) += sc520_wdt.o -obj-$(CONFIG_WDT) += wdt.o -obj-$(CONFIG_WDTPCI) += wdt_pci.o -obj-$(CONFIG_21285_WATCHDOG) += wdt285.o -obj-$(CONFIG_977_WATCHDOG) += wdt977.o -obj-$(CONFIG_I810_TCO) += i810-tco.o -obj-$(CONFIG_MACHZ_WDT) += machzwd.o -obj-$(CONFIG_SH_WDT) += shwdt.o -obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o -obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o -obj-$(CONFIG_ALIM1535_WDT) += alim1535d_wdt.o -obj-$(CONFIG_INDYDOG) += indydog.o -obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o -obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o -obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o -obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o -obj-$(CONFIG_INDYDOG) += indydog.o -obj-$(CONFIG_8xx_WDT) += mpc8xx_wdt.o - -subdir-$(CONFIG_MWAVE) += mwave -ifeq ($(CONFIG_MWAVE),y) - obj-y += mwave/mwave.o -endif - -subdir-$(CONFIG_IPMI_HANDLER) += ipmi -ifeq ($(CONFIG_IPMI_HANDLER),y) - obj-y += ipmi/ipmi.o -endif - -include $(TOPDIR)/Rules.make - -fastdep: - -conmakehash: conmakehash.c - $(HOSTCC) $(HOSTCFLAGS) -o conmakehash conmakehash.c - -consolemap_deftbl.c: $(FONTMAPFILE) conmakehash - ./conmakehash $(FONTMAPFILE) > consolemap_deftbl.c - -consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h - -.DELETE_ON_ERROR: - -defkeymap.c: defkeymap.map - set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@ - -qtronixmap.c: qtronixmap.map - set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/drivers/char/mem.c --- a/linux-2.4-xen-sparse/drivers/char/mem.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,812 +0,0 @@ -/* - * linux/drivers/char/mem.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Added devfs support. - * Jan-11-1998, C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx> - * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@xxxxxxx> - * - * MODIFIED FOR XEN by Keir Fraser, 10th July 2003. - * Linux running on Xen has strange semantics for /dev/mem and /dev/kmem!! - * 1. mmap will not work on /dev/kmem - * 2. mmap on /dev/mem interprets the 'file offset' as a machine address - * rather than a physical address. - * I don't believe anyone sane mmaps /dev/kmem, but /dev/mem is mmapped - * to get at memory-mapped I/O spaces (eg. the VESA X server does this). - * For this to work at all we need to expect machine addresses. - * Reading/writing of /dev/kmem expects kernel virtual addresses, as usual. - * Reading/writing of /dev/mem expects 'physical addresses' as usual -- this - * is because /dev/mem can only read/write existing kernel mappings, which - * will be normal RAM, and we should present pseudo-physical layout for all - * except I/O (which is the sticky case that mmap is hacked to deal with). - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/miscdevice.h> -#include <linux/tpqic02.h> -#include <linux/ftape.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/mman.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/raw.h> -#include <linux/tty.h> -#include <linux/capability.h> -#include <linux/ptrace.h> - -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/pgalloc.h> - -#ifdef CONFIG_I2C -extern int i2c_init_all(void); -#endif -#ifdef CONFIG_FB -extern void fbmem_init(void); -#endif -#ifdef CONFIG_PROM_CONSOLE -extern void prom_con_init(void); -#endif -#ifdef CONFIG_MDA_CONSOLE -extern void mda_console_init(void); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) -extern void tapechar_init(void); -#endif - -static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp, - const char * buf, size_t count, loff_t *ppos) -{ - ssize_t written; - - written = 0; -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. */ - if (realp < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-realp; - if (sz > count) sz = count; - /* Hmm. Do something? */ - buf+=sz; - p+=sz; - count-=sz; - written+=sz; - } -#endif - if (copy_from_user(p, buf, count)) - return -EFAULT; - written += count; - *ppos = realp + written; - return written; -} - - -/* - * This funcion reads the *physical* memory. The f_pos points directly to the - * memory location. - */ -static ssize_t read_mem(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - unsigned long end_mem; - ssize_t read; - - end_mem = __pa(high_memory); - if (p >= end_mem) - return 0; - if (count > end_mem - p) - count = end_mem - p; - read = 0; -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-p; - if (sz > count) - sz = count; - if (sz > 0) { - if (clear_user(buf, sz)) - return -EFAULT; - buf += sz; - p += sz; - count -= sz; - read += sz; - } - } -#endif - if (copy_to_user(buf, __va(p), count)) - return -EFAULT; - read += count; - *ppos = p + read; - return read; -} - -static ssize_t write_mem(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - unsigned long end_mem; - - end_mem = __pa(high_memory); - if (p >= end_mem) - return 0; - if (count > end_mem - p) - count = end_mem - p; - return do_write_mem(file, __va(p), p, buf, count, ppos); -} - -#ifndef pgprot_noncached - -/* - * This should probably be per-architecture in <asm/pgtable.h> - */ -static inline pgprot_t pgprot_noncached(pgprot_t _prot) -{ - unsigned long prot = pgprot_val(_prot); - -#if defined(__i386__) || defined(__x86_64__) - /* On PPro and successors, PCD alone doesn't always mean - uncached because of interactions with the MTRRs. PCD | PWT - means definitely uncached. */ - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; -#elif defined(__powerpc__) - prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; -#elif defined(__mc68000__) -#ifdef SUN3_PAGE_NOCACHE - if (MMU_IS_SUN3) - prot |= SUN3_PAGE_NOCACHE; - else -#endif - if (MMU_IS_851 || MMU_IS_030) - prot |= _PAGE_NOCACHE030; - /* Use no-cache mode, serialized */ - else if (MMU_IS_040 || MMU_IS_060) - prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S; -#endif - - return __pgprot(prot); -} - -#endif /* !pgprot_noncached */ - -/* - * Architectures vary in how they handle caching for addresses - * outside of main memory. - */ -static inline int noncached_address(unsigned long addr) -{ -#if defined(__i386__) - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting PCD or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) ) - && addr >= __pa(high_memory); -#else - return addr >= __pa(high_memory); -#endif -} - -#if !defined(CONFIG_XEN) -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - /* - * Accessing memory above the top the kernel knows about or - * through a file pointer that was marked O_SYNC will be - * done non-cached. - */ - if (noncached_address(offset) || (file->f_flags & O_SYNC)) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* Don't try to swap out physical pages.. */ - vma->vm_flags |= VM_RESERVED; - - /* - * Don't dump addresses that are not real memory to a core file. - */ - if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC)) - vma->vm_flags |= VM_IO; - - if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - return 0; -} -#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST) -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - return -ENXIO; -} -#else -static int mmap_mem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - if (!(xen_start_info.flags & SIF_PRIVILEGED)) - return -ENXIO; - - /* DONTCOPY is essential for Xen as copy_page_range is broken. */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, - vma->vm_end-vma->vm_start, vma->vm_page_prot, - DOMID_IO)) - return -EAGAIN; - return 0; -} -#endif /* CONFIG_XEN */ - -/* - * This function reads the *virtual* memory as seen by the kernel. - */ -static ssize_t read_kmem(struct file *file, char *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t read = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ - - if (p < (unsigned long) high_memory) { - read = count; - if (count > (unsigned long) high_memory - p) - read = (unsigned long) high_memory - p; - -#if defined(__sparc__) || defined(__mc68000__) - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE && read > 0) { - size_t tmp = PAGE_SIZE - p; - if (tmp > read) tmp = read; - if (clear_user(buf, tmp)) - return -EFAULT; - buf += tmp; - p += tmp; - read -= tmp; - count -= tmp; - } -#endif - if (copy_to_user(buf, (char *)p, read)) - return -EFAULT; - p += read; - buf += read; - count -= read; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - int len = count; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - len = vread(kbuf, (char *)p, len); - if (!len) - break; - if (copy_to_user(buf, kbuf, len)) { - free_page((unsigned long)kbuf); - return -EFAULT; - } - count -= len; - buf += len; - virtr += len; - p += len; - } - free_page((unsigned long)kbuf); - } - *ppos = p; - return virtr + read; -} - -extern long vwrite(char *buf, char *addr, unsigned long count); - -/* - * This function writes to the *virtual* memory as seen by the kernel. - */ -static ssize_t write_kmem(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t wrote = 0; - ssize_t virtr = 0; - char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ - - if (p < (unsigned long) high_memory) { - wrote = count; - if (count > (unsigned long) high_memory - p) - wrote = (unsigned long) high_memory - p; - - wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos); - - p += wrote; - buf += wrote; - count -= wrote; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - int len = count; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - if (len && copy_from_user(kbuf, buf, len)) { - free_page((unsigned long)kbuf); - return -EFAULT; - } - len = vwrite(kbuf, (char *)p, len); - count -= len; - buf += len; - virtr += len; - p += len; - } - free_page((unsigned long)kbuf); - } - - *ppos = p; - return virtr + wrote; -} - -#if defined(CONFIG_ISA) || !defined(__mc68000__) -static ssize_t read_port(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long i = *ppos; - char *tmp = buf; - - if (verify_area(VERIFY_WRITE,buf,count)) - return -EFAULT; - while (count-- > 0 && i < 65536) { - if (__put_user(inb(i),tmp) < 0) - return -EFAULT; - i++; - tmp++; - } - *ppos = i; - return tmp-buf; -} - -static ssize_t write_port(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - unsigned long i = *ppos; - const char * tmp = buf; - - if (verify_area(VERIFY_READ,buf,count)) - return -EFAULT; - while (count-- > 0 && i < 65536) { - char c; - if (__get_user(c, tmp)) - return -EFAULT; - outb(c,i); - i++; - tmp++; - } - *ppos = i; - return tmp-buf; -} -#endif - -static ssize_t read_null(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - return 0; -} - -static ssize_t write_null(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return count; -} - -/* - * For fun, we are using the MMU for this. - */ -static inline size_t read_zero_pagealigned(char * buf, size_t size) -{ - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long addr=(unsigned long)buf; - - mm = current->mm; - /* Oops, this was forgotten before. -ben */ - down_read(&mm->mmap_sem); - - /* For private mappings, just map in zero pages. */ - for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { - unsigned long count; - - if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) - goto out_up; - if (vma->vm_flags & VM_SHARED) - break; - count = vma->vm_end - addr; - if (count > size) - count = size; - - zap_page_range(mm, addr, count); - zeromap_page_range(addr, count, PAGE_COPY); - - size -= count; - buf += count; - addr += count; - if (size == 0) - goto out_up; - } - - up_read(&mm->mmap_sem); - - /* The shared case is hard. Let's do the conventional zeroing. */ - do { - unsigned long unwritten = clear_user(buf, PAGE_SIZE); - if (unwritten) - return size + unwritten - PAGE_SIZE; - if (current->need_resched) - schedule(); - buf += PAGE_SIZE; - size -= PAGE_SIZE; - } while (size); - - return size; -out_up: - up_read(&mm->mmap_sem); - return size; -} - -static ssize_t read_zero(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - unsigned long left, unwritten, written = 0; - - if (!count) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - left = count; - - /* do we want to be clever? Arbitrary cut-off */ - if (count >= PAGE_SIZE*4) { - unsigned long partial; - - /* How much left of the page? */ - partial = (PAGE_SIZE-1) & -(unsigned long) buf; - unwritten = clear_user(buf, partial); - written = partial - unwritten; - if (unwritten) - goto out; - left -= partial; - buf += partial; - unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); - written += (left & PAGE_MASK) - unwritten; - if (unwritten) - goto out; - buf += left & PAGE_MASK; - left &= ~PAGE_MASK; - } - unwritten = clear_user(buf, left); - written += left - unwritten; -out: - return written ? written : -EFAULT; -} - -static int mmap_zero(struct file * file, struct vm_area_struct * vma) -{ - if (vma->vm_flags & VM_SHARED) - return shmem_zero_setup(vma); - if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; -} - -static ssize_t write_full(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return -ENOSPC; -} - -/* - * Special lseek() function for /dev/null and /dev/zero. Most notably, you - * can fopen() both devices with "a" now. This was previously impossible. - * -- SRB. - */ - -static loff_t null_lseek(struct file * file, loff_t offset, int orig) -{ - return file->f_pos = 0; -} - -/* - * The memory devices use the full 32/64 bits of the offset, and so we cannot - * check against negative addresses: they are ok. The return value is weird, - * though, in that case (0). - * - * also note that seeking relative to the "end of file" isn't supported: - * it has no meaning, so it returns -EINVAL. - */ -static loff_t memory_lseek(struct file * file, loff_t offset, int orig) -{ - loff_t ret; - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - force_successful_syscall_return(); - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - force_successful_syscall_return(); - break; - default: - ret = -EINVAL; - } - return ret; -} - -static int open_port(struct inode * inode, struct file * filp) -{ - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long kaddr; - pgd_t *pgd; - pmd_t *pmd; - pte_t *ptep, pte; - struct page *page = NULL; - - /* address is user VA; convert to kernel VA of desired page */ - kaddr = (address - vma->vm_start) + offset; - kaddr = VMALLOC_VMADDR(kaddr); - - spin_lock(&init_mm.page_table_lock); - - /* Lookup page structure for kernel VA */ - pgd = pgd_offset(&init_mm, kaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - goto out; - pmd = pmd_offset(pgd, kaddr); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - goto out; - ptep = pte_offset(pmd, kaddr); - if (!ptep) - goto out; - pte = *ptep; - if (!pte_present(pte)) - goto out; - if (write && !pte_write(pte)) - goto out; - page = pte_page(pte); - if (!VALID_PAGE(page)) { - page = NULL; - goto out; - } - - /* Increment reference count on page */ - get_page(page); - -out: - spin_unlock(&init_mm.page_table_lock); - - return page; -} - -struct vm_operations_struct kmem_vm_ops = { - nopage: kmem_vm_nopage, -}; - -static int mmap_kmem(struct file * file, struct vm_area_struct * vma) -{ - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - - /* - * If the user is not attempting to mmap a high memory address then - * the standard mmap_mem mechanism will work. High memory addresses - * need special handling, as remap_page_range expects a physically- - * contiguous range of kernel addresses (such as obtained in kmalloc). - */ - if ((offset + size) < (unsigned long) high_memory) - return mmap_mem(file, vma); - - /* - * Accessing memory above the top the kernel knows about or - * through a file pointer that was marked O_SYNC will be - * done non-cached. - */ - if (noncached_address(offset) || (file->f_flags & O_SYNC)) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - /* Don't do anything here; "nopage" will fill the holes */ - vma->vm_ops = &kmem_vm_ops; - - /* Don't try to swap out physical pages.. */ - vma->vm_flags |= VM_RESERVED; - - /* - * Don't dump addresses that are not real memory to a core file. - */ - vma->vm_flags |= VM_IO; - - return 0; -} - -#define zero_lseek null_lseek -#define full_lseek null_lseek -#define write_zero write_null -#define read_full read_zero -#define open_mem open_port -#define open_kmem open_mem - -static struct file_operations mem_fops = { - llseek: memory_lseek, - read: read_mem, - write: write_mem, - mmap: mmap_mem, - open: open_mem, -}; - -static struct file_operations kmem_fops = { - llseek: memory_lseek, - read: read_kmem, - write: write_kmem, -#if !defined(CONFIG_XEN) - mmap: mmap_kmem, -#endif - open: open_kmem, -}; - -static struct file_operations null_fops = { - llseek: null_lseek, - read: read_null, - write: write_null, -}; - -#if defined(CONFIG_ISA) || !defined(__mc68000__) -static struct file_operations port_fops = { - llseek: memory_lseek, - read: read_port, - write: write_port, - open: open_port, -}; -#endif - -static struct file_operations zero_fops = { - llseek: zero_lseek, - read: read_zero, - write: write_zero, - mmap: mmap_zero, -}; - -static struct file_operations full_fops = { - llseek: full_lseek, - read: read_full, - write: write_full, -}; - -static int memory_open(struct inode * inode, struct file * filp) -{ - switch (MINOR(inode->i_rdev)) { - case 1: - filp->f_op = &mem_fops; - break; - case 2: - filp->f_op = &kmem_fops; - break; - case 3: - filp->f_op = &null_fops; - break; -#if defined(CONFIG_ISA) || !defined(__mc68000__) - case 4: - filp->f_op = &port_fops; - break; -#endif - case 5: - filp->f_op = &zero_fops; - break; - case 7: - filp->f_op = &full_fops; - break; - case 8: - filp->f_op = &random_fops; - break; - case 9: - filp->f_op = &urandom_fops; - break; - default: - return -ENXIO; - } - if (filp->f_op && filp->f_op->open) - return filp->f_op->open(inode,filp); - return 0; -} - -void __init memory_devfs_register (void) -{ - /* These are never unregistered */ - static const struct { - unsigned short minor; - char *name; - umode_t mode; - struct file_operations *fops; - } list[] = { /* list of minor devices */ - {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, - {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, - {3, "null", S_IRUGO | S_IWUGO, &null_fops}, -#if defined(CONFIG_ISA) || !defined(__mc68000__) - {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, -#endif - {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, - {7, "full", S_IRUGO | S_IWUGO, &full_fops}, - {8, "random", S_IRUGO | S_IWUSR, &random_fops}, - {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops} - }; - int i; - - for (i=0; i<(sizeof(list)/sizeof(*list)); i++) - devfs_register (NULL, list[i].name, DEVFS_FL_NONE, - MEM_MAJOR, list[i].minor, - list[i].mode | S_IFCHR, - list[i].fops, NULL); -} - -static struct file_operations memory_fops = { - open: memory_open, /* just a selector for the real open */ -}; - -int __init chr_dev_init(void) -{ - if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops)) - printk("unable to get major %d for memory devs\n", MEM_MAJOR); - memory_devfs_register(); - rand_initialize(); -#ifdef CONFIG_I2C - i2c_init_all(); -#endif -#if defined (CONFIG_FB) - fbmem_init(); -#endif -#if defined (CONFIG_PROM_CONSOLE) - prom_con_init(); -#endif -#if defined (CONFIG_MDA_CONSOLE) - mda_console_init(); -#endif - tty_init(); -#ifdef CONFIG_M68K_PRINTER - lp_m68k_init(); -#endif - misc_init(); -#if CONFIG_QIC02_TAPE - qic02_tape_init(); -#endif -#ifdef CONFIG_FTAPE - ftape_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) - tapechar_init(); -#endif - return 0; -} - -__initcall(chr_dev_init); diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/drivers/char/tty_io.c --- a/linux-2.4-xen-sparse/drivers/char/tty_io.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,2891 +0,0 @@ -/* - * linux/drivers/char/tty_io.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles - * or rs-channels. It also implements echoing, cooked mode etc. - * - * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0. - * - * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the - * tty_struct and tty_queue structures. Previously there was an array - * of 256 tty_struct's which was statically allocated, and the - * tty_queue structures were allocated at boot time. Both are now - * dynamically allocated only when the tty is open. - * - * Also restructured routines so that there is more of a separation - * between the high-level tty routines (tty_io.c and tty_ioctl.c) and - * the low-level tty routines (serial.c, pty.c, console.c). This - * makes for cleaner and more compact code. -TYT, 9/17/92 - * - * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines - * which can be dynamically activated and de-activated by the line - * discipline handling modules (like SLIP). - * - * NOTE: pay no attention to the line discipline code (yet); its - * interface is still subject to change in this version... - * -- TYT, 1/31/92 - * - * Added functionality to the OPOST tty handling. No delays, but all - * other bits should be there. - * -- Nick Holloway <alfie@xxxxxxxxxxxxxxxxx>, 27th May 1993. - * - * Rewrote canonical mode and added more termios flags. - * -- julian@xxxxxxxxxxxxxxxxxxxxxx (J. Cowley), 13Jan94 - * - * Reorganized FASYNC support so mouse code can share it. - * -- ctm@xxxxxxxx, 9Sep95 - * - * New TIOCLINUX variants added. - * -- mj@xxxxxxxxxxxxxxxxx, 19-Nov-95 - * - * Restrict vt switching via ioctl() - * -- grif@xxxxxxxxxx, 5-Dec-95 - * - * Move console and virtual terminal code to more appropriate files, - * implement CONFIG_VT and generalize console device interface. - * -- Marko Kohtala <Marko.Kohtala@xxxxxx>, March 97 - * - * Rewrote init_dev and release_dev to eliminate races. - * -- Bill Hawes <whawes@xxxxxxxx>, June 97 - * - * Added devfs support. - * -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 13-Jan-1998 - * - * Added support for a Unix98-style ptmx device. - * -- C. Scott Ananian <cananian@xxxxxxxxxxxxxxxxxxxx>, 14-Jan-1998 - * - * Reduced memory usage for older ARM systems - * -- Russell King <rmk@xxxxxxxxxxxxxxxx> - * - * Move do_SAK() into process context. Less stack use in devfs functions. - * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@xxxxxxxxxx> 17Mar01 - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/major.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/fcntl.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/tty.h> -#include <linux/tty_driver.h> -#include <linux/tty_flip.h> -#include <linux/devpts_fs.h> -#include <linux/file.h> -#include <linux/console.h> -#include <linux/timer.h> -#include <linux/ctype.h> -#include <linux/kd.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/poll.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/smp_lock.h> - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> - -#include <linux/kbd_kern.h> -#include <linux/vt_kern.h> -#include <linux/selection.h> -#include <linux/devfs_fs_kernel.h> - -#include <linux/kmod.h> - -#ifdef CONFIG_XEN_CONSOLE -extern void xen_console_init(void); -#endif - -#ifdef CONFIG_VT -extern void con_init_devfs (void); -#endif - -extern void disable_early_printk(void); - -#define CONSOLE_DEV MKDEV(TTY_MAJOR,0) -#define TTY_DEV MKDEV(TTYAUX_MAJOR,0) -#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1) -#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2) - -#undef TTY_DEBUG_HANGUP - -#define TTY_PARANOIA_CHECK 1 -#define CHECK_TTY_COUNT 1 - -struct termios tty_std_termios; /* for the benefit of tty drivers */ -struct tty_driver *tty_drivers; /* linked list of tty drivers */ - -#ifdef CONFIG_UNIX98_PTYS -extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */ -extern struct tty_driver pts_driver[]; /* Unix98 pty slaves; for /dev/ptmx */ -#endif - -static void initialize_tty_struct(struct tty_struct *tty); - -static ssize_t tty_read(struct file *, char *, size_t, loff_t *); -static ssize_t tty_write(struct file *, const char *, size_t, loff_t *); -static unsigned int tty_poll(struct file *, poll_table *); -static int tty_open(struct inode *, struct file *); -static int tty_release(struct inode *, struct file *); -int tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg); -static int tty_fasync(int fd, struct file * filp, int on); -extern int vme_scc_init (void); -extern long vme_scc_console_init(void); -extern int serial167_init(void); -extern long serial167_console_init(void); -extern void console_8xx_init(void); -extern void au1x00_serial_console_init(void); -extern int rs_8xx_init(void); -extern void mac_scc_console_init(void); -extern void hwc_console_init(void); -extern void hwc_tty_init(void); -extern void con3215_init(void); -extern void tty3215_init(void); -extern void tub3270_con_init(void); -extern void tub3270_init(void); -extern void rs285_console_init(void); -extern void sa1100_rs_console_init(void); -extern void sgi_serial_console_init(void); -extern void sn_sal_serial_console_init(void); -extern void sci_console_init(void); -extern void dec_serial_console_init(void); -extern void tx3912_console_init(void); -extern void tx3912_rs_init(void); -extern void txx927_console_init(void); -extern void txx9_rs_init(void); -extern void txx9_serial_console_init(void); -extern void sb1250_serial_console_init(void); -extern void arc_console_init(void); -extern int hvc_console_init(void); - -#ifndef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef MAX -#define MAX(a,b) ((a) < (b) ? (b) : (a)) -#endif - -static struct tty_struct *alloc_tty_struct(void) -{ - struct tty_struct *tty; - - tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); - if (tty) - memset(tty, 0, sizeof(struct tty_struct)); - return tty; -} - -static inline void free_tty_struct(struct tty_struct *tty) -{ - kfree(tty); -} - -/* - * This routine returns the name of tty. - */ -static char * -_tty_make_name(struct tty_struct *tty, const char *name, char *buf) -{ - int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0; - - if (!tty) /* Hmm. NULL pointer. That's fun. */ - strcpy(buf, "NULL tty"); - else - sprintf(buf, name, - idx + tty->driver.name_base); - - return buf; -} - -#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \ - (tty)->driver.name_base) - -char *tty_name(struct tty_struct *tty, char *buf) -{ - return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf); -} - -inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device, - const char *routine) -{ -#ifdef TTY_PARANOIA_CHECK - static const char badmagic[] = KERN_WARNING - "Warning: bad magic number for tty struct (%s) in %s\n"; - static const char badtty[] = KERN_WARNING - "Warning: null TTY for (%s) in %s\n"; - - if (!tty) { - printk(badtty, kdevname(device), routine); - return 1; - } - if (tty->magic != TTY_MAGIC) { - printk(badmagic, kdevname(device), routine); - return 1; - } -#endif - return 0; -} - -static int check_tty_count(struct tty_struct *tty, const char *routine) -{ -#ifdef CHECK_TTY_COUNT - struct list_head *p; - int count = 0; - - file_list_lock(); - for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) { - if(list_entry(p, struct file, f_list)->private_data == tty) - count++; - } - file_list_unlock(); - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_SLAVE && - tty->link && tty->link->count) - count++; - if (tty->count != count) { - printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " - "!= #fd's(%d) in %s\n", - kdevname(tty->device), tty->count, count, routine); - return count; - } -#endif - return 0; -} - -/* - * This is probably overkill for real world processors but - * they are not on hot paths so a little discipline won't do - * any harm. - */ - -static void tty_set_termios_ldisc(struct tty_struct *tty, int num) -{ - down(&tty->termios_sem); - tty->termios->c_line = num; - up(&tty->termios_sem); -} - -/* - * This guards the refcounted line discipline lists. The lock - * must be taken with irqs off because there are hangup path - * callers who will do ldisc lookups and cannot sleep. - */ - -spinlock_t tty_ldisc_lock = SPIN_LOCK_UNLOCKED; -DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); -struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ - -int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) -{ - - unsigned long flags; - int ret = 0; - - if (disc < N_TTY || disc >= NR_LDISCS) - return -EINVAL; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (new_ldisc) { - tty_ldiscs[disc] = *new_ldisc; - tty_ldiscs[disc].num = disc; - tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED; - tty_ldiscs[disc].refcount = 0; - } else { - if(tty_ldiscs[disc].refcount) - ret = -EBUSY; - else - tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED; - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - return ret; - -} - - -EXPORT_SYMBOL(tty_register_ldisc); - -struct tty_ldisc *tty_ldisc_get(int disc) -{ - unsigned long flags; - struct tty_ldisc *ld; - - if (disc < N_TTY || disc >= NR_LDISCS) - return NULL; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - - ld = &tty_ldiscs[disc]; - /* Check the entry is defined */ - if(ld->flags & LDISC_FLAG_DEFINED) - ld->refcount++; - else - ld = NULL; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return ld; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_get); - -void tty_ldisc_put(int disc) -{ - struct tty_ldisc *ld; - unsigned long flags; - - if (disc < N_TTY || disc >= NR_LDISCS) - BUG(); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty_ldiscs[disc]; - if(ld->refcount <= 0) - BUG(); - ld->refcount--; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_put); - -void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) -{ - tty->ldisc = *ld; - tty->ldisc.refcount = 0; -} - -/** - * tty_ldisc_try - internal helper - * @tty: the tty - * - * Make a single attempt to grab and bump the refcount on - * the tty ldisc. Return 0 on failure or 1 on success. This is - * used to implement both the waiting and non waiting versions - * of tty_ldisc_ref - */ - -static int tty_ldisc_try(struct tty_struct *tty) -{ - unsigned long flags; - struct tty_ldisc *ld; - int ret = 0; - - spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty->ldisc; - if(test_bit(TTY_LDISC, &tty->flags)) - { - ld->refcount++; - ret = 1; - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return ret; -} - -/** - * tty_ldisc_ref_wait - wait for the tty ldisc - * @tty: tty device - * - * Dereference the line discipline for the terminal and take a - * reference to it. If the line discipline is in flux then - * wait patiently until it changes. - * - * Note: Must not be called from an IRQ/timer context. The caller - * must also be careful not to hold other locks that will deadlock - * against a discipline change, such as an existing ldisc reference - * (which we check for) - */ - -struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) -{ - /* wait_event is a macro */ - wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - return &tty->ldisc; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); - -/** - * tty_ldisc_ref - get the tty ldisc - * @tty: tty device - * - * Dereference the line discipline for the terminal and take a - * reference to it. If the line discipline is in flux then - * return NULL. Can be called from IRQ and timer functions. - */ - -struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) -{ - if(tty_ldisc_try(tty)) - return &tty->ldisc; - return NULL; -} - -EXPORT_SYMBOL_GPL(tty_ldisc_ref); - - -void tty_ldisc_deref(struct tty_ldisc *ld) -{ - - unsigned long flags; - - if(ld == NULL) - BUG(); - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if(ld->refcount == 0) - printk(KERN_EMERG "tty_ldisc_deref: no references.\n"); - else - ld->refcount--; - if(ld->refcount == 0) - wake_up(&tty_ldisc_wait); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_deref); - -/** - * tty_ldisc_enable - allow ldisc use - * @tty: terminal to activate ldisc on - * - * Set the TTY_LDISC flag when the line discipline can be called - * again. Do neccessary wakeups for existing sleepers. - * - * Note: nobody should set this bit except via this function. Clearing - * directly is allowed. - */ - -static void tty_ldisc_enable(struct tty_struct *tty) -{ - set_bit(TTY_LDISC, &tty->flags); - wake_up(&tty_ldisc_wait); -} - -/** - * tty_set_ldisc - set line discipline - * @tty: the terminal to set - * @ldisc: the line discipline - * - * Set the discipline of a tty line. Must be called from a process - * context. - */ - -static int tty_set_ldisc(struct tty_struct *tty, int ldisc) -{ - int retval = 0; - struct tty_ldisc o_ldisc; - char buf[64]; - unsigned long flags; - struct tty_ldisc *ld; - - if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS)) - return -EINVAL; - -restart: - - if (tty->ldisc.num == ldisc) - return 0; /* We are already in the desired discipline */ - - ld = tty_ldisc_get(ldisc); - /* Eduardo Blanco <ejbs@xxxxxxxxxxxx> */ - /* Cyrus Durgin <cider@xxxxxxxxxxxxx> */ - if (ld == NULL) - { - char modname [20]; - sprintf(modname, "tty-ldisc-%d", ldisc); - request_module (modname); - ld = tty_ldisc_get(ldisc); - } - - if (ld == NULL) - return -EINVAL; - - - o_ldisc = tty->ldisc; - tty_wait_until_sent(tty, 0); - - /* - * Make sure we don't change while someone holds a - * reference to the line discipline. The TTY_LDISC bit - * prevents anyone taking a reference once it is clear. - * We need the lock to avoid racing reference takers. - */ - - spin_lock_irqsave(&tty_ldisc_lock, flags); - if(tty->ldisc.refcount) - { - /* Free the new ldisc we grabbed. Must drop the lock - first. */ - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(ldisc); - /* - * There are several reasons we may be busy, including - * random momentary I/O traffic. We must therefore - * retry. We could distinguish between blocking ops - * and retries if we made tty_ldisc_wait() smarter. That - * is up for discussion. - */ - if(wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - clear_bit(TTY_LDISC, &tty->flags); - clear_bit(TTY_DONT_FLIP, &tty->flags); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * From this point on we know nobody has an ldisc - * usage reference, nor can they obtain one until - * we say so later on. - */ - - /* - * Wait for ->hangup_work and ->flip.work handlers to terminate - */ - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - - /* Shutdown the current discipline. */ - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - - /* Now set up the new line discipline. */ - tty_ldisc_assign(tty, ld); - tty_set_termios_ldisc(tty, ldisc); - if (tty->ldisc.open) - retval = (tty->ldisc.open)(tty); - if (retval < 0) { - tty_ldisc_put(ldisc); - /* There is an outstanding reference here so this is safe */ - tty_ldisc_assign(tty, tty_ldisc_get(o_ldisc.num)); - tty_set_termios_ldisc(tty, tty->ldisc.num); - if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) { - tty_ldisc_put(o_ldisc.num); - /* This driver is always present */ - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(tty, N_TTY); - if (tty->ldisc.open) { - int r = tty->ldisc.open(tty); - - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty, buf), r); - } - } - } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc. If we ended up flipping back - to the existing ldisc we have two references to it */ - - if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc) - tty->driver.set_ldisc(tty); - - tty_ldisc_put(o_ldisc.num); - - /* - * Allow ldisc referencing to occur as soon as the driver - * ldisc callback completes. - */ - tty_ldisc_enable(tty); - - return retval; -} - -/* - * This routine returns a tty driver structure, given a device number - */ -struct tty_driver *get_tty_driver(kdev_t device) -{ - int major, minor; - struct tty_driver *p; - - minor = MINOR(device); - major = MAJOR(device); - - for (p = tty_drivers; p; p = p->next) { - if (p->major != major) - continue; - if (minor < p->minor_start) - continue; - if (minor >= p->minor_start + p->num) - continue; - return p; - } - return NULL; -} - -/* - * If we try to write to, or set the state of, a terminal and we're - * not in the foreground, send a SIGTTOU. If the signal is blocked or - * ignored, go ahead and perform the operation. (POSIX 7.2) - */ -int tty_check_change(struct tty_struct * tty) -{ - if (current->tty != tty) - return 0; - if (tty->pgrp <= 0) { - printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n"); - return 0; - } - if (current->pgrp == tty->pgrp) - return 0; - if (is_ignored(SIGTTOU)) - return 0; - if (is_orphaned_pgrp(current->pgrp)) - return -EIO; - (void) kill_pg(current->pgrp,SIGTTOU,1); - return -ERESTARTSYS; -} - -static ssize_t hung_up_tty_read(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - /* Can't seek (pread) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - return 0; -} - -static ssize_t hung_up_tty_write(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - /* Can't seek (pwrite) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - return -EIO; -} - -/* No kernel lock held - none needed ;) */ -static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait) -{ - return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; -} - -static int hung_up_tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) -{ - return cmd == TIOCSPGRP ? -ENOTTY : -EIO; -} - -static struct file_operations tty_fops = { - llseek: no_llseek, - read: tty_read, - write: tty_write, - poll: tty_poll, - ioctl: tty_ioctl, - open: tty_open, - release: tty_release, - fasync: tty_fasync, -}; - -static struct file_operations hung_up_tty_fops = { - llseek: no_llseek, - read: hung_up_tty_read, - write: hung_up_tty_write, - poll: hung_up_tty_poll, - ioctl: hung_up_tty_ioctl, - release: tty_release, -}; - -static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED; -static struct file *redirect; - -/** - * tty_wakeup - request more data - * @tty: terminal - * - * Internal and external helper for wakeups of tty. This function - * informs the line discipline if present that the driver is ready\ - * to receive more output data. - */ - -void tty_wakeup(struct tty_struct *tty) -{ - struct tty_ldisc *ld; - - if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { - ld = tty_ldisc_ref(tty); - if(ld) { - if(ld->write_wakeup) - ld->write_wakeup(tty); - tty_ldisc_deref(ld); - } - } - wake_up_interruptible(&tty->write_wait); -} - -/* - * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do - * that in 2.4 for modutils compat reasons. - */ -EXPORT_SYMBOL(tty_wakeup); - - -void tty_ldisc_flush(struct tty_struct *tty) -{ - struct tty_ldisc *ld = tty_ldisc_ref(tty); - if(ld) { - if(ld->flush_buffer) - ld->flush_buffer(tty); - tty_ldisc_deref(ld); - } -} - - -/* - * tty_wakeup/tty_ldisc_flush are actually _GPL exports but we can't do - * that in 2.4 for modutils compat reasons. - */ -EXPORT_SYMBOL(tty_ldisc_flush); - -void do_tty_hangup(void *data) -{ - struct tty_struct *tty = (struct tty_struct *) data; - struct file * cons_filp = NULL; - struct file *f = NULL; - struct task_struct *p; - struct list_head *l; - struct tty_ldisc *ld; - int closecount = 0, n; - - if (!tty) - return; - - /* inuse_filps is protected by the single kernel lock */ - lock_kernel(); - - spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { - f = redirect; - redirect = NULL; - } - spin_unlock(&redirect_lock); - - check_tty_count(tty, "do_tty_hangup"); - file_list_lock(); - for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) { - struct file * filp = list_entry(l, struct file, f_list); - if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV || - filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) { - cons_filp = filp; - continue; - } - if (filp->f_op != &tty_fops) - continue; - closecount++; - tty_fasync(-1, filp, 0); /* can't block */ - filp->f_op = &hung_up_tty_fops; - } - file_list_unlock(); - - /* FIXME! What are the locking issues here? This may me overdoing things.. */ - ld = tty_ldisc_ref(tty); - if(ld != NULL) - { - if (ld->flush_buffer) - ld->flush_buffer(tty); - if (tty->driver.flush_buffer) - tty->driver.flush_buffer(tty); - if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup) - ld->write_wakeup(tty); - if (ld->hangup) - ld->hangup(tty); - } - - /* FIXME: Once we trust the LDISC code better we can wait here for - ldisc completion and fix the driver call race */ - - wake_up_interruptible(&tty->write_wait); - wake_up_interruptible(&tty->read_wait); - - /* - * Shutdown the current line discipline, and reset it to - * N_TTY. - */ - - if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) - { - down(&tty->termios_sem); - *tty->termios = tty->driver.init_termios; - up(&tty->termios_sem); - } - - /* Defer ldisc switch */ - /* tty_deferred_ldisc_switch(N_TTY) - This should get done automatically when the port closes and - tty_release is called */ - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((tty->session > 0) && (p->session == tty->session) && - p->leader) { - send_sig(SIGHUP,p,1); - send_sig(SIGCONT,p,1); - if (tty->pgrp > 0) - p->tty_old_pgrp = tty->pgrp; - } - if (p->tty == tty) - p->tty = NULL; - } - read_unlock(&tasklist_lock); - - tty->flags = 0; - tty->session = 0; - tty->pgrp = -1; - tty->ctrl_status = 0; - /* - * If one of the devices matches a console pointer, we - * cannot just call hangup() because that will cause - * tty->count and state->count to go out of sync. - * So we just call close() the right number of times. - */ - if (cons_filp) { - if (tty->driver.close) - for (n = 0; n < closecount; n++) - tty->driver.close(tty, cons_filp); - } else if (tty->driver.hangup) - (tty->driver.hangup)(tty); - - /* We don't want to have driver/ldisc interactions beyond - the ones we did here. The driver layer expects no - calls after ->hangup() from the ldisc side. However we - can't yet guarantee all that */ - - set_bit(TTY_HUPPED, &tty->flags); - if(ld) { - tty_ldisc_enable(tty); - tty_ldisc_deref(ld); - } - unlock_kernel(); - if (f) - fput(f); -} - -void tty_hangup(struct tty_struct * tty) -{ -#ifdef TTY_DEBUG_HANGUP - char buf[64]; - - printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf)); -#endif - schedule_task(&tty->tq_hangup); -} - -void tty_vhangup(struct tty_struct * tty) -{ -#ifdef TTY_DEBUG_HANGUP - char buf[64]; - - printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf)); -#endif - do_tty_hangup((void *) tty); -} - -int tty_hung_up_p(struct file * filp) -{ - return (filp->f_op == &hung_up_tty_fops); -} - -/* - * This function is typically called only by the session leader, when - * it wants to disassociate itself from its controlling tty. - * - * It performs the following functions: - * (1) Sends a SIGHUP and SIGCONT to the foreground process group - * (2) Clears the tty from being controlling the session - * (3) Clears the controlling tty for all processes in the - * session group. - * - * The argument on_exit is set to 1 if called when a process is - * exiting; it is 0 if called by the ioctl TIOCNOTTY. - */ -void disassociate_ctty(int on_exit) -{ - struct tty_struct *tty = current->tty; - struct task_struct *p; - int tty_pgrp = -1; - - if (tty) { - tty_pgrp = tty->pgrp; - if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY) - tty_vhangup(tty); - } else { - if (current->tty_old_pgrp) { - kill_pg(current->tty_old_pgrp, SIGHUP, on_exit); - kill_pg(current->tty_old_pgrp, SIGCONT, on_exit); - } - return; - } - if (tty_pgrp > 0) { - kill_pg(tty_pgrp, SIGHUP, on_exit); - if (!on_exit) - kill_pg(tty_pgrp, SIGCONT, on_exit); - } - - current->tty_old_pgrp = 0; - tty->session = 0; - tty->pgrp = -1; - - read_lock(&tasklist_lock); - for_each_task(p) - if (p->session == current->session) - p->tty = NULL; - read_unlock(&tasklist_lock); -} - -void stop_tty(struct tty_struct *tty) -{ - if (tty->stopped) - return; - tty->stopped = 1; - if (tty->link && tty->link->packet) { - tty->ctrl_status &= ~TIOCPKT_START; - tty->ctrl_status |= TIOCPKT_STOP; - wake_up_interruptible(&tty->link->read_wait); - } - if (tty->driver.stop) - (tty->driver.stop)(tty); -} - -void start_tty(struct tty_struct *tty) -{ - if (!tty->stopped || tty->flow_stopped) - return; - tty->stopped = 0; - if (tty->link && tty->link->packet) { - tty->ctrl_status &= ~TIOCPKT_STOP; - tty->ctrl_status |= TIOCPKT_START; - wake_up_interruptible(&tty->link->read_wait); - } - if (tty->driver.start) - (tty->driver.start)(tty); - /* If we have a running line discipline it may need kicking */ - tty_wakeup(tty); -} - -static ssize_t tty_read(struct file * file, char * buf, size_t count, - loff_t *ppos) -{ - int i; - struct tty_struct * tty; - struct inode *inode; - struct tty_ldisc *ld; - - /* Can't seek (pread) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - - tty = (struct tty_struct *)file->private_data; - inode = file->f_dentry->d_inode; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_read")) - return -EIO; - if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) - return -EIO; - - /* This check not only needs to be done before reading, but also - whenever read_chan() gets woken up after sleeping, so I've - moved it to there. This should only be done for the N_TTY - line discipline, anyway. Same goes for write_chan(). -- jlc. */ -#if 0 - if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */ - (tty->pgrp > 0) && - (current->tty == tty) && - (tty->pgrp != current->pgrp)) - if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp)) - return -EIO; - else { - (void) kill_pg(current->pgrp, SIGTTIN, 1); - return -ERESTARTSYS; - } -#endif - /* We want to wait for the line discipline to sort out in this - situation */ - ld = tty_ldisc_ref_wait(tty); - lock_kernel(); - if (ld->read) - i = (ld->read)(tty,file,buf,count); - else - i = -EIO; - tty_ldisc_deref(ld); - unlock_kernel(); - if (i > 0) - inode->i_atime = CURRENT_TIME; - return i; -} - -/* - * Split writes up in sane blocksizes to avoid - * denial-of-service type attacks - */ -static inline ssize_t do_tty_write( - ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), - struct tty_struct *tty, - struct file *file, - const unsigned char *buf, - size_t count) -{ - ssize_t ret = 0, written = 0; - - if (file->f_flags & O_NONBLOCK) { - if (down_trylock(&tty->atomic_write)) - return -EAGAIN; - } - else { - if (down_interruptible(&tty->atomic_write)) - return -ERESTARTSYS; - } - if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) { - lock_kernel(); - written = write(tty, file, buf, count); - unlock_kernel(); - } else { - for (;;) { - unsigned long size = MAX(PAGE_SIZE*2,16384); - if (size > count) - size = count; - lock_kernel(); - ret = write(tty, file, buf, size); - unlock_kernel(); - if (ret <= 0) - break; - written += ret; - buf += ret; - count -= ret; - if (!count) - break; - ret = -ERESTARTSYS; - if (signal_pending(current)) - break; - if (current->need_resched) - schedule(); - } - } - if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; - ret = written; - } - up(&tty->atomic_write); - return ret; -} - - -static ssize_t tty_write(struct file * file, const char * buf, size_t count, - loff_t *ppos) -{ - int is_console; - struct tty_struct * tty; - struct inode *inode = file->f_dentry->d_inode; - ssize_t ret; - struct tty_ldisc *ld; - - /* Can't seek (pwrite) on ttys. */ - if (ppos != &file->f_pos) - return -ESPIPE; - - /* - * For now, we redirect writes from /dev/console as - * well as /dev/tty0. - */ - inode = file->f_dentry->d_inode; - is_console = (inode->i_rdev == SYSCONS_DEV || - inode->i_rdev == CONSOLE_DEV); - - if (is_console) { - struct file *p = NULL; - - spin_lock(&redirect_lock); - if (redirect) { - get_file(redirect); - p = redirect; - } - spin_unlock(&redirect_lock); - - if (p) { - ssize_t res = p->f_op->write(p, buf, count, &p->f_pos); - fput(p); - return res; - } - } - - tty = (struct tty_struct *)file->private_data; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_write")) - return -EIO; - if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags))) - return -EIO; -#if 0 - if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) && - (current->tty == tty) && (tty->pgrp != current->pgrp)) { - if (is_orphaned_pgrp(current->pgrp)) - return -EIO; - if (!is_ignored(SIGTTOU)) { - (void) kill_pg(current->pgrp, SIGTTOU, 1); - return -ERESTARTSYS; - } - } -#endif - - ld = tty_ldisc_ref_wait(tty); - if (!ld->write) - ret = -EIO; - else - ret = do_tty_write(ld->write, tty, file, - (const unsigned char __user *)buf, count); - tty_ldisc_deref(ld); - return ret; -} - -/* Semaphore to protect creating and releasing a tty. This is shared with - vt.c for deeply disgusting hack reasons */ -static DECLARE_MUTEX(tty_sem); - -static void down_tty_sem(int index) -{ - down(&tty_sem); -} - -static void up_tty_sem(int index) -{ - up(&tty_sem); -} - -static void release_mem(struct tty_struct *tty, int idx); - -/* - * WSH 06/09/97: Rewritten to remove races and properly clean up after a - * failed open. The new code protects the open with a semaphore, so it's - * really quite straightforward. The semaphore locking can probably be - * relaxed for the (most common) case of reopening a tty. - */ -static int init_dev(kdev_t device, struct tty_struct **ret_tty) -{ - struct tty_struct *tty, *o_tty; - struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; - struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; - struct tty_driver *driver; - int retval=0; - int idx; - - driver = get_tty_driver(device); - if (!driver) - return -ENODEV; - - idx = MINOR(device) - driver->minor_start; - - /* - * Check whether we need to acquire the tty semaphore to avoid - * race conditions. For now, play it safe. - */ - down_tty_sem(idx); - - /* check whether we're reopening an existing tty */ - tty = driver->table[idx]; - if (tty) goto fast_track; - - /* - * First time open is complex, especially for PTY devices. - * This code guarantees that either everything succeeds and the - * TTY is ready for operation, or else the table slots are vacated - * and the allocated memory released. (Except that the termios - * and locked termios may be retained.) - */ - - o_tty = NULL; - tp = o_tp = NULL; - ltp = o_ltp = NULL; - - tty = alloc_tty_struct(); - if(!tty) - goto fail_no_mem; - initialize_tty_struct(tty); - tty->device = device; - tty->driver = *driver; - - tp_loc = &driver->termios[idx]; - if (!*tp_loc) { - tp = (struct termios *) kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!tp) - goto free_mem_out; - *tp = driver->init_termios; - } - - ltp_loc = &driver->termios_locked[idx]; - if (!*ltp_loc) { - ltp = (struct termios *) kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!ltp) - goto free_mem_out; - memset(ltp, 0, sizeof(struct termios)); - } - - if (driver->type == TTY_DRIVER_TYPE_PTY) { - o_tty = alloc_tty_struct(); - if (!o_tty) - goto free_mem_out; - initialize_tty_struct(o_tty); - o_tty->device = (kdev_t) MKDEV(driver->other->major, - driver->other->minor_start + idx); - o_tty->driver = *driver->other; - - o_tp_loc = &driver->other->termios[idx]; - if (!*o_tp_loc) { - o_tp = (struct termios *) - kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_tp) - goto free_mem_out; - *o_tp = driver->other->init_termios; - } - - o_ltp_loc = &driver->other->termios_locked[idx]; - if (!*o_ltp_loc) { - o_ltp = (struct termios *) - kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_ltp) - goto free_mem_out; - memset(o_ltp, 0, sizeof(struct termios)); - } - - /* - * Everything allocated ... set up the o_tty structure. - */ - driver->other->table[idx] = o_tty; - if (!*o_tp_loc) - *o_tp_loc = o_tp; - if (!*o_ltp_loc) - *o_ltp_loc = o_ltp; - o_tty->termios = *o_tp_loc; - o_tty->termios_locked = *o_ltp_loc; - (*driver->other->refcount)++; - if (driver->subtype == PTY_TYPE_MASTER) - o_tty->count++; - - /* Establish the links in both directions */ - tty->link = o_tty; - o_tty->link = tty; - } - - /* - * All structures have been allocated, so now we install them. - * Failures after this point use release_mem to clean up, so - * there's no need to null out the local pointers. - */ - driver->table[idx] = tty; - - if (!*tp_loc) - *tp_loc = tp; - if (!*ltp_loc) - *ltp_loc = ltp; - tty->termios = *tp_loc; - tty->termios_locked = *ltp_loc; - (*driver->refcount)++; - tty->count++; - - /* - * Structures all installed ... call the ldisc open routines. - * If we fail here just call release_mem to clean up. No need - * to decrement the use counts, as release_mem doesn't care. - */ - if (tty->ldisc.open) { - retval = (tty->ldisc.open)(tty); - if (retval) - goto release_mem_out; - } - if (o_tty && o_tty->ldisc.open) { - retval = (o_tty->ldisc.open)(o_tty); - if (retval) { - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - goto release_mem_out; - } - set_bit(TTY_LDISC, &o_tty->flags); - tty_ldisc_enable(o_tty); - } - tty_ldisc_enable(tty); - goto success; - - /* - * This fast open can be used if the tty is already open. - * No memory is allocated, and the only failures are from - * attempting to open a closing tty or attempting multiple - * opens on a pty master. - */ -fast_track: - if (test_bit(TTY_CLOSING, &tty->flags)) { - retval = -EIO; - goto end_init; - } - if (driver->type == TTY_DRIVER_TYPE_PTY && - driver->subtype == PTY_TYPE_MASTER) { - /* - * special case for PTY masters: only one open permitted, - * and the slave side open count is incremented as well. - */ - if (tty->count) { - retval = -EIO; - goto end_init; - } - tty->link->count++; - } - tty->count++; - tty->driver = *driver; /* N.B. why do this every time?? */ - /* FIXME */ - if(!test_bit(TTY_LDISC, &tty->flags)) - printk(KERN_ERR "init_dev but no ldisc\n"); -success: - *ret_tty = tty; - - /* All paths come through here to release the semaphore */ -end_init: - up_tty_sem(idx); - return retval; - - /* Release locally allocated memory ... nothing placed in slots */ -free_mem_out: - if (o_tp) - kfree(o_tp); - if (o_tty) - free_tty_struct(o_tty); - if (ltp) - kfree(ltp); - if (tp) - kfree(tp); - free_tty_struct(tty); - -fail_no_mem: - retval = -ENOMEM; - goto end_init; - - /* call the tty release_mem routine to clean out this slot */ -release_mem_out: - printk(KERN_INFO "init_dev: ldisc open failed, " - "clearing slot %d\n", idx); - release_mem(tty, idx); - goto end_init; -} - -/* - * Releases memory associated with a tty structure, and clears out the - * driver table slots. - */ -static void release_mem(struct tty_struct *tty, int idx) -{ - struct tty_struct *o_tty; - struct termios *tp; - - if ((o_tty = tty->link) != NULL) { - o_tty->driver.table[idx] = NULL; - if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { - tp = o_tty->driver.termios[idx]; - o_tty->driver.termios[idx] = NULL; - kfree(tp); - } - o_tty->magic = 0; - (*o_tty->driver.refcount)--; - list_del_init(&o_tty->tty_files); - free_tty_struct(o_tty); - } - - tty->driver.table[idx] = NULL; - if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { - tp = tty->driver.termios[idx]; - tty->driver.termios[idx] = NULL; - kfree(tp); - } - tty->magic = 0; - (*tty->driver.refcount)--; - list_del_init(&tty->tty_files); - free_tty_struct(tty); -} - -/* - * Even releasing the tty structures is a tricky business.. We have - * to be very careful that the structures are all released at the - * same time, as interrupts might otherwise get the wrong pointers. - * - * WSH 09/09/97: rewritten to avoid some nasty race conditions that could - * lead to double frees or releasing memory still in use. - */ -static void release_dev(struct file * filp) -{ - struct tty_struct *tty, *o_tty; - int pty_master, tty_closing, o_tty_closing, do_sleep; - int idx; - char buf[64]; - unsigned long flags; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev")) - return; - - check_tty_count(tty, "release_dev"); - - tty_fasync(-1, filp, 0); - - idx = MINOR(tty->device) - tty->driver.minor_start; - pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER); - o_tty = tty->link; - -#ifdef TTY_PARANOIA_CHECK - if (idx < 0 || idx >= tty->driver.num) { - printk(KERN_DEBUG "release_dev: bad idx when trying to " - "free (%s)\n", kdevname(tty->device)); - return; - } - if (tty != tty->driver.table[idx]) { - printk(KERN_DEBUG "release_dev: driver.table[%d] not tty " - "for (%s)\n", idx, kdevname(tty->device)); - return; - } - if (tty->termios != tty->driver.termios[idx]) { - printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios " - "for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (tty->termios_locked != tty->driver.termios_locked[idx]) { - printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not " - "termios_locked for (%s)\n", - idx, kdevname(tty->device)); - return; - } -#endif - -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "release_dev of %s (tty count=%d)...", - tty_name(tty, buf), tty->count); -#endif - -#ifdef TTY_PARANOIA_CHECK - if (tty->driver.other) { - if (o_tty != tty->driver.other->table[idx]) { - printk(KERN_DEBUG "release_dev: other->table[%d] " - "not o_tty for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->termios != tty->driver.other->termios[idx]) { - printk(KERN_DEBUG "release_dev: other->termios[%d] " - "not o_termios for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->termios_locked != - tty->driver.other->termios_locked[idx]) { - printk(KERN_DEBUG "release_dev: other->termios_locked[" - "%d] not o_termios_locked for (%s)\n", - idx, kdevname(tty->device)); - return; - } - if (o_tty->link != tty) { - printk(KERN_DEBUG "release_dev: bad pty pointers\n"); - return; - } - } -#endif - - if (tty->driver.close) - tty->driver.close(tty, filp); - - /* - * Sanity check: if tty->count is going to zero, there shouldn't be - * any waiters on tty->read_wait or tty->write_wait. We test the - * wait queues and kick everyone out _before_ actually starting to - * close. This ensures that we won't block while releasing the tty - * structure. - * - * The test for the o_tty closing is necessary, since the master and - * slave sides may close in any order. If the slave side closes out - * first, its count will be one, since the master side holds an open. - * Thus this test wouldn't be triggered at the time the slave closes, - * so we do it now. - * - * Note that it's possible for the tty to be opened again while we're - * flushing out waiters. By recalculating the closing flags before - * each iteration we avoid any problems. - */ - while (1) { - tty_closing = tty->count <= 1; - o_tty_closing = o_tty && - (o_tty->count <= (pty_master ? 1 : 0)); - do_sleep = 0; - - if (tty_closing) { - if (waitqueue_active(&tty->read_wait)) { - wake_up(&tty->read_wait); - do_sleep++; - } - if (waitqueue_active(&tty->write_wait)) { - wake_up(&tty->write_wait); - do_sleep++; - } - } - if (o_tty_closing) { - if (waitqueue_active(&o_tty->read_wait)) { - wake_up(&o_tty->read_wait); - do_sleep++; - } - if (waitqueue_active(&o_tty->write_wait)) { - wake_up(&o_tty->write_wait); - do_sleep++; - } - } - if (!do_sleep) - break; - - printk(KERN_WARNING "release_dev: %s: read/write wait queue " - "active!\n", tty_name(tty, buf)); - schedule(); - } - - /* - * The closing flags are now consistent with the open counts on - * both sides, and we've completed the last operation that could - * block, so it's safe to proceed with closing. - */ - if (pty_master) { - if (--o_tty->count < 0) { - printk(KERN_WARNING "release_dev: bad pty slave count " - "(%d) for %s\n", - o_tty->count, tty_name(o_tty, buf)); - o_tty->count = 0; - } - } - if (--tty->count < 0) { - printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n", - tty->count, tty_name(tty, buf)); - tty->count = 0; - } - - /* - * We've decremented tty->count, so we should zero out - * filp->private_data, to break the link between the tty and - * the file descriptor. Otherwise if filp_close() blocks before - * the file descriptor is removed from the inuse_filp - * list, check_tty_count() could observe a discrepancy and - * printk a warning message to the user. - */ - filp->private_data = 0; - - /* - * Perform some housekeeping before deciding whether to return. - * - * Set the TTY_CLOSING flag if this was the last open. In the - * case of a pty we may have to wait around for the other side - * to close, and TTY_CLOSING makes sure we can't be reopened. - */ - if(tty_closing) - set_bit(TTY_CLOSING, &tty->flags); - if(o_tty_closing) - set_bit(TTY_CLOSING, &o_tty->flags); - - /* - * If _either_ side is closing, make sure there aren't any - * processes that still think tty or o_tty is their controlling - * tty. - */ - if (tty_closing || o_tty_closing) { - struct task_struct *p; - - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->tty == tty || (o_tty && p->tty == o_tty)) - p->tty = NULL; - } - read_unlock(&tasklist_lock); - } - - /* check whether both sides are closing ... */ - if (!tty_closing || (o_tty && !o_tty_closing)) - return; - -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "freeing tty structure..."); -#endif - - /* - * Prevent flush_to_ldisc() from rescheduling the work for later. Then - * kill any delayed work. As this is the final close it does not - * race with the set_ldisc code path. - */ - clear_bit(TTY_LDISC, &tty->flags); - clear_bit(TTY_DONT_FLIP, &tty->flags); - - /* - * Wait for ->hangup_work and ->flip.work handlers to terminate - */ - - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - - /* - * Wait for any short term users (we know they are just driver - * side waiters as the file is closing so user count on the file - * side is zero. - */ - - spin_lock_irqsave(&tty_ldisc_lock, flags); - while(tty->ldisc.refcount) - { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * Shutdown the current line discipline, and reset it to N_TTY. - * N.B. why reset ldisc when we're releasing the memory?? - * FIXME: this MUST get fixed for the new reflocking - */ - if (tty->ldisc.close) - (tty->ldisc.close)(tty); - tty_ldisc_put(tty->ldisc.num); - - /* - * Switch the line discipline back - */ - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(tty,N_TTY); - - if (o_tty) { - /* FIXME: could o_tty be in setldisc here ? */ - clear_bit(TTY_LDISC, &o_tty->flags); - if (o_tty->ldisc.close) - (o_tty->ldisc.close)(o_tty); - tty_ldisc_put(o_tty->ldisc.num); - tty_ldisc_assign(o_tty, tty_ldisc_get(N_TTY)); - tty_set_termios_ldisc(o_tty,N_TTY); - } - - /* - * The release_mem function takes care of the details of clearing - * the slots and preserving the termios structure. - */ - release_mem(tty, idx); -} - -/* - * tty_open and tty_release keep up the tty count that contains the - * number of opens done on a tty. We cannot use the inode-count, as - * different inodes might point to the same tty. - * - * Open-counting is needed for pty masters, as well as for keeping - * track of serial lines: DTR is dropped when the last close happens. - * (This is not done solely through tty->count, now. - Ted 1/27/92) - * - * The termios state of a pty is reset on first open so that - * settings don't persist across reuse. - */ -static int tty_open(struct inode * inode, struct file * filp) -{ - struct tty_struct *tty; - int noctty, retval; - kdev_t device; - unsigned short saved_flags; - char buf[64]; - - saved_flags = filp->f_flags; -retry_open: - noctty = filp->f_flags & O_NOCTTY; - device = inode->i_rdev; - if (device == TTY_DEV) { - if (!current->tty) - return -ENXIO; - device = current->tty->device; - filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ - /* noctty = 1; */ - } -#ifdef CONFIG_VT - if (device == CONSOLE_DEV) { - extern int fg_console; - device = MKDEV(TTY_MAJOR, fg_console + 1); - noctty = 1; - } -#endif - if (device == SYSCONS_DEV) { - struct console *c = console_drivers; - while(c && !c->device) - c = c->next; - if (!c) - return -ENODEV; - device = c->device(c); - filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */ - noctty = 1; - } - - if (device == PTMX_DEV) { -#ifdef CONFIG_UNIX98_PTYS - - /* find a free pty. */ - int major, minor; - struct tty_driver *driver; - - /* find a device that is not in use. */ - retval = -1; - for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) { - driver = &ptm_driver[major]; - for (minor = driver->minor_start ; - minor < driver->minor_start + driver->num ; - minor++) { - device = MKDEV(driver->major, minor); - if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */ - } - } - return -EIO; /* no free ptys */ - ptmx_found: - set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - minor -= driver->minor_start; - devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start)); - tty_register_devfs(&pts_driver[major], DEVFS_FL_DEFAULT, - pts_driver[major].minor_start + minor); - noctty = 1; - goto init_dev_done; - -#else /* CONFIG_UNIX_98_PTYS */ - - return -ENODEV; - -#endif /* CONFIG_UNIX_98_PTYS */ - } - - retval = init_dev(device, &tty); - if (retval) - return retval; - -#ifdef CONFIG_UNIX98_PTYS -init_dev_done: -#endif - filp->private_data = tty; - file_move(filp, &tty->tty_files); - check_tty_count(tty, "tty_open"); - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER) - noctty = 1; -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "opening %s...", tty_name(tty, buf)); -#endif - if (tty->driver.open) - retval = tty->driver.open(tty, filp); - else - retval = -ENODEV; - filp->f_flags = saved_flags; - - if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser()) - retval = -EBUSY; - - if (retval) { -#ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "error %d in opening %s...", retval, - tty_name(tty, buf)); -#endif - - release_dev(filp); - if (retval != -ERESTARTSYS) - return retval; - if (signal_pending(current)) - return retval; - schedule(); - /* - * Need to reset f_op in case a hangup happened. - */ - filp->f_op = &tty_fops; - goto retry_open; - } - if (!noctty && - current->leader && - !current->tty && - tty->session == 0) { - task_lock(current); - current->tty = tty; - task_unlock(current); - current->tty_old_pgrp = 0; - tty->session = current->session; - tty->pgrp = current->pgrp; - } - if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) && - (tty->driver.subtype == SERIAL_TYPE_CALLOUT) && - (tty->count == 1)) { - static int nr_warns; - if (nr_warns < 5) { - printk(KERN_WARNING "tty_io.c: " - "process %d (%s) used obsolete /dev/%s - " - "update software to use /dev/ttyS%d\n", - current->pid, current->comm, - tty_name(tty, buf), TTY_NUMBER(tty)); - nr_warns++; - } - } - return 0; -} - -static int tty_release(struct inode * inode, struct file * filp) -{ - lock_kernel(); - release_dev(filp); - unlock_kernel(); - return 0; -} - -/* No kernel lock held - fine */ -static unsigned int tty_poll(struct file * filp, poll_table * wait) -{ - struct tty_struct * tty; - struct tty_ldisc *ld; - int ret = 0; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll")) - return 0; - - ld = tty_ldisc_ref_wait(tty); - if (ld->poll) - ret = (ld->poll)(tty, filp, wait); - tty_ldisc_deref(ld); - return ret; -} - -static int tty_fasync(int fd, struct file * filp, int on) -{ - struct tty_struct * tty; - int retval; - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync")) - return 0; - - retval = fasync_helper(fd, filp, on, &tty->fasync); - if (retval <= 0) - return retval; - - if (on) { - if (!waitqueue_active(&tty->read_wait)) - tty->minimum_to_wake = 1; - if (filp->f_owner.pid == 0) { - filp->f_owner.pid = (-tty->pgrp) ? : current->pid; - filp->f_owner.uid = current->uid; - filp->f_owner.euid = current->euid; - } - } else { - if (!tty->fasync && !waitqueue_active(&tty->read_wait)) - tty->minimum_to_wake = N_TTY_BUF_SIZE; - } - return 0; -} - -static int tiocsti(struct tty_struct *tty, char * arg) -{ - char ch, mbz = 0; - struct tty_ldisc *ld; - - if ((current->tty != tty) && !suser()) - return -EPERM; - if (get_user(ch, arg)) - return -EFAULT; - ld = tty_ldisc_ref_wait(tty); - ld->receive_buf(tty, &ch, &mbz, 1); - tty_ldisc_deref(ld); - return 0; -} - -static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg) -{ - if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) - return -EFAULT; - return 0; -} - -static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize * arg) -{ - struct winsize tmp_ws; - - if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) - return -EFAULT; - if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg))) - return 0; - if (tty->pgrp > 0) - kill_pg(tty->pgrp, SIGWINCH, 1); - if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0)) - kill_pg(real_tty->pgrp, SIGWINCH, 1); - tty->winsize = tmp_ws; - real_tty->winsize = tmp_ws; - return 0; -} - -static int tioccons(struct inode *inode, struct file *file) -{ - if (inode->i_rdev == SYSCONS_DEV || - inode->i_rdev == CONSOLE_DEV) { - struct file *f; - if (!suser()) - return -EPERM; - spin_lock(&redirect_lock); - f = redirect; - redirect = NULL; - spin_unlock(&redirect_lock); - if (f) - fput(f); - return 0; - } - spin_lock(&redirect_lock); - if (redirect) { - spin_unlock(&redirect_lock); - return -EBUSY; - } - get_file(file); - redirect = file; - spin_unlock(&redirect_lock); - return 0; -} - - -static int fionbio(struct file *file, int *arg) -{ - int nonblock; - - if (get_user(nonblock, arg)) - return -EFAULT; - - if (nonblock) - file->f_flags |= O_NONBLOCK; - else - file->f_flags &= ~O_NONBLOCK; - return 0; -} - -static int tiocsctty(struct tty_struct *tty, int arg) -{ - if (current->leader && - (current->session == tty->session)) - return 0; - /* - * The process must be a session leader and - * not have a controlling tty already. - */ - if (!current->leader || current->tty) - return -EPERM; - if (tty->session > 0) { - /* - * This tty is already the controlling - * tty for another session group! - */ - if ((arg == 1) && suser()) { - /* - * Steal it away - */ - struct task_struct *p; - - read_lock(&tasklist_lock); - for_each_task(p) - if (p->tty == tty) - p->tty = NULL; - read_unlock(&tasklist_lock); - } else - return -EPERM; - } - task_lock(current); - current->tty = tty; - task_unlock(current); - current->tty_old_pgrp = 0; - tty->session = current->session; - tty->pgrp = current->pgrp; - return 0; -} - -static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. - */ - if (tty == real_tty && current->tty != real_tty) - return -ENOTTY; - return put_user(real_tty->pgrp, arg); -} - -static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - pid_t pgrp; - int retval = tty_check_change(real_tty); - - if (retval == -EIO) - return -ENOTTY; - if (retval) - return retval; - if (!current->tty || - (current->tty != real_tty) || - (real_tty->session != current->session)) - return -ENOTTY; - if (get_user(pgrp, (pid_t *) arg)) - return -EFAULT; - if (pgrp < 0) - return -EINVAL; - if (session_of_pgrp(pgrp) != current->session) - return -EPERM; - real_tty->pgrp = pgrp; - return 0; -} - -static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) -{ - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. - */ - if (tty == real_tty && current->tty != real_tty) - return -ENOTTY; - if (real_tty->session <= 0) - return -ENOTTY; - return put_user(real_tty->session, arg); -} - -static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg) -{ - if (copy_to_user(arg, tty, sizeof(*arg))) - return -EFAULT; - return 0; -} - -static int tiocsetd(struct tty_struct *tty, int *arg) -{ - int ldisc; - - if (get_user(ldisc, arg)) - return -EFAULT; - return tty_set_ldisc(tty, ldisc); -} - -static int send_break(struct tty_struct *tty, int duration) -{ - tty->driver.break_ctl(tty, -1); - if (!signal_pending(current)) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(duration); - } - tty->driver.break_ctl(tty, 0); - if (signal_pending(current)) - return -EINTR; - return 0; -} - -static int tty_generic_brk(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) -{ - if (cmd == TCSBRK && arg) - { - /* tcdrain case */ - int retval = tty_check_change(tty); - if (retval) - return retval; - tty_wait_until_sent(tty, 0); - if (signal_pending(current)) - return -EINTR; - } - return 0; -} - -/* - * Split this up, as gcc can choke on it otherwise.. - */ -int tty_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) -{ - struct tty_struct *tty, *real_tty; - int retval; - struct tty_ldisc *ld; - - tty = (struct tty_struct *)file->private_data; - if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) - return -EINVAL; - - real_tty = tty; - if (tty->driver.type == TTY_DRIVER_TYPE_PTY && - tty->driver.subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - - /* - * Break handling by driver - */ - if (!tty->driver.break_ctl) { - switch(cmd) { - case TIOCSBRK: - case TIOCCBRK: - if (tty->driver.ioctl) - return tty->driver.ioctl(tty, file, cmd, arg); - return -EINVAL; - - /* These two ioctl's always return success; even if */ - /* the driver doesn't support them. */ - case TCSBRK: - case TCSBRKP: - retval = -ENOIOCTLCMD; - if (tty->driver.ioctl) - retval = tty->driver.ioctl(tty, file, cmd, arg); - /* Not driver handled */ - if (retval == -ENOIOCTLCMD) - retval = tty_generic_brk(tty, file, cmd, arg); - return retval; - } - } - - /* - * Factor out some common prep work - */ - switch (cmd) { - case TIOCSETD: - case TIOCSBRK: - case TIOCCBRK: - case TCSBRK: - case TCSBRKP: - retval = tty_check_change(tty); - if (retval) - return retval; - if (cmd != TIOCCBRK) { - tty_wait_until_sent(tty, 0); - if (signal_pending(current)) - return -EINTR; - } - break; - } - - switch (cmd) { - case TIOCSTI: - return tiocsti(tty, (char *)arg); - case TIOCGWINSZ: - return tiocgwinsz(tty, (struct winsize *) arg); - case TIOCSWINSZ: - return tiocswinsz(tty, real_tty, (struct winsize *) arg); - case TIOCCONS: - return real_tty!=tty ? -EINVAL : tioccons(inode, file); - case FIONBIO: - return fionbio(file, (int *) arg); - case TIOCEXCL: - set_bit(TTY_EXCLUSIVE, &tty->flags); - return 0; - case TIOCNXCL: - clear_bit(TTY_EXCLUSIVE, &tty->flags); - return 0; - case TIOCNOTTY: - if (current->tty != tty) - return -ENOTTY; - if (current->leader) - disassociate_ctty(0); - task_lock(current); - current->tty = NULL; - task_unlock(current); - return 0; - case TIOCSCTTY: - return tiocsctty(tty, arg); - case TIOCGPGRP: - return tiocgpgrp(tty, real_tty, (pid_t *) arg); - case TIOCSPGRP: - return tiocspgrp(tty, real_tty, (pid_t *) arg); - case TIOCGSID: - return tiocgsid(tty, real_tty, (pid_t *) arg); - case TIOCGETD: - /* FIXME: check this is ok */ - return put_user(tty->ldisc.num, (int *) arg); - case TIOCSETD: - return tiocsetd(tty, (int *) arg); -#ifdef CONFIG_VT - case TIOCLINUX: - return tioclinux(tty, arg); -#endif - case TIOCTTYGSTRUCT: - return tiocttygstruct(tty, (struct tty_struct *) arg); - - /* - * Break handling - */ - case TIOCSBRK: /* Turn break on, unconditionally */ - tty->driver.break_ctl(tty, -1); - return 0; - - case TIOCCBRK: /* Turn break off, unconditionally */ - tty->driver.break_ctl(tty, 0); - return 0; - case TCSBRK: /* SVID version: non-zero arg --> no break */ - /* - * XXX is the above comment correct, or the - * code below correct? Is this ioctl used at - * all by anyone? - */ - if (!arg) - return send_break(tty, HZ/4); - return 0; - case TCSBRKP: /* support for POSIX tcsendbreak() */ - return send_break(tty, arg ? arg*(HZ/10) : HZ/4); - } - if (tty->driver.ioctl) { - retval = (tty->driver.ioctl)(tty, file, cmd, arg); - if (retval != -ENOIOCTLCMD) - return retval; - } - ld = tty_ldisc_ref_wait(tty); - retval = -EINVAL; - if (ld->ioctl) { - retval = ld->ioctl(tty, file, cmd, arg); - if (retval == -ENOIOCTLCMD) - retval = -EINVAL; - } - tty_ldisc_deref(ld); - return retval; -} - - -/* - * This implements the "Secure Attention Key" --- the idea is to - * prevent trojan horses by killing all processes associated with this - * tty when the user hits the "Secure Attention Key". Required for - * super-paranoid applications --- see the Orange Book for more details. - * - * This code could be nicer; ideally it should send a HUP, wait a few - * seconds, then send a INT, and then a KILL signal. But you then - * have to coordinate with the init process, since all processes associated - * with the current tty must be dead before the new getty is allowed - * to spawn. - * - * Now, if it would be correct ;-/ The current code has a nasty hole - - * it doesn't catch files in flight. We may send the descriptor to ourselves - * via AF_UNIX socket, close it and later fetch from socket. FIXME. - * - * Nasty bug: do_SAK is being called in interrupt context. This can - * deadlock. We punt it up to process context. AKPM - 16Mar2001 - */ -static void __do_SAK(void *arg) -{ -#ifdef TTY_SOFT_SAK - tty_hangup(tty); -#else - struct tty_struct *tty = arg; - struct task_struct *p; - int session; - int i; - struct file *filp; - struct tty_ldisc *disc; - - if (!tty) - return; - session = tty->session; - /* We don't want an ldisc switch during this */ - disc = tty_ldisc_ref(tty); - if (disc && disc->flush_buffer) - disc->flush_buffer(tty); - tty_ldisc_deref(disc); - - if (tty->driver.flush_buffer) - tty->driver.flush_buffer(tty); - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->tty == tty) || - ((session > 0) && (p->session == session))) { - send_sig(SIGKILL, p, 1); - continue; - } - task_lock(p); - if (p->files) { - read_lock(&p->files->file_lock); - for (i=0; i < p->files->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (filp && (filp->f_op == &tty_fops) && - (filp->private_data == tty)) { - send_sig(SIGKILL, p, 1); - break; - } - } - read_unlock(&p->files->file_lock); - } - task_unlock(p); - } - read_unlock(&tasklist_lock); -#endif -} - -/* - * The tq handling here is a little racy - tty->SAK_tq may already be queued. - * But there's no mechanism to fix that without futzing with tqueue_lock. - * Fortunately we don't need to worry, because if ->SAK_tq is already queued, - * the values which we write to it will be identical to the values which it - * already has. --akpm - */ -void do_SAK(struct tty_struct *tty) -{ - if (!tty) - return; - PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty); - schedule_task(&tty->SAK_tq); -} - -/* - * This routine is called out of the software interrupt to flush data - * from the flip buffer to the line discipline. - */ -static void flush_to_ldisc(void *private_) -{ - struct tty_struct *tty = (struct tty_struct *) private_; - unsigned char *cp; - char *fp; - int count; - unsigned long flags; - struct tty_ldisc *disc; - - disc = tty_ldisc_ref(tty); - if (disc == NULL) /* !TTY_LDISC */ - return; - - if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - queue_task(&tty->flip.tqueue, &tq_timer); - goto out; - } - if (tty->flip.buf_num) { - cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - tty->flip.buf_num = 0; - - save_flags(flags); cli(); - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - } else { - cp = tty->flip.char_buf; - fp = tty->flip.flag_buf; - tty->flip.buf_num = 1; - - save_flags(flags); cli(); - tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - } - count = tty->flip.count; - tty->flip.count = 0; - restore_flags(flags); - - disc->receive_buf(tty, cp, fp, count); -out: - tty_ldisc_deref(disc); -} - -/* - * Call the ldisc flush directly from a driver. This function may - * return an error and need retrying by the user. - */ - -int tty_push_data(struct tty_struct *tty, unsigned char *cp, unsigned char *fp, int count) -{ - int ret = 0; - struct tty_ldisc *disc; - - disc = tty_ldisc_ref(tty); - if(test_bit(TTY_DONT_FLIP, &tty->flags)) - ret = -EAGAIN; - else if(disc == NULL) - ret = -EIO; - else - disc->receive_buf(tty, cp, fp, count); - tty_ldisc_deref(disc); - return ret; - -} - -/* - * Routine which returns the baud rate of the tty - * - * Note that the baud_table needs to be kept in sync with the - * include/asm/termbits.h file. - */ -static int baud_table[] = { - 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 115200, 230400, 460800, -#ifdef __sparc__ - 76800, 153600, 307200, 614400, 921600 -#else - 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000, - 2500000, 3000000, 3500000, 4000000 -#endif -}; - -static int n_baud_table = sizeof(baud_table)/sizeof(int); - -int tty_get_baud_rate(struct tty_struct *tty) -{ - unsigned int cflag, i; - - cflag = tty->termios->c_cflag; - - i = cflag & CBAUD; - if (i & CBAUDEX) { - i &= ~CBAUDEX; - if (i < 1 || i+15 >= n_baud_table) - tty->termios->c_cflag &= ~CBAUDEX; - else - i += 15; - } - if (i==15 && tty->alt_speed) { - if (!tty->warned) { - printk(KERN_WARNING "Use of setserial/setrocket to " - "set SPD_* flags is deprecated\n"); - tty->warned = 1; - } - return(tty->alt_speed); - } - - return baud_table[i]; -} - -void tty_flip_buffer_push(struct tty_struct *tty) -{ - if (tty->low_latency) - flush_to_ldisc((void *) tty); - else - queue_task(&tty->flip.tqueue, &tq_timer); -} - -/* - * This subroutine initializes a tty structure. - */ -static void initialize_tty_struct(struct tty_struct *tty) -{ - memset(tty, 0, sizeof(struct tty_struct)); - tty->magic = TTY_MAGIC; - tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); - tty->pgrp = -1; - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - tty->flip.tqueue.routine = flush_to_ldisc; - tty->flip.tqueue.data = tty; - init_MUTEX(&tty->flip.pty_sem); - init_MUTEX(&tty->termios_sem); - init_waitqueue_head(&tty->write_wait); - init_waitqueue_head(&tty->read_wait); - tty->tq_hangup.routine = do_tty_hangup; - tty->tq_hangup.data = tty; - sema_init(&tty->atomic_read, 1); - sema_init(&tty->atomic_write, 1); - spin_lock_init(&tty->read_lock); - INIT_LIST_HEAD(&tty->tty_files); - INIT_TQUEUE(&tty->SAK_tq, 0, 0); -} - -/* - * The default put_char routine if the driver did not define one. - */ -void tty_default_put_char(struct tty_struct *tty, unsigned char ch) -{ - tty->driver.write(tty, 0, &ch, 1); -} - -/* - * Register a tty device described by <driver>, with minor number <minor>. - */ -void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor) -{ -#ifdef CONFIG_DEVFS_FS - umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR; - kdev_t device = MKDEV (driver->major, minor); - int idx = minor - driver->minor_start; - char buf[32]; - - switch (device) { - case TTY_DEV: - case PTMX_DEV: - mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - break; - default: - if (driver->major == PTY_MASTER_MAJOR) - mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - break; - } - if ( (minor < driver->minor_start) || - (minor >= driver->minor_start + driver->num) ) { - printk(KERN_ERR "Attempt to register invalid minor number " - "with devfs (%d:%d).\n", (int)driver->major,(int)minor); - return; - } -# ifdef CONFIG_UNIX98_PTYS - if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) && - (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) ) - flags |= DEVFS_FL_CURRENT_OWNER; -# endif - sprintf(buf, driver->name, idx + driver->name_base); - devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT, - driver->major, minor, mode, &tty_fops, NULL); -#endif /* CONFIG_DEVFS_FS */ -} - -void tty_unregister_devfs (struct tty_driver *driver, unsigned minor) -{ -#ifdef CONFIG_DEVFS_FS - void * handle; - int idx = minor - driver->minor_start; - char buf[32]; - - sprintf(buf, driver->name, idx + driver->name_base); - handle = devfs_find_handle (NULL, buf, driver->major, minor, - DEVFS_SPECIAL_CHR, 0); - devfs_unregister (handle); -#endif /* CONFIG_DEVFS_FS */ -} - -EXPORT_SYMBOL(tty_register_devfs); -EXPORT_SYMBOL(tty_unregister_devfs); - -/* - * Called by a tty driver to register itself. - */ -int tty_register_driver(struct tty_driver *driver) -{ - int error; - int i; - - if (driver->flags & TTY_DRIVER_INSTALLED) - return 0; - - error = devfs_register_chrdev(driver->major, driver->name, &tty_fops); - if (error < 0) - return error; - else if(driver->major == 0) - driver->major = error; - - if (!driver->put_char) - driver->put_char = tty_default_put_char; - - driver->prev = 0; - driver->next = tty_drivers; - if (tty_drivers) tty_drivers->prev = driver; - tty_drivers = driver; - - if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { - for(i = 0; i < driver->num; i++) - tty_register_devfs(driver, 0, driver->minor_start + i); - } - proc_tty_register_driver(driver); - return error; -} - -/* - * Called by a tty driver to unregister itself. - */ -int tty_unregister_driver(struct tty_driver *driver) -{ - int retval; - struct tty_driver *p; - int i, found = 0; - struct termios *tp; - const char *othername = NULL; - - if (*driver->refcount) - return -EBUSY; - - for (p = tty_drivers; p; p = p->next) { - if (p == driver) - found++; - else if (p->major == driver->major) - othername = p->name; - } - - if (!found) - return -ENOENT; - - if (othername == NULL) { - retval = devfs_unregister_chrdev(driver->major, driver->name); - if (retval) - return retval; - } else - devfs_register_chrdev(driver->major, othername, &tty_fops); - - if (driver->prev) - driver->prev->next = driver->next; - else - tty_drivers = driver->next; - - if (driver->next) - driver->next->prev = driver->prev; - - /* - * Free the termios and termios_locked structures because - * we don't want to get memory leaks when modular tty - * drivers are removed from the kernel. - */ - for (i = 0; i < driver->num; i++) { - tp = driver->termios[i]; - if (tp) { - driver->termios[i] = NULL; - kfree(tp); - } - tp = driver->termios_locked[i]; - if (tp) { - driver->termios_locked[i] = NULL; - kfree(tp); - } - tty_unregister_devfs(driver, driver->minor_start + i); - } - proc_tty_unregister_driver(driver); - return 0; -} - - -/* - * Initialize the console device. This is called *early*, so - * we can't necessarily depend on lots of kernel help here. - * Just do some early initializations, and do the complex setup - * later. - */ -void __init console_init(void) -{ - /* Setup the default TTY line discipline. */ - memset(tty_ldiscs, 0, NR_LDISCS*sizeof(struct tty_ldisc)); - (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); - - /* - * Set up the standard termios. Individual tty drivers may - * deviate from this; this is used as a template. - */ - memset(&tty_std_termios, 0, sizeof(struct termios)); - memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS); - tty_std_termios.c_iflag = ICRNL | IXON; - tty_std_termios.c_oflag = OPOST | ONLCR; - tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL; - tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | - ECHOCTL | ECHOKE | IEXTEN; - - /* - * set up the console device so that later boot sequences can - * inform about problems etc.. - */ -#ifdef CONFIG_EARLY_PRINTK - disable_early_printk(); -#endif - -#ifdef CONFIG_XEN_CONSOLE - xen_console_init(); -#endif - -#ifdef CONFIG_VT - con_init(); -#endif -#ifdef CONFIG_AU1X00_SERIAL_CONSOLE - au1x00_serial_console_init(); -#endif -#ifdef CONFIG_SERIAL_CONSOLE -#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2)) - console_8xx_init(); -#elif defined(CONFIG_MAC_SERIAL) && defined(CONFIG_SERIAL) - if (_machine == _MACH_Pmac) - mac_scc_console_init(); - else - serial_console_init(); -#elif defined(CONFIG_MAC_SERIAL) - mac_scc_console_init(); -#elif defined(CONFIG_PARISC) - pdc_console_init(); -#elif defined(CONFIG_SERIAL) - serial_console_init(); -#endif /* CONFIG_8xx */ -#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) - vme_scc_console_init(); -#endif -#if defined(CONFIG_SERIAL167) - serial167_console_init(); -#endif -#if defined(CONFIG_SH_SCI) - sci_console_init(); -#endif -#endif -#ifdef CONFIG_SERIAL_DEC_CONSOLE - dec_serial_console_init(); -#endif -#ifdef CONFIG_TN3270_CONSOLE - tub3270_con_init(); -#endif -#ifdef CONFIG_TN3215 - con3215_init(); -#endif -#ifdef CONFIG_HWC - hwc_console_init(); -#endif -#ifdef CONFIG_STDIO_CONSOLE - stdio_console_init(); -#endif -#ifdef CONFIG_SERIAL_21285_CONSOLE - rs285_console_init(); -#endif -#ifdef CONFIG_SERIAL_SA1100_CONSOLE - sa1100_rs_console_init(); -#endif -#ifdef CONFIG_ARC_CONSOLE - arc_console_init(); -#endif -#ifdef CONFIG_SERIAL_AMBA_CONSOLE - ambauart_console_init(); -#endif -#ifdef CONFIG_SERIAL_TX3912_CONSOLE - tx3912_console_init(); -#endif -#ifdef CONFIG_TXX927_SERIAL_CONSOLE - txx927_console_init(); -#endif -#ifdef CONFIG_SERIAL_TXX9_CONSOLE - txx9_serial_console_init(); -#endif -#ifdef CONFIG_SIBYTE_SB1250_DUART_CONSOLE - sb1250_serial_console_init(); -#endif -#ifdef CONFIG_IP22_SERIAL - sgi_serial_console_init(); -#endif -} - -static struct tty_driver dev_tty_driver, dev_syscons_driver; -#ifdef CONFIG_UNIX98_PTYS -static struct tty_driver dev_ptmx_driver; -#endif -#ifdef CONFIG_HVC_CONSOLE - hvc_console_init(); -#endif -#ifdef CONFIG_VT -static struct tty_driver dev_console_driver; -#endif - -/* - * Ok, now we can initialize the rest of the tty devices and can count - * on memory allocations, interrupts etc.. - */ -void __init tty_init(void) -{ - /* - * dev_tty_driver and dev_console_driver are actually magic - * devices which get redirected at open time. Nevertheless, - * we register them so that register_chrdev is called - * appropriately. - */ - memset(&dev_tty_driver, 0, sizeof(struct tty_driver)); - dev_tty_driver.magic = TTY_DRIVER_MAGIC; - dev_tty_driver.driver_name = "/dev/tty"; - dev_tty_driver.name = dev_tty_driver.driver_name + 5; - dev_tty_driver.name_base = 0; - dev_tty_driver.major = TTYAUX_MAJOR; - dev_tty_driver.minor_start = 0; - dev_tty_driver.num = 1; - dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_tty_driver.subtype = SYSTEM_TYPE_TTY; - - if (tty_register_driver(&dev_tty_driver)) - panic("Couldn't register /dev/tty driver\n"); - - dev_syscons_driver = dev_tty_driver; - dev_syscons_driver.driver_name = "/dev/console"; - dev_syscons_driver.name = dev_syscons_driver.driver_name + 5; - dev_syscons_driver.major = TTYAUX_MAJOR; - dev_syscons_driver.minor_start = 1; - dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS; - - if (tty_register_driver(&dev_syscons_driver)) - panic("Couldn't register /dev/console driver\n"); - - /* console calls tty_register_driver() before kmalloc() works. - * Thus, we can't devfs_register() then. Do so now, instead. - */ -#ifdef CONFIG_VT - con_init_devfs(); -#endif - -#ifdef CONFIG_UNIX98_PTYS - dev_ptmx_driver = dev_tty_driver; - dev_ptmx_driver.driver_name = "/dev/ptmx"; - dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5; - dev_ptmx_driver.major= MAJOR(PTMX_DEV); - dev_ptmx_driver.minor_start = MINOR(PTMX_DEV); - dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX; - - if (tty_register_driver(&dev_ptmx_driver)) - panic("Couldn't register /dev/ptmx driver\n"); -#endif - -#ifdef CONFIG_VT - dev_console_driver = dev_tty_driver; - dev_console_driver.driver_name = "/dev/vc/0"; - dev_console_driver.name = dev_console_driver.driver_name + 5; - dev_console_driver.major = TTY_MAJOR; - dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM; - dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE; - - if (tty_register_driver(&dev_console_driver)) - panic("Couldn't register /dev/tty0 driver\n"); - - kbd_init(); -#endif - -#ifdef CONFIG_SGI_L1_SERIAL_CONSOLE - if (ia64_platform_is("sn2")) { - sn_sal_serial_console_init(); - return; /* only one console right now for SN2 */ - } -#endif -#ifdef CONFIG_ESPSERIAL /* init ESP before rs, so rs doesn't see the port */ - espserial_init(); -#endif -#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) - vme_scc_init(); -#endif -#ifdef CONFIG_SERIAL_TX3912 - tx3912_rs_init(); -#endif -#ifdef CONFIG_ROCKETPORT - rp_init(); -#endif -#ifdef CONFIG_SERIAL167 - serial167_init(); -#endif -#ifdef CONFIG_CYCLADES - cy_init(); -#endif -#ifdef CONFIG_STALLION - stl_init(); -#endif -#ifdef CONFIG_ISTALLION - stli_init(); -#endif -#ifdef CONFIG_DIGI - pcxe_init(); -#endif -#ifdef CONFIG_DIGIEPCA - pc_init(); -#endif -#ifdef CONFIG_SPECIALIX - specialix_init(); -#endif -#if (defined(CONFIG_8xx) || defined(CONFIG_CPM2)) - rs_8xx_init(); -#endif /* CONFIG_8xx */ - pty_init(); -#ifdef CONFIG_MOXA_SMARTIO - mxser_init(); -#endif -#ifdef CONFIG_MOXA_INTELLIO - moxa_init(); -#endif -#ifdef CONFIG_VT - vcs_init(); -#endif -#ifdef CONFIG_TN3270 - tub3270_init(); -#endif -#ifdef CONFIG_TN3215 - tty3215_init(); -#endif -#ifdef CONFIG_HWC - hwc_tty_init(); -#endif -#ifdef CONFIG_A2232 - a2232board_init(); -#endif -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile --- a/linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,97 +0,0 @@ -# -# drivers/scsi/aic7xxx/Makefile -# -# Makefile for the Linux aic7xxx SCSI driver. -# - -O_TARGET := aic7xxx_drv.o - -list-multi := aic7xxx.o aic79xx.o - -obj-$(CONFIG_SCSI_AIC7XXX) += aic7xxx.o -ifeq ($(CONFIG_PCI),y) -obj-$(CONFIG_SCSI_AIC79XX) += aic79xx.o -endif - -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/scsi -Werror -#EXTRA_CFLAGS += -g - -# Platform Specific Files -obj-aic7xxx = aic7xxx_osm.o aic7xxx_proc.o - -# Core Files -obj-aic7xxx += aic7xxx_core.o aic7xxx_93cx6.o -ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y) -obj-aic7xxx += aic7xxx_reg_print.o -endif - -#EISA Specific Files -AIC7XXX_EISA_ARCH = $(filter i386 alpha xen,$(ARCH)) -ifneq ($(AIC7XXX_EISA_ARCH),) -obj-aic7xxx += aic7770.o -# Platform Specific EISA Files -obj-aic7xxx += aic7770_osm.o -endif - -#PCI Specific Files -ifeq ($(CONFIG_PCI),y) -obj-aic7xxx += aic7xxx_pci.o -# Platform Specific PCI Files -obj-aic7xxx += aic7xxx_osm_pci.o -endif - -# Platform Specific U320 Files -obj-aic79xx = aic79xx_osm.o aic79xx_proc.o aic79xx_osm_pci.o -# Core Files -obj-aic79xx += aic79xx_core.o aic79xx_pci.o -ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y) -obj-aic79xx += aic79xx_reg_print.o -endif - -# Override our module desitnation -MOD_DESTDIR = $(shell cd .. && $(CONFIG_SHELL) $(TOPDIR)/scripts/pathdown.sh) - -include $(TOPDIR)/Rules.make - -aic7xxx_core.o: aic7xxx_seq.h -$(obj-aic7xxx): aic7xxx_reg.h -aic7xxx.o: aic7xxx_seq.h aic7xxx_reg.h $(obj-aic7xxx) - $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic7xxx) - -aic79xx_core.o: aic79xx_seq.h -$(obj-aic79xx): aic79xx_reg.h -aic79xx.o: aic79xx_seq.h aic79xx_reg.h $(obj-aic79xx) - $(LD) $(LD_RFLAG) -r -o $@ $(obj-aic79xx) - -ifeq ($(CONFIG_AIC7XXX_BUILD_FIRMWARE),y) -aic7xxx_gen = aic7xxx_seq.h aic7xxx_reg.h -ifeq ($(CONFIG_AIC7XXX_REG_PRETTY_PRINT),y) -aic7xxx_gen += aic7xxx_reg_print.c -aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h \ - -p aic7xxx_reg_print.c -i aic7xxx_osm.h \ - -o aic7xxx_seq.h aic7xxx.seq -else -aic7xxx_asm_cmd = aicasm/aicasm -I. -r aic7xxx_reg.h \ - -o aic7xxx_seq.h aic7xxx.seq -endif -$(aic7xxx_gen): aic7xxx.seq aic7xxx.reg aicasm/aicasm - $(aic7xxx_asm_cmd) -endif - -ifeq ($(CONFIG_AIC79XX_BUILD_FIRMWARE),y) -aic79xx_gen = aic79xx_seq.h aic79xx_reg.h -ifeq ($(CONFIG_AIC79XX_REG_PRETTY_PRINT),y) -aic79xx_gen += aic79xx_reg_print.c -aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \ - -p aic79xx_reg_print.c -i aic79xx_osm.h \ - -o aic79xx_seq.h aic79xx.seq -else -aic79xx_asm_cmd = aicasm/aicasm -I. -r aic79xx_reg.h \ - -o aic79xx_seq.h aic79xx.seq -endif -$(aic79xx_gen): aic79xx.seq aic79xx.reg aicasm/aicasm - $(aic79xx_asm_cmd) -endif - -aicasm/aicasm: aicasm/*.[chyl] - $(MAKE) -C aicasm diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/bugs.h --- a/linux-2.4-xen-sparse/include/asm-xen/bugs.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,53 +0,0 @@ -/* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * <rreilova@xxxxxxxxxxxx> - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/msr.h> - - -static void __init check_fpu(void) -{ - boot_cpu_data.fdiv_bug = 0; -} - -static void __init check_hlt(void) -{ - boot_cpu_data.hlt_works_ok = 1; -} - -static void __init check_bugs(void) -{ - extern void __init boot_init_fpu(void); - - identify_cpu(&boot_cpu_data); - boot_init_fpu(); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_fpu(); - check_hlt(); - system_utsname.machine[1] = '0' + - (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/desc.h --- a/linux-2.4-xen-sparse/include/asm-xen/desc.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,37 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#include <asm/ldt.h> - -#ifndef __ASSEMBLY__ - -struct desc_struct { - unsigned long a,b; -}; - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); -}; - -extern struct desc_struct default_ldt[]; - -static inline void clear_LDT(void) -{ - xen_set_ldt(0, 0); -} - -static inline void load_LDT(mm_context_t *pc) -{ - void *segments = pc->ldt; - int count = pc->size; - - if ( count == 0 ) - segments = NULL; - - xen_set_ldt((unsigned long)segments, count); -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ARCH_DESC_H__ */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/fixmap.h --- a/linux-2.4-xen-sparse/include/asm-xen/fixmap.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,107 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <asm/apicdef.h> -#include <asm/page.h> -#include <asm-xen/gnttab.h> -#ifdef CONFIG_HIGHMEM -#include <linux/threads.h> -#include <asm/kmap_types.h> -#endif - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -enum fixed_addresses { -#ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif - FIX_BLKRING_BASE, - FIX_NETRING0_BASE, - FIX_NETRING1_BASE, - FIX_NETRING2_BASE, - FIX_NETRING3_BASE, - FIX_SHARED_INFO, - FIX_GNTTAB_BEGIN, - FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1, -#ifdef CONFIG_VGA_CONSOLE -#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */ -#else -#define NR_FIX_BTMAPS 1 /* in case anyone wants it in future... */ -#endif - FIX_BTMAP_END, - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, - /* our bt_ioremap is permanent, unlike other architectures */ - - __end_of_permanent_fixed_addresses, - __end_of_fixed_addresses = __end_of_permanent_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - -extern void clear_fixmap(enum fixed_addresses idx); - -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ -#define FIXADDR_TOP (HYPERVISOR_VIRT_START - 2*PAGE_SIZE) -#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) - -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(unsigned int idx) -{ - return __fix_to_virt(idx); -} - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/highmem.h --- a/linux-2.4-xen-sparse/include/asm-xen/highmem.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,132 +0,0 @@ -/* - * highmem.h: virtual kernel memory mappings for high memory - * - * Used in CONFIG_HIGHMEM systems for memory pages which - * are not addressable by direct kernel virtual addresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * Gerhard.Wichert@xxxxxxxxxxxxxx - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * up to 16 Terabyte physical memory. With current x86 CPUs - * we now support up to 64 Gigabytes physical RAM. - * - * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx> - */ - -#ifndef _ASM_HIGHMEM_H -#define _ASM_HIGHMEM_H - -#ifdef __KERNEL__ - -#include <linux/config.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <asm/kmap_types.h> -#include <asm/pgtable.h> - -#ifdef CONFIG_DEBUG_HIGHMEM -#define HIGHMEM_DEBUG 1 -#else -#define HIGHMEM_DEBUG 0 -#endif - -/* declarations for highmem.c */ -extern unsigned long highstart_pfn, highend_pfn; - -extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; -extern pte_t *pkmap_page_table; - -extern void kmap_init(void) __init; - -/* - * Right now we initialize only a single pte table. It can be extended - * easily, subsequent pte tables have to be allocated in one physical - * chunk of RAM. - */ -#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23)) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) -#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) - -extern void * FASTCALL(kmap_high(struct page *page, int nonblocking)); -extern void FASTCALL(kunmap_high(struct page *page)); - -#define kmap(page) __kmap(page, 0) -#define kmap_nonblock(page) __kmap(page, 1) - -static inline void *__kmap(struct page *page, int nonblocking) -{ - if (in_interrupt()) - out_of_line_bug(); - if (page < highmem_start_page) - return page_address(page); - return kmap_high(page, nonblocking); -} - -static inline void kunmap(struct page *page) -{ - if (in_interrupt()) - out_of_line_bug(); - if (page < highmem_start_page) - return; - kunmap_high(page); -} - -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. - */ -static inline void *kmap_atomic(struct page *page, enum km_type type) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - if (page < highmem_start_page) - return page_address(page); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#if HIGHMEM_DEBUG - if (!pte_none(*(kmap_pte-idx))) - out_of_line_bug(); -#endif - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); - __flush_tlb_one(vaddr); - - return (void*) vaddr; -} - -static inline void kunmap_atomic(void *kvaddr, enum km_type type) -{ -#if HIGHMEM_DEBUG - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - if (vaddr < FIXADDR_START) // FIXME - return; - - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) - out_of_line_bug(); - - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(kmap_pte-idx); - __flush_tlb_one(vaddr); -#endif -} - -#endif /* __KERNEL__ */ - -#endif /* _ASM_HIGHMEM_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/hw_irq.h --- a/linux-2.4-xen-sparse/include/asm-xen/hw_irq.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,61 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - */ - -#include <linux/config.h> -#include <linux/smp.h> -#include <asm/atomic.h> -#include <asm/irq.h> - -#define SYSCALL_VECTOR 0x80 - -extern int irq_vector[NR_IRQS]; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -extern char _stext, _etext; - -extern unsigned long prof_cpu_mask; -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -/* - * x86 profiling function, SMP safe. We might want to do this in - * assembly totally? - */ -static inline void x86_do_profile (unsigned long eip) -{ - if (!prof_buffer) - return; - - /* - * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. - * (default is all CPUs.) - */ - if (!((1<<smp_processor_id()) & prof_cpu_mask)) - return; - - eip -= (unsigned long) &_stext; - eip >>= prof_shift; - /* - * Don't ignore out-of-bounds EIP values silently, - * put them into the last histogram slot, so if - * present, they will show up as a sharp peak. - */ - if (eip > prof_len-1) - eip = prof_len-1; - atomic_inc((atomic_t *)&prof_buffer[eip]); -} - -static inline void hw_resend_irq(struct hw_interrupt_type *h, - unsigned int i) -{} - -#endif /* _ASM_HW_IRQ_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/io.h --- a/linux-2.4-xen-sparse/include/asm-xen/io.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,457 +0,0 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H - -#include <linux/config.h> - -/* - * This file contains the definitions for the x86 IO instructions - * inb/inw/inl/outb/outw/outl and the "string versions" of the same - * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" - * versions of the single-IO instructions (inb_p/inw_p/..). - * - * This file is not meant to be obfuscating: it's just complicated - * to (a) handle it all in a way that makes gcc able to optimize it - * as well as possible and (b) trying to avoid writing the same thing - * over and over again with slight variations and possibly making a - * mistake somewhere. - */ - -/* - * Thanks to James van Artsdalen for a better timing-fix than - * the two short jumps: using outb's to a nonexistent port seems - * to guarantee better timings even on fast machines. - * - * On the other hand, I'd like to be sure of a non-existent port: - * I feel a bit unsafe about using 0x80 (should be safe, though) - * - * Linus - */ - - /* - * Bit simplified and optimized by Jan Hubicka - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. - * - * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, - * isa_read[wl] and isa_write[wl] fixed - * - Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxxxxx> - */ - -#define IO_SPACE_LIMIT 0xffff - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ - -#ifdef __KERNEL__ - -#include <linux/vmalloc.h> - -/* - * Temporary debugging check to catch old code using - * unmapped ISA addresses. Will be removed in 2.4. - */ -#if CONFIG_DEBUG_IOVIRT - extern void *__io_virt_debug(unsigned long x, const char *file, int line); - extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); - #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) -//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) -#else - #define __io_virt(x) ((void *)(x)) -//#define __io_phys(x) __pa(x) -#endif - -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void * address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void * phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * We define page_to_phys 'incorrectly' because it is used when merging blkdev - * requests, and the correct thing to do there is to use machine addresses. - */ -#define page_to_phys(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) - -extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ - -static inline void * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In paticular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - */ - -static inline void * ioremap_nocache (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, _PAGE_PCD); -} - -extern void iounmap(void *addr); - -/* - * bt_ioremap() and bt_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void *bt_ioremap(unsigned long offset, unsigned long size); -extern void bt_iounmap(void *addr, unsigned long size); - -#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) -#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) -#define page_to_bus(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) -#define bus_to_phys(_x) machine_to_phys(_x) -#define bus_to_page(_x) (mem_map + (bus_to_phys(_x) >> PAGE_SHIFT)) - -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) -#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) -#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) -#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) -#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) -#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) -#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) - -/* - * ISA space is 'always mapped' on a typical x86 system, no need to - * explicitly ioremap() it. The fact that the ISA IO space is mapped - * to PAGE_OFFSET is pure coincidence - it does not mean ISA values - * are physical addresses. The following constant pointer can be - * used as the IO-area pointer (it can be iounmapped as well, so the - * analogy with PCI is quite large): - */ -#define __ISA_IO_base ((char *)(PAGE_OFFSET)) - -#define isa_readb(a) readb(__ISA_IO_base + (a)) -#define isa_readw(a) readw(__ISA_IO_base + (a)) -#define isa_readl(a) readl(__ISA_IO_base + (a)) -#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) -#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) -#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) -#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) -#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) - - -/* - * Again, i386 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) -#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) - -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/** - * isa_check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the ISA mmio address io_addr. - * Returns 1 on a match. - * - * This function is deprecated. New drivers should use ioremap and - * check_signature. - */ - - -static inline int isa_check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) - -static inline void flush_write_buffers(void) -{ - __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); -} - -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - -#else - -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() - -#endif - -#endif /* __KERNEL__ */ - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#elif defined(__UNSAFE_IO__) -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#else -#define __SLOW_DOWN_IO "\n1: outb %%al,$0x80\n" \ - "2:\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,2b\n" \ - ".previous" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - -#ifdef CONFIG_MULTIQUAD -extern void *xquad_portio; /* Where the IO area was mapped */ -#endif /* CONFIG_MULTIQUAD */ - -/* - * Talk about misusing macros.. - */ -#define __OUT1(s,x) \ -static inline void out##s(unsigned x value, unsigned short port) { - -#ifdef __UNSAFE_IO__ -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" -#else -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("1: out" #s " %" s1 "0,%" s2 "1\n" \ - "2:\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,2b\n" \ - ".previous" -#endif - -#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE) -#define __OUTQ(s,ss,x) /* Do the equivalent of the portio op on quads */ \ -static inline void out##ss(unsigned x value, unsigned short port) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - out##ss##_local(value, port); \ -} \ -static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ -} - -#define __INQ(s,ss) /* Do the equivalent of the portio op on quads */ \ -static inline RETURN_TYPE in##ss(unsigned short port) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - return in##ss##_local(port); \ -} \ -static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ - else\ - return 0;\ -} -#endif /* CONFIG_MULTIQUAD && !STANDALONE */ - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} -#else -/* Make the default portio routines operate on quad 0 */ -#define __OUT(s,s1,x) \ -__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ -__OUTQ(s,s,x) \ -__OUTQ(s,s##_p,x) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __IN1(s) \ -static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; - -#ifdef __UNSAFE_IO__ -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" -#else -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("1: in" #s " %" s2 "1,%" s1 "0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov" #s " $~0,%" s1 "0\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" -#endif - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } -#else -/* Make the default portio routines operate on quad 0 */ -#define __IN(s,s1,i...) \ -__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__INQ(s,s) \ -__INQ(s,s##_p) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __INS(s) \ -static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/irq.h --- a/linux-2.4-xen-sparse/include/asm-xen/irq.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,65 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * <tomsoft@xxxxxxxxxxxxxxxxxxxxxxxxx> - */ - -#include <linux/config.h> -#include <asm/hypervisor.h> -#include <asm/ptrace.h> - -/* - * The flat IRQ space is divided into two regions: - * 1. A one-to-one mapping of real physical IRQs. This space is only used - * if we have physical device-access privilege. This region is at the - * start of the IRQ space so that existing device drivers do not need - * to be modified to translate physical IRQ numbers into our IRQ space. - * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These - * are bound using the provided bind/unbind functions. - */ - -#define PIRQ_BASE 0 -#define NR_PIRQS 128 - -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 - -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ -extern int bind_virq_to_irq(int virq); -extern void unbind_virq_from_irq(int virq); -extern int bind_evtchn_to_irq(int evtchn); -extern void unbind_evtchn_from_irq(int evtchn); - -static __inline__ int irq_cannonicalize(int irq) -{ - return (irq == 2) ? 9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - - -#define CPU_MASK_NONE 0 - -/* XXX SMH: no-op for compat w/ 2.6 shared files */ -#define irq_ctx_init(cpu) do { ; } while (0) - -#endif /* _ASM_IRQ_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/keyboard.h --- a/linux-2.4-xen-sparse/include/asm-xen/keyboard.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,74 +0,0 @@ -/* - * linux/include/asm-i386/keyboard.h - * - * Created 3 Nov 1996 by Geert Uytterhoeven - */ - -/* - * This file contains the i386 architecture specific keyboard definitions - */ - -#ifndef _I386_KEYBOARD_H -#define _I386_KEYBOARD_H - -#ifdef __KERNEL__ - -#include <linux/kernel.h> -#include <linux/ioport.h> -#include <linux/kd.h> -#include <linux/pm.h> -#include <asm/io.h> - -#define KEYBOARD_IRQ 1 -#define DISABLE_KBD_DURING_INTERRUPTS 0 - -extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); -extern int pckbd_getkeycode(unsigned int scancode); -extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, - char raw_mode); -extern char pckbd_unexpected_up(unsigned char keycode); -extern void pckbd_leds(unsigned char leds); -extern void pckbd_init_hw(void); -extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); -extern pm_callback pm_kbd_request_override; -extern unsigned char pckbd_sysrq_xlate[128]; - -#define kbd_setkeycode pckbd_setkeycode -#define kbd_getkeycode pckbd_getkeycode -#define kbd_translate pckbd_translate -#define kbd_unexpected_up pckbd_unexpected_up -#define kbd_leds pckbd_leds -#define kbd_init_hw pckbd_init_hw -#define kbd_sysrq_xlate pckbd_sysrq_xlate - -#define SYSRQ_KEY 0x54 - -#define kbd_controller_present() (xen_start_info.flags & SIF_INITDOMAIN) - -/* resource allocation */ -#define kbd_request_region() -#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ - "keyboard", NULL) - -/* How to access the keyboard macros on this platform. */ -#define kbd_read_input() inb(KBD_DATA_REG) -#define kbd_read_status() inb(KBD_STATUS_REG) -#define kbd_write_output(val) outb(val, KBD_DATA_REG) -#define kbd_write_command(val) outb(val, KBD_CNTL_REG) - -/* Some stoneage hardware needs delays after some operations. */ -#define kbd_pause() do { } while(0) - -/* - * Machine specific bits for the PS/2 driver - */ - -#define AUX_IRQ 12 - -#define aux_request_irq(hand, dev_id) \ - request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) - -#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) - -#endif /* __KERNEL__ */ -#endif /* _I386_KEYBOARD_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/mmu_context.h --- a/linux-2.4-xen-sparse/include/asm-xen/mmu_context.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,59 +0,0 @@ -#ifndef __I386_MMU_CONTEXT_H -#define __I386_MMU_CONTEXT_H - -#include <linux/config.h> -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/pgalloc.h> - -/* - * hooks to add arch specific data into the mm struct. - * Note that destroy_context is called even if init_new_context - * fails. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); - -#ifdef CONFIG_SMP - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ - if(cpu_tlbstate[cpu].state == TLBSTATE_OK) - cpu_tlbstate[cpu].state = TLBSTATE_LAZY; -} -#else -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ -} -#endif - -extern pgd_t *cur_pgd; - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) -{ - struct mmuext_op _op[2], *op = _op; - if (prev != next) { - /* stop flush ipis for the previous mm */ - clear_bit(cpu, &prev->cpu_vm_mask); - /* Re-load page tables */ - cur_pgd = next->pgd; - op->cmd = MMUEXT_NEW_BASEPTR; - op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); - op++; - /* load_LDT, if either the previous or next thread - * has a non-default LDT. - */ - if (next->context.size+prev->context.size) { - op->cmd = MMUEXT_SET_LDT; - op->linear_addr = (unsigned long)next->context.ldt; - op->nr_ents = next->context.size; - op++; - } - BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); - } -} - -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL,smp_processor_id()) - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/module.h --- a/linux-2.4-xen-sparse/include/asm-xen/module.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,14 +0,0 @@ -#ifndef _ASM_I386_MODULE_H -#define _ASM_I386_MODULE_H -/* - * This file contains the i386 architecture specific module code. - */ - -extern int xen_module_init(struct module *mod); - -#define module_map(x) vmalloc(x) -#define module_unmap(x) vfree(x) -#define module_arch_init(x) xen_module_init(x) -#define arch_init_modules(x) do { } while (0) - -#endif /* _ASM_I386_MODULE_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/page.h --- a/linux-2.4-xen-sparse/include/asm-xen/page.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,178 +0,0 @@ -#ifndef _I386_PAGE_H -#define _I386_PAGE_H - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#include <linux/config.h> -#include <linux/string.h> -#include <asm/types.h> -#include <asm-xen/xen-public/xen.h> - -#ifdef CONFIG_XEN_SCRUB_PAGES -#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) -#else -#define scrub_pages(_p,_n) ((void)0) -#endif - -#ifdef CONFIG_X86_USE_3DNOW - -#include <asm/mmx.h> - -#define clear_page(page) mmx_clear_page((void *)(page)) -#define copy_page(to,from) mmx_copy_page(to,from) - -#else - -/* - * On older X86 processors its not a win to use MMX here it seems. - * Maybe the K6-III ? - */ - -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) - -#endif - -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) - -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -extern unsigned int *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) -#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) -static inline unsigned long phys_to_machine(unsigned long phys) -{ - unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); - machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} -static inline unsigned long machine_to_phys(unsigned long machine) -{ - unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); - phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); - return phys; -} - -/* - * These are used to make use of C type-checking.. - */ -#if CONFIG_X86_PAE -typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; -typedef struct { unsigned long long pgd; } pgd_t; -#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) -#else -typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pgd; } pgd_t; -static inline unsigned long pte_val(pte_t x) -{ - unsigned long ret = x.pte_low; - if ( (ret & 1) ) ret = machine_to_phys(ret); - return ret; -} -#define pte_val_ma(x) ((x).pte_low) -#endif -#define PTE_MASK PAGE_MASK - -typedef struct { unsigned long pgprot; } pgprot_t; - -static inline unsigned long pmd_val(pmd_t x) -{ - unsigned long ret = x.pmd; - if ( ret ) ret = machine_to_phys(ret) | 1; - return ret; -} -#define pmd_val_ma(x) ((x).pmd) -#define pgd_val(x) ({ BUG(); (unsigned long)0; }) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); }) -#define __pte_ma(x) ((pte_t) { (x) } ) -#define __pmd(x) ({ unsigned long _x = (x); \ - (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); }) -#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#endif /* !__ASSEMBLY__ */ - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* - * This handles the memory map.. We could make this a config - * option, but too many people screw it up, and too few need - * it. - * - * A __PAGE_OFFSET of 0xC0000000 means that the kernel has - * a virtual address space of one gigabyte, which limits the - * amount of physical memory you can use to about 950MB. - * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ - -#define __PAGE_OFFSET (0xC0000000) - -#ifndef __ASSEMBLY__ - -/* - * Tell the user there is some problem. Beep too, so we can - * see^H^H^Hhear bugs in early bootup as well! - * The offending file and line are encoded after the "officially - * undefined" opcode for parsing in the trap handler. - */ - -#if 1 /* Set to zero for a slightly smaller kernel */ -#define BUG() \ - __asm__ __volatile__( "ud2\n" \ - "\t.word %c0\n" \ - "\t.long %c1\n" \ - : : "i" (__LINE__), "i" (__FILE__)) -#else -#define BUG() __asm__ __volatile__("ud2\n") -#endif - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - -/* Pure 2^n version of get_order */ -static __inline__ int get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - -#endif /* __ASSEMBLY__ */ - -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) -#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -/* VIRT <-> MACHINE conversion */ -#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) -#define machine_to_virt(_m) (__va(machine_to_phys(_m))) - -#endif /* __KERNEL__ */ - -#endif /* _I386_PAGE_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/pci.h --- a/linux-2.4-xen-sparse/include/asm-xen/pci.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,283 +0,0 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H - -#include <linux/config.h> - -#ifdef __KERNEL__ - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif -#define pcibios_scan_all_fns() 0 - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -void pcibios_config_init(void); -struct pci_bus * pcibios_scan_root(int bus); -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); - -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - -/* Dynamic DMA mapping stuff. - * i386 has everything mapped statically. - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <asm/scatterlist.h> -#include <linux/string.h> -#include <asm/io.h> - -struct pci_dev; - -/* The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices, - * NULL for PCI-like buses (ISA, EISA). - * Returns non-NULL cpu-view pointer to the buffer if successful and - * sets *dma_addrp to the pci side dma address as well, else *dma_addrp - * is undefined. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. - * - * References to the memory and mappings associated with cpu_addr/dma_addr - * past this call are illegal. - */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); - return virt_to_bus(ptr); -} - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. - */ -static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* - * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical - * to pci_map_single, but takes a struct page instead of a virtual address - */ -static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - return page_to_bus(page) + offset; -} - -static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* pci_unmap_{page,single} is a nop so... */ -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - /* - * temporary 2.4 hack - */ - for (i = 0; i < nents; i++ ) { - if (sg[i].address && sg[i].page) - out_of_line_bug(); - else if (!sg[i].address && !sg[i].page) - out_of_line_bug(); - - if (sg[i].address) - sg[i].dma_address = virt_to_bus(sg[i].address); - else - sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; - } - - flush_write_buffers(); - return nents; -} - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -static inline void pci_dma_sync_single(struct pci_dev *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. - */ -static inline void pci_dma_sync_sg(struct pci_dev *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if(mask < 0x00ffffff) - return 0; - - return 1; -} - -/* This is always fine. */ -#define pci_dac_dma_supported(pci_dev, mask) (1) - -static __inline__ dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_bus(page) + - (dma64_addr_t) offset); -} - -static __inline__ struct page * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return bus_to_page(dma_addr); -} - -static __inline__ unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static __inline__ void -pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -/* Return the index of the PCI controller for device. */ -static inline int pci_controller_num(struct pci_dev *dev) -{ - return 0; -} - -#define HAVE_PCI_MMAP -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); - -#endif /* __KERNEL__ */ - -#endif /* __i386_PCI_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/pgalloc.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgalloc.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,280 +0,0 @@ -#ifndef _I386_PGALLOC_H -#define _I386_PGALLOC_H - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/fixmap.h> -#include <asm/hypervisor.h> -#include <linux/threads.h> - -/* - * Quick lists are aligned so that least significant bits of array pointer - * are all zero when list is empty, and all one when list is full. - */ -#define QUICKLIST_ENTRIES 256 -#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) -#define QUICKLIST_FULL(_l) QUICKLIST_EMPTY((_l)+1) -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - -#define pmd_populate(mm, pmd, pte) \ - do { \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ - } while ( 0 ) - -/* - * Allocate and free page tables. - */ - -#if defined (CONFIG_X86_PAE) - -#error "no PAE support as yet" - -/* - * We can't include <linux/slab.h> here, thus these uglinesses. - */ -struct kmem_cache_s; - -extern struct kmem_cache_s *pae_pgd_cachep; -extern void *kmem_cache_alloc(struct kmem_cache_s *, int); -extern void kmem_cache_free(struct kmem_cache_s *, void *); - - -static inline pgd_t *get_pgd_slow(void) -{ - int i; - pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); - - if (pgd) { - for (i = 0; i < USER_PTRS_PER_PGD; i++) { - unsigned long pmd = __get_free_page(GFP_KERNEL); - if (!pmd) - goto out_oom; - clear_page(pmd); - set_pgd(pgd + i, __pgd(1 + __pa(pmd))); - } - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return pgd; -out_oom: - for (i--; i >= 0; i--) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); - return NULL; -} - -#else - -static inline pgd_t *get_pgd_slow(void) -{ - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); - - if (pgd) { - memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - __make_page_readonly(pgd); - xen_pgd_pin(__pa(pgd)); - } - return pgd; -} - -#endif /* CONFIG_X86_PAE */ - -static inline pgd_t *get_pgd_fast(void) -{ - unsigned long ret; - - if ( !QUICKLIST_EMPTY(pgd_quicklist) ) { - ret = *(--pgd_quicklist); - pgtable_cache_size--; - - } else - ret = (unsigned long)get_pgd_slow(); - return (pgd_t *)ret; -} - -static inline void free_pgd_slow(pgd_t *pgd) -{ -#if defined(CONFIG_X86_PAE) -#error - int i; - - for (i = 0; i < USER_PTRS_PER_PGD; i++) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); -#else - xen_pgd_unpin(__pa(pgd)); - __make_page_writable(pgd); - free_page((unsigned long)pgd); -#endif -} - -static inline void free_pgd_fast(pgd_t *pgd) -{ - if ( !QUICKLIST_FULL(pgd_quicklist) ) { - *(pgd_quicklist++) = (unsigned long)pgd; - pgtable_cache_size++; - } else - free_pgd_slow(pgd); -} - -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - { - clear_page(pte); - __make_page_readonly(pte); - xen_pte_pin(__pa(pte)); - } - return pte; - -} - -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, - unsigned long address) -{ - unsigned long ret = 0; - if ( !QUICKLIST_EMPTY(pte_quicklist) ) { - ret = *(--pte_quicklist); - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -static __inline__ void pte_free_slow(pte_t *pte) -{ - xen_pte_unpin(__pa(pte)); - __make_page_writable(pte); - free_page((unsigned long)pte); -} - -static inline void pte_free_fast(pte_t *pte) -{ - if ( !QUICKLIST_FULL(pte_quicklist) ) { - *(pte_quicklist++) = (unsigned long)pte; - pgtable_cache_size++; - } else - pte_free_slow(pte); -} - -#define pte_free(pte) pte_free_fast(pte) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the pmds as part of the pgd.) - */ - -#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free_slow(x) do { } while (0) -#define pmd_free_fast(x) do { } while (0) -#define pmd_free(x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() - -extern int do_check_pgt_cache(int, int); - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(mm, start, end) flushes a range of pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb_all() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) xen_tlb_flush(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) xen_invlpg(addr); -} - -static inline void flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - if (mm == current->active_mm) xen_tlb_flush(); -} - -#else -#error no kernel SMP support yet... -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) -{ - flush_tlb_mm(mm); -} - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -struct tlb_state -{ - struct mm_struct *active_mm; - int state; -} ____cacheline_aligned; -extern struct tlb_state cpu_tlbstate[NR_CPUS]; - -#endif /* CONFIG_SMP */ - -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - -/* - * NB. The 'domid' field should be zero if mapping I/O space (non RAM). - * Otherwise it identifies the owner of the memory that is being mapped. - */ -extern int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid); - -extern int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v); - - - -#endif /* _I386_PGALLOC_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,97 +0,0 @@ -#ifndef _I386_PGTABLE_2LEVEL_H -#define _I386_PGTABLE_2LEVEL_H - -/* - * traditional i386 two-level paging structure: - */ - -#define PGDIR_SHIFT 22 -#define PTRS_PER_PGD 1024 - -/* - * the i386 is two-level, so we don't really have any - * PMD directory physically. - */ -#define PMD_SHIFT 22 -#define PTRS_PER_PMD 1 - -#define PTRS_PER_PTE 1024 - -#define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) -#define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -#define pgd_clear(xp) do { } while (0) - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) -#define set_pte_atomic(pteptr, pteval) (*(pteptr) = pteval) - -/* - * (pmds are folded into pgds so this doesnt get actually called, - * but the define is needed for a generic inline function.) - */ -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) -#define set_pgd(pgdptr, pgdval) ((void)0) - -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) - -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} - -#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0)) -#define pte_same(a, b) ((a).pte_low == (b).pte_low) - -/* - * We detect special mappings in one of two ways: - * 1. If the MFN is an I/O page then Xen will set the m2p entry - * to be outside our maximum possible pseudophys range. - * 2. If the MFN belongs to a different domain then we will certainly - * not have MFN in our p2m table. Conversely, if the page is ours, - * then we'll have p2m(m2p(MFN))==MFN. - * If we detect a special mapping then it doesn't have a 'struct page'. - * We force !VALID_PAGE() by returning an out-of-range pointer. - * - * NB. These checks require that, for any MFN that is not in our reservation, - * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if - * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. - * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. - * - * NB2. When deliberately mapping foreign pages into the p2m table, you *must* - * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. - */ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) -#define pte_page(_pte) \ -({ \ - unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ - unsigned long pfn = mfn_to_pfn(mfn); \ - if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \ - pfn = max_mapnr; /* specia: force !VALID_PAGE() */ \ - &mem_map[pfn]; \ -}) - -#define pte_none(x) (!(x).pte_low) -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) - -#endif /* _I386_PGTABLE_2LEVEL_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/pgtable.h --- a/linux-2.4-xen-sparse/include/asm-xen/pgtable.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,371 +0,0 @@ -#ifndef _I386_PGTABLE_H -#define _I386_PGTABLE_H - -#include <linux/config.h> - -/* - * The Linux memory management assumes a three-level page table setup. On - * the i386, we use that, but "fold" the mid level into the top-level page - * table, so that we physically have the same two-level page table as the - * i386 mmu expects. - * - * This file contains the functions and defines necessary to modify and use - * the i386 page table tree. - */ -#ifndef __ASSEMBLY__ -#include <asm/processor.h> -#include <asm/hypervisor.h> -#include <linux/threads.h> -#include <asm/fixmap.h> - -#ifndef _I386_BITOPS_H -#include <asm/bitops.h> -#endif - -#define swapper_pg_dir 0 -extern void paging_init(void); - -/* Caches aren't brain-dead on the intel. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_range(mm, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) -#define flush_dcache_page(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) -#define flush_icache_page(vma,pg) do { } while (0) -#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) - -extern unsigned long pgkern_mask; - -#define __flush_tlb() xen_tlb_flush() -#define __flush_tlb_global() __flush_tlb() -#define __flush_tlb_all() __flush_tlb_global() -#define __flush_tlb_one(addr) xen_invlpg(addr) -#define __flush_tlb_single(addr) xen_invlpg(addr) - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - -#endif /* !__ASSEMBLY__ */ - -/* - * The Linux x86 paging architecture is 'compile-time dual-mode', it - * implements both the traditional 2-level x86 page tables and the - * newer 3-level PAE-mode page tables. - */ -#ifndef __ASSEMBLY__ -#if CONFIG_X86_PAE -# include <asm/pgtable-3level.h> - -/* - * Need to initialise the X86 PAE caches - */ -extern void pgtable_cache_init(void); - -#else -# include <asm/pgtable-2level.h> - -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -#endif -#endif - -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_PGD_NR 0 - -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - -#define TWOLEVEL_PGDIR_SHIFT 22 -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) - - -#ifndef __ASSEMBLY__ -/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */ -#define VMALLOC_OFFSET (4*1024*1024) -extern void * high_memory; -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#if CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -#else -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -#endif - -#define _PAGE_BIT_PRESENT 0 -#define _PAGE_BIT_RW 1 -#define _PAGE_BIT_USER 2 -#define _PAGE_BIT_PWT 3 -#define _PAGE_BIT_PCD 4 -#define _PAGE_BIT_ACCESSED 5 -#define _PAGE_BIT_DIRTY 6 -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ - -#define _PAGE_PROTNONE 0x080 /* If not present */ - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) - -#define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) -#define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) - -#if 0 -#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) -#else -#define MAKE_GLOBAL(x) __pgprot(x) -#endif - -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) - -/* - * The i386 can't do page protection for execute, and considers that - * the same are read. Also, write permissions imply read permissions. - * This is the closest we can get.. - */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED - -#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) - -#define pmd_none(x) (!pmd_val(x)) -/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. - can temporarily clear it. */ -#define pmd_present(x) (pmd_val(x)) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) - - -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } - -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } -static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } -static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } - -static inline int ptep_test_and_clear_dirty(pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -static inline int ptep_test_and_clear_young(pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); -} - -static inline void ptep_set_wrprotect(pte_t *ptep) -{ - if (pte_write(*ptep)) - clear_bit(_PAGE_BIT_RW, &ptep->pte_low); -} - -static inline void ptep_mkdirty(pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ - -#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) - -/* This takes a physical page address that is used by the remapping functions */ -#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); - return pte; -} - -#define page_pte(page) page_pte_prot(page, __pgprot(0)) - -#define pmd_page(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) - -#define __pgd_offset(address) pgd_index(address) - -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -#define __pmd_offset(address) \ - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) - -/* Find an entry in the third-level page table.. */ -#define __pte_offset(address) \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ - __pte_offset(address)) - -/* - * The i386 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information. - */ -#define update_mmu_cache(vma,address,pte) do { } while (0) - -/* Encode and de-code a swap entry */ -#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) -#define SWP_OFFSET(x) ((x).val >> 8) -#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) -#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) -#define swp_entry_to_pte(x) ((pte_t) { (x).val }) - -struct page; -int change_page_attr(struct page *, int, pgprot_t prot); - -static inline void __make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_wrprotect(*pte)); -} - -static inline void __make_page_writable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_mkwrite(*pte)); -} - -static inline void make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_wrprotect(*pte)); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_readonly(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_page_writable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - set_pte(pte, pte_mkwrite(*pte)); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_writable(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_pages_readonly(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_readonly(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline void make_pages_writable(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_writable(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline unsigned long arbitrary_virt_to_machine(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; - return pa | ((unsigned long)va & (PAGE_SIZE-1)); -} - -#endif /* !__ASSEMBLY__ */ - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define PageSkip(page) (0) -#define kern_addr_valid(addr) (1) - -#define io_remap_page_range remap_page_range - -#endif /* _I386_PGTABLE_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/processor.h --- a/linux-2.4-xen-sparse/include/asm-xen/processor.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,483 +0,0 @@ -/* - * include/asm-i386/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H - -#include <asm/vm86.h> -#include <asm/math_emu.h> -#include <asm/segment.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/sigcontext.h> -#include <asm/cpufeature.h> -#include <linux/cache.h> -#include <linux/config.h> -#include <linux/threads.h> - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int fdiv_bug; - int f00f_bug; - int coma_bug; - unsigned long loops_per_jiffy; - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NSC 8 -#define X86_VENDOR_SIS 9 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct tss_struct init_tss[NR_CPUS]; - -#ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data -#endif - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* - * Generic CPUID function - */ -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -#define load_cr3(pgdir) \ - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))); - -extern unsigned long mmu_cr4_features; - -#include <asm/hypervisor.h> - -static inline void set_in_cr4 (unsigned long mask) -{ - BUG(); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - BUG(); -} - -/* - * Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* - * Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -/* - * Bus types (default is ISA, but people can check others with these..) - */ -#ifdef CONFIG_EISA -extern int EISA_bus; -#else -#define EISA_bus (0) -#endif -extern int MCA_bus; - -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; - -/* - * User space process size: 3GB (default). - */ -#define TASK_SIZE (PAGE_OFFSET) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) - -/* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. - */ -#define IO_BITMAP_SIZE 32 -#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - long status; /* software status information */ -}; - -struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long reserved; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; -} __attribute__ ((aligned (16))); - -struct i387_soft_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - unsigned char ftop, changed, lookahead, no_update, rm, alimit; - struct info *info; - unsigned long entry_eip; -}; - -union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct tss_struct { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, bitmap; - unsigned long io_bitmap[IO_BITMAP_SIZE+1]; - /* - * pads the TSS to be cacheline-aligned (size is 0x100) - */ - unsigned long __cacheline_filler[5]; -}; - -struct thread_struct { - unsigned long esp0; - unsigned long eip; - unsigned long esp; - unsigned long fs; - unsigned long gs; - unsigned int io_pl; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* fault info */ - unsigned long cr2, trap_no, error_code; -/* floating point info */ - union i387_union i387; -/* virtual 86 mode info */ - struct vm86_struct * vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags, v86mask, saved_esp0; -}; - -#define INIT_THREAD { sizeof(init_stack) + (long) &init_stack, \ - 0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 } - -#define INIT_TSS { \ - 0,0, /* back_link, __blh */ \ - sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ - __KERNEL_DS, 0, /* ss0 */ \ - 0,0,0,0,0,0, /* stack1, stack2 */ \ - 0, /* cr3 */ \ - 0,0, /* eip,eflags */ \ - 0,0,0,0, /* eax,ecx,edx,ebx */ \ - 0,0,0,0, /* esp,ebp,esi,edi */ \ - 0,0,0,0,0,0, /* es,cs,ss */ \ - 0,0,0,0,0,0, /* ds,fs,gs */ \ - 0,0, /* ldt */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ - {~0, } /* ioperm */ \ -} - -#define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ - set_fs(USER_DS); \ - regs->xds = __USER_DS; \ - regs->xes = __USER_DS; \ - regs->xss = __USER_DS; \ - regs->xcs = __USER_CS; \ - regs->eip = new_eip; \ - regs->esp = new_esp; \ -} while (0) - -/* Forward declaration, a strange C thing */ -struct task_struct; -struct mm_struct; - -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); -/* - * create a kernel thread without removing it from tasklists - */ -extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -/* Copy and release all segment info associated with a VM - * Unusable due to lack of error handling, use {init_new,destroy}_context - * instead. - */ -static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { } -static inline void release_segments(struct mm_struct * mm) { } - -/* - * Return saved PC of a blocked thread. - */ -static inline unsigned long thread_saved_pc(struct thread_struct *t) -{ - return ((unsigned long *)t->esp)[3]; -} - -unsigned long get_wchan(struct task_struct *p); -#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) -#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) -#define free_task_struct(p) free_pages((unsigned long) (p), 1) -#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - -#define init_task (init_task_union.task) -#define init_stack (init_task_union.stack) - -struct microcode { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int reserved[5]; - unsigned int bits[500]; -}; - -/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ -#define MICROCODE_IOCFREE _IO('6',0) - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop" ::: "memory"); -} - -#define cpu_relax() rep_nop() - -/* Prefetch instructions for Pentium III and AMD Athlon */ -#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4) - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif - -#endif /* __ASM_I386_PROCESSOR_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/queues.h --- a/linux-2.4-xen-sparse/include/asm-xen/queues.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,20 +0,0 @@ - -/* Work-queue emulation over task queues. Pretty simple. */ - -#ifndef __QUEUES_H__ -#define __QUEUES_H__ - -#include <linux/version.h> -#include <linux/list.h> -#include <linux/tqueue.h> - -#define DECLARE_TQUEUE(_name, _fn, _arg) \ - struct tq_struct _name = { LIST_HEAD_INIT((_name).list), 0, _fn, _arg } -#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg) - -#define work_struct tq_struct -#define INIT_WORK(_work, _fn, _arg) INIT_TQUEUE(_work, _fn, _arg) - -#define schedule_work(_w) schedule_task(_w) - -#endif /* __QUEUES_H__ */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/segment.h --- a/linux-2.4-xen-sparse/include/asm-xen/segment.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,15 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -#ifndef __ASSEMBLY__ -#include <linux/types.h> -#endif -#include <asm-xen/xen-public/xen.h> - -#define __KERNEL_CS FLAT_RING1_CS -#define __KERNEL_DS FLAT_RING1_DS - -#define __USER_CS FLAT_RING3_CS -#define __USER_DS FLAT_RING3_DS - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/smp.h --- a/linux-2.4-xen-sparse/include/asm-xen/smp.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,102 +0,0 @@ -#ifndef __ASM_SMP_H -#define __ASM_SMP_H - -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifndef __ASSEMBLY__ -#include <linux/config.h> -#include <linux/threads.h> -#include <linux/ptrace.h> -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include <asm/bitops.h> -#include <asm/mpspec.h> -#ifdef CONFIG_X86_IO_APIC -#include <asm/io_apic.h> -#endif -#include <asm/apic.h> -#endif -#endif - -#ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ - -/* - * Private routines/data - */ - -extern void smp_alloc_memory(void); -extern unsigned long phys_cpu_present_map; -extern unsigned long cpu_online_map; -extern volatile unsigned long smp_invalidate_needed; -extern int pic_mode; -extern int smp_num_siblings; -extern int cpu_sibling_map[]; - -extern void smp_flush_tlb(void); -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -extern void fastcall smp_send_reschedule(int cpu); -extern void smp_invalidate_rcv(void); /* Process an NMI */ -extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ -static inline int cpu_logical_map(int cpu) -{ - return cpu; -} -static inline int cpu_number_map(int cpu) -{ - return cpu; -} - -/* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. - */ -#define MAX_APICID 256 -extern volatile int cpu_to_physical_apicid[NR_CPUS]; -extern volatile int physical_apicid_to_cpu[MAX_APICID]; -extern volatile int cpu_to_logical_apicid[NR_CPUS]; -extern volatile int logical_apicid_to_cpu[MAX_APICID]; - -/* - * General functions that each host system must provide. - */ - -extern void smp_boot_cpus(void); -extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ - -/* - * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. - */ - -#define smp_processor_id() (current->processor) - -#endif /* !__ASSEMBLY__ */ - -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. - */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - -#endif -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/system.h --- a/linux-2.4-xen-sparse/include/asm-xen/system.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,424 +0,0 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <asm/synch_bitops.h> -#include <asm/segment.h> -#include <asm/hypervisor.h> -#include <asm/evtchn.h> - -#ifdef __KERNEL__ - -struct task_struct; -extern void FASTCALL(__switch_to(struct task_struct *prev, - struct task_struct *next)); - -#define prepare_to_switch() \ -do { \ - struct thread_struct *__t = &current->thread; \ - __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) ); \ - __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) ); \ -} while (0) -#define switch_to(prev,next,last) do { \ - asm volatile("pushl %%esi\n\t" \ - "pushl %%edi\n\t" \ - "pushl %%ebp\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %3,%%esp\n\t" /* restore ESP */ \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %4\n\t" /* restore EIP */ \ - "jmp __switch_to\n" \ - "1:\t" \ - "popl %%ebp\n\t" \ - "popl %%edi\n\t" \ - "popl %%esi\n\t" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ - "=b" (last) \ - :"m" (next->thread.esp),"m" (next->thread.eip), \ - "a" (prev), "d" (next), \ - "b" (prev)); \ -} while (0) - -#define _set_base(addr,base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while(0) - -#define _set_limit(addr,limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while(0) - -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) - -static inline unsigned long _get_base(char * addr) -{ - unsigned long __base; - __asm__("movb %3,%%dh\n\t" - "movb %2,%%dl\n\t" - "shll $16,%%edx\n\t" - "movw %1,%%dx" - :"=&d" (__base) - :"m" (*((addr)+2)), - "m" (*((addr)+4)), - "m" (*((addr)+7))); - return __base; -} - -#define get_base(ldt) _get_base( ((char *)&(ldt)) ) - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg,value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "pushl $0\n\t" \ - "popl %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" \ - : :"m" (*(unsigned int *)&(value))) - -/* NB. 'clts' is done for us by Xen during virtual trap. */ -#define clts() ((void)0) -#define stts() (HYPERVISOR_fpu_taskswitch(1)) - -#endif /* __KERNEL__ */ - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - * - * Taken from 2.6 for Xen. - */ -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfl %1,%0" - :"=r" (word) - :"rm" (word)); - return word; -} - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) - -#define tas(ptr) (xchg((ptr),1)) - -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((struct __xchg_dummy *)(x)) - - -/* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. (in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) - * - * chmxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. - */ -static inline void __set_64bit (unsigned long long * ptr, - unsigned int low, unsigned int high) -{ - __asm__ __volatile__ ( - "\n1:\t" - "movl (%0), %%eax\n\t" - "movl 4(%0), %%edx\n\t" - "lock cmpxchg8b (%0)\n\t" - "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) - : "ax","dx","memory"); -} - -static inline void __set_64bit_constant (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); -} -#define ll_low(x) *(((unsigned int*)&(x))+0) -#define ll_high(x) *(((unsigned int*)&(x))+1) - -static inline void __set_64bit_var (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,ll_low(value), ll_high(value)); -} - -#define set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit_constant(ptr, value) : \ - __set_64bit_var(ptr, value) ) - -#define _set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ - __set_64bit(ptr, ll_low(value), ll_high(value)) ) - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -#ifdef CONFIG_X86_CMPXCHG -#define __HAVE_ARCH_CMPXCHG 1 - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__("lock cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__("lock cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__("lock cmpxchgl %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - -#else -/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ -#endif - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() - -#ifdef CONFIG_X86_OOSTORE -#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#else -#define wmb() __asm__ __volatile__ ("": : :"memory") -#endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define safe_halt() ((void)0) - -/* - * The use of 'barrier' in the following reflects their use as local-lock - * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following - * critical operations are executed. All critical operatiosn must complete - * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also - * includes these barriers, for example. - */ - -#define __cli() \ -do { \ - HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define __save_flags(x) \ -do { \ - (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ -} while (0) - -#define __restore_flags(x) \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - if ( (_shared->vcpu_data[0].evtchn_upcall_mask = x) == 0 ) { \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - } \ -} while (0) - -#define __save_and_cli(x) \ -do { \ - (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ - HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __save_and_sti(x) \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - barrier(); \ - (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \ - _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_set(x) __save_and_sti(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - - -#ifdef CONFIG_SMP -#error no SMP -extern void __global_cli(void); -extern void __global_sti(void); -extern unsigned long __global_save_flags(void); -extern void __global_restore_flags(unsigned long); -#define cli() __global_cli() -#define sti() __global_sti() -#define save_flags(x) ((x)=__global_save_flags()) -#define restore_flags(x) __global_restore_flags(x) -#define save_and_cli(x) do { save_flags(x); cli(); } while(0); -#define save_and_sti(x) do { save_flags(x); sti(); } while(0); - -#else - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) -#define save_and_sti(x) __save_and_sti(x) - -#endif - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -extern unsigned long dmi_broken; -extern int is_sony_vaio_laptop; - -#define BROKEN_ACPI_Sx 0x0001 -#define BROKEN_INIT_AFTER_S1 0x0002 -#define BROKEN_PNP_BIOS 0x0004 - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/vga.h --- a/linux-2.4-xen-sparse/include/asm-xen/vga.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,42 +0,0 @@ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares <mj@xxxxxx> - */ - -#ifndef _LINUX_ASM_VGA_H_ -#define _LINUX_ASM_VGA_H_ - -#include <asm/io.h> - -extern unsigned char *vgacon_mmap; - -static unsigned long VGA_MAP_MEM(unsigned long x) -{ - if( vgacon_mmap == NULL ) - { - /* This is our first time in this function. This whole thing - is a rather grim hack. We know we're going to get asked - to map a 32KB region between 0xb0000 and 0xb8000 because - that's what VGAs are. We used the boot time permanent - fixed map region, and map it to machine pages. - */ - if( x != 0xb8000 ) - panic("Argghh! VGA Console is weird. 1:%08lx\n",x); - - vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 ); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - else - { - if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */ - panic("Argghh! VGA Console is weird. 2:%08lx\n",x); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - return 0; -} - -static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); } -static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); } - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/asm-xen/xor.h --- a/linux-2.4-xen-sparse/include/asm-xen/xor.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,879 +0,0 @@ -/* - * include/asm-i386/xor.h - * - * Optimized RAID-5 checksumming functions for MMX and SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * High-speed RAID5 checksumming functions utilizing MMX instructions. - * Copyright (C) 1998 Ingo Molnar. - */ - -#define FPU_SAVE \ - do { \ - if (!(current->flags & PF_USEDFPU)) \ - clts(); \ - __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ - } while (0) - -#define FPU_RESTORE \ - do { \ - __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ - if (!(current->flags & PF_USEDFPU)) \ - stts(); \ - } while (0) - -#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" -#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" -#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" -#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" -#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" -#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" - - -static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - ST(i,0) \ - XO1(i+1,1) \ - ST(i+1,1) \ - XO1(i+2,2) \ - ST(i+2,2) \ - XO1(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - ST(i,0) \ - XO2(i+1,1) \ - ST(i+1,1) \ - XO2(i+2,2) \ - ST(i+2,2) \ - XO2(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - ST(i,0) \ - XO3(i+1,1) \ - ST(i+1,1) \ - XO3(i+2,2) \ - ST(i+2,2) \ - XO3(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " addl $128, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory"); - - FPU_RESTORE; -} - - -static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 7; - char fpu_save[108]; - - FPU_SAVE; - - /* need to save/restore p4/p5 manually otherwise gcc's 10 argument - limit gets exceeded (+ counts as two arguments) */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - XO4(i,0) \ - ST(i,0) \ - XO4(i+1,1) \ - ST(i+1,1) \ - XO4(i+2,2) \ - ST(i+2,2) \ - XO4(i+3,3) \ - ST(i+3,3) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $128, %1 ;\n" - " addl $128, %2 ;\n" - " addl $128, %3 ;\n" - " addl $128, %4 ;\n" - " addl $128, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - FPU_RESTORE; -} - -#undef LD -#undef XO1 -#undef XO2 -#undef XO3 -#undef XO4 -#undef ST -#undef BLOCK - -static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 8(%2), %%mm1 ;\n" - " movq 24(%1), %%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " pxor 16(%2), %%mm2 ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq 40(%1), %%mm5 ;\n" - " movq %%mm3, 24(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " movq 24(%1), %%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 32(%2), %%mm4 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq %%mm3, 24(%1) ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : - : "memory" ); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - __asm__ __volatile__ ( - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor (%4), %%mm0 ;\n" - " movq 24(%1), %%mm3 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " pxor 8(%4), %%mm1 ;\n" - " movq %%mm0, (%1) ;\n" - " movq 32(%1), %%mm4 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " pxor 16(%4), %%mm2 ;\n" - " movq %%mm1, 8(%1) ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 32(%2), %%mm4 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 40(%2), %%mm5 ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 24(%4), %%mm3 ;\n" - " movq %%mm3, 24(%1) ;\n" - " movq 56(%1), %%mm7 ;\n" - " movq 48(%1), %%mm6 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 56(%2), %%mm7 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " addl $64, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory"); - - FPU_RESTORE; -} - -static void -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 6; - char fpu_save[108]; - - FPU_SAVE; - - /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" - " .align 32,0x90 ;\n" - " 1: ;\n" - " movq (%1), %%mm0 ;\n" - " movq 8(%1), %%mm1 ;\n" - " pxor (%2), %%mm0 ;\n" - " pxor 8(%2), %%mm1 ;\n" - " movq 16(%1), %%mm2 ;\n" - " pxor (%3), %%mm0 ;\n" - " pxor 8(%3), %%mm1 ;\n" - " pxor 16(%2), %%mm2 ;\n" - " pxor (%4), %%mm0 ;\n" - " pxor 8(%4), %%mm1 ;\n" - " pxor 16(%3), %%mm2 ;\n" - " movq 24(%1), %%mm3 ;\n" - " pxor (%5), %%mm0 ;\n" - " pxor 8(%5), %%mm1 ;\n" - " movq %%mm0, (%1) ;\n" - " pxor 16(%4), %%mm2 ;\n" - " pxor 24(%2), %%mm3 ;\n" - " movq %%mm1, 8(%1) ;\n" - " pxor 16(%5), %%mm2 ;\n" - " pxor 24(%3), %%mm3 ;\n" - " movq 32(%1), %%mm4 ;\n" - " movq %%mm2, 16(%1) ;\n" - " pxor 24(%4), %%mm3 ;\n" - " pxor 32(%2), %%mm4 ;\n" - " movq 40(%1), %%mm5 ;\n" - " pxor 24(%5), %%mm3 ;\n" - " pxor 32(%3), %%mm4 ;\n" - " pxor 40(%2), %%mm5 ;\n" - " movq %%mm3, 24(%1) ;\n" - " pxor 32(%4), %%mm4 ;\n" - " pxor 40(%3), %%mm5 ;\n" - " movq 48(%1), %%mm6 ;\n" - " movq 56(%1), %%mm7 ;\n" - " pxor 32(%5), %%mm4 ;\n" - " pxor 40(%4), %%mm5 ;\n" - " pxor 48(%2), %%mm6 ;\n" - " pxor 56(%2), %%mm7 ;\n" - " movq %%mm4, 32(%1) ;\n" - " pxor 48(%3), %%mm6 ;\n" - " pxor 56(%3), %%mm7 ;\n" - " pxor 40(%5), %%mm5 ;\n" - " pxor 48(%4), %%mm6 ;\n" - " pxor 56(%4), %%mm7 ;\n" - " movq %%mm5, 40(%1) ;\n" - " pxor 48(%5), %%mm6 ;\n" - " pxor 56(%5), %%mm7 ;\n" - " movq %%mm6, 48(%1) ;\n" - " movq %%mm7, 56(%1) ;\n" - - " addl $64, %1 ;\n" - " addl $64, %2 ;\n" - " addl $64, %3 ;\n" - " addl $64, %4 ;\n" - " addl $64, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+g" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - FPU_RESTORE; -} - -static struct xor_block_template xor_block_pII_mmx = { - name: "pII_mmx", - do_2: xor_pII_mmx_2, - do_3: xor_pII_mmx_3, - do_4: xor_pII_mmx_4, - do_5: xor_pII_mmx_5, -}; - -static struct xor_block_template xor_block_p5_mmx = { - name: "p5_mmx", - do_2: xor_p5_mmx_2, - do_3: xor_p5_mmx_3, - do_4: xor_p5_mmx_4, - do_5: xor_p5_mmx_5, -}; - -#undef FPU_SAVE -#undef FPU_RESTORE - -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -#define XMMS_SAVE \ - if (!(current->flags & PF_USEDFPU)) \ - clts(); \ - __asm__ __volatile__ ( \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory") - -#define XMMS_RESTORE \ - __asm__ __volatile__ ( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - if (!(current->flags & PF_USEDFPU)) \ - stts() - -#define ALIGN16 __attribute__((aligned(16))) - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" -#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" -#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" -#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" -#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" -#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" -#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" -#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - LD(i,0) \ - LD(i+1,1) \ - PF1(i) \ - PF1(i+2) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF0(i+4) \ - PF0(i+6) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - XMMS_RESTORE; -} - -static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r"(p2), "+r"(p3) - : - : "memory" ); - - XMMS_RESTORE; -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - __asm__ __volatile__ ( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - PF3(i) \ - PF3(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory" ); - - XMMS_RESTORE; -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - - XMMS_SAVE; - - /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ - __asm__ __volatile__ ( - " pushl %4\n" - " pushl %5\n" -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i+2) \ - LD(i,0) \ - LD(i+1,1) \ - LD(i+2,2) \ - LD(i+3,3) \ - PF2(i) \ - PF2(i+2) \ - XO1(i,0) \ - XO1(i+1,1) \ - XO1(i+2,2) \ - XO1(i+3,3) \ - PF3(i) \ - PF3(i+2) \ - XO2(i,0) \ - XO2(i+1,1) \ - XO2(i+2,2) \ - XO2(i+3,3) \ - PF4(i) \ - PF4(i+2) \ - PF0(i+4) \ - PF0(i+6) \ - XO3(i,0) \ - XO3(i+1,1) \ - XO3(i+2,2) \ - XO3(i+3,3) \ - XO4(i,0) \ - XO4(i+1,1) \ - XO4(i+2,2) \ - XO4(i+3,3) \ - ST(i,0) \ - ST(i+1,1) \ - ST(i+2,2) \ - ST(i+3,3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " addl $256, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - " popl %5\n" - " popl %4\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - XMMS_RESTORE; -} - -static struct xor_block_template xor_block_pIII_sse = { - name: "pIII_sse", - do_2: xor_sse_2, - do_3: xor_sse_3, - do_4: xor_sse_4, - do_5: xor_sse_5, -}; - -/* Also try the generic routines. */ -#include <asm-generic/xor.h> - -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - if (cpu_has_xmm) \ - xor_speed(&xor_block_pIII_sse); \ - if (md_cpu_has_mmx()) { \ - xor_speed(&xor_block_pII_mmx); \ - xor_speed(&xor_block_p5_mmx); \ - } \ - } while (0) - -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/blk.h --- a/linux-2.4-xen-sparse/include/linux/blk.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,409 +0,0 @@ -#ifndef _BLK_H -#define _BLK_H - -#include <linux/blkdev.h> -#include <linux/locks.h> -#include <linux/config.h> -#include <linux/spinlock.h> - -/* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* - * Initialization functions. - */ -extern int isp16_init(void); -extern int cdu31a_init(void); -extern int acsi_init(void); -extern int mcd_init(void); -extern int mcdx_init(void); -extern int sbpcd_init(void); -extern int aztcd_init(void); -extern int sony535_init(void); -extern int gscd_init(void); -extern int cm206_init(void); -extern int optcd_init(void); -extern int sjcd_init(void); -extern int cdi_init(void); -extern int hd_init(void); -extern int ide_init(void); -extern int xd_init(void); -extern int mfm_init(void); -extern int loop_init(void); -extern int md_init(void); -extern int ap_init(void); -extern int ddv_init(void); -extern int z2_init(void); -extern int swim3_init(void); -extern int swimiop_init(void); -extern int amiga_floppy_init(void); -extern int atari_floppy_init(void); -extern int ez_init(void); -extern int bpcd_init(void); -extern int ps2esdi_init(void); -extern int jsfd_init(void); -extern int viodasd_init(void); -extern int viocd_init(void); - -#if defined(CONFIG_ARCH_S390) -extern int dasd_init(void); -extern int xpram_init(void); -extern int tapeblock_init(void); -#endif /* CONFIG_ARCH_S390 */ - -#if defined(CONFIG_XEN) -extern int xlblk_init(void); -#endif /* CONFIG_XEN */ - -extern void set_device_ro(kdev_t dev,int flag); -void add_blkdev_randomness(int major); - -extern int floppy_init(void); -extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ -extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ -extern int rd_image_start; /* starting block # of image */ - -#ifdef CONFIG_BLK_DEV_INITRD - -#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ - -extern unsigned long initrd_start,initrd_end; -extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ -void initrd_init(void); - -#endif - - -/* - * end_request() and friends. Must be called with the request queue spinlock - * acquired. All functions called within end_request() _must_be_ atomic. - * - * Several drivers define their own end_request and call - * end_that_request_first() and end_that_request_last() - * for parts of the original function. This prevents - * code duplication in drivers. - */ - -static inline void blkdev_dequeue_request(struct request * req) -{ - list_del(&req->queue); -} - -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); - -#if defined(MAJOR_NR) || defined(IDE_DRIVER) - -#undef DEVICE_ON -#undef DEVICE_OFF - -/* - * Add entries as needed. - */ - -#ifdef IDE_DRIVER - -#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) -#define DEVICE_NAME "ide" - -#elif (MAJOR_NR == RAMDISK_MAJOR) - -/* ram disk */ -#define DEVICE_NAME "ramdisk" -#define DEVICE_NR(device) (MINOR(device)) -#define DEVICE_NO_RANDOM - -#elif (MAJOR_NR == Z2RAM_MAJOR) - -/* Zorro II Ram */ -#define DEVICE_NAME "Z2RAM" -#define DEVICE_REQUEST do_z2_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == FLOPPY_MAJOR) - -static void floppy_off(unsigned int nr); - -#define DEVICE_NAME "floppy" -#define DEVICE_INTR do_floppy -#define DEVICE_REQUEST do_fd_request -#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 )) -#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device)) - -#elif (MAJOR_NR == HD_MAJOR) - -/* Hard disk: timeout is 6 seconds. */ -#define DEVICE_NAME "hard disk" -#define DEVICE_INTR do_hd -#define TIMEOUT_VALUE (6*HZ) -#define DEVICE_REQUEST do_hd_request -#define DEVICE_NR(device) (MINOR(device)>>6) - -#elif (SCSI_DISK_MAJOR(MAJOR_NR)) - -#define DEVICE_NAME "scsidisk" -#define TIMEOUT_VALUE (2*HZ) -#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4)) - -/* Kludge to use the same number for both char and block major numbers */ -#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER) - -#define DEVICE_NAME "Multiple devices driver" -#define DEVICE_REQUEST do_md_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == SCSI_TAPE_MAJOR) - -#define DEVICE_NAME "scsitape" -#define DEVICE_INTR do_st -#define DEVICE_NR(device) (MINOR(device) & 0x7f) - -#elif (MAJOR_NR == OSST_MAJOR) - -#define DEVICE_NAME "onstream" -#define DEVICE_INTR do_osst -#define DEVICE_NR(device) (MINOR(device) & 0x7f) -#define DEVICE_ON(device) -#define DEVICE_OFF(device) - -#elif (MAJOR_NR == SCSI_CDROM_MAJOR) - -#define DEVICE_NAME "CD-ROM" -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == XT_DISK_MAJOR) - -#define DEVICE_NAME "xt disk" -#define DEVICE_REQUEST do_xd_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == PS2ESDI_MAJOR) - -#define DEVICE_NAME "PS/2 ESDI" -#define DEVICE_REQUEST do_ps2esdi_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == CDU31A_CDROM_MAJOR) - -#define DEVICE_NAME "CDU31A" -#define DEVICE_REQUEST do_cdu31a_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE)) - -#define DEVICE_NAME "ACSI" -#define DEVICE_INTR do_acsi -#define DEVICE_REQUEST do_acsi_request -#define DEVICE_NR(device) (MINOR(device) >> 4) - -#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR) - -#define DEVICE_NAME "Mitsumi CD-ROM" -/* #define DEVICE_INTR do_mcd */ -#define DEVICE_REQUEST do_mcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR) - -#define DEVICE_NAME "Mitsumi CD-ROM" -/* #define DEVICE_INTR do_mcdx */ -#define DEVICE_REQUEST do_mcdx_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #1" -#define DEVICE_REQUEST do_sbpcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #2" -#define DEVICE_REQUEST do_sbpcd2_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #3" -#define DEVICE_REQUEST do_sbpcd3_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR) - -#define DEVICE_NAME "Matsushita CD-ROM controller #4" -#define DEVICE_REQUEST do_sbpcd4_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == AZTECH_CDROM_MAJOR) - -#define DEVICE_NAME "Aztech CD-ROM" -#define DEVICE_REQUEST do_aztcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == CDU535_CDROM_MAJOR) - -#define DEVICE_NAME "SONY-CDU535" -#define DEVICE_INTR do_cdu535 -#define DEVICE_REQUEST do_cdu535_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR) - -#define DEVICE_NAME "Goldstar R420" -#define DEVICE_REQUEST do_gscd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == CM206_CDROM_MAJOR) -#define DEVICE_NAME "Philips/LMS CD-ROM cm206" -#define DEVICE_REQUEST do_cm206_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == OPTICS_CDROM_MAJOR) - -#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM" -#define DEVICE_REQUEST do_optcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == SANYO_CDROM_MAJOR) - -#define DEVICE_NAME "Sanyo H94A CD-ROM" -#define DEVICE_REQUEST do_sjcd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == APBLOCK_MAJOR) - -#define DEVICE_NAME "apblock" -#define DEVICE_REQUEST ap_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == DDV_MAJOR) - -#define DEVICE_NAME "ddv" -#define DEVICE_REQUEST ddv_request -#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS) - -#elif (MAJOR_NR == MFM_ACORN_MAJOR) - -#define DEVICE_NAME "mfm disk" -#define DEVICE_INTR do_mfm -#define DEVICE_REQUEST do_mfm_request -#define DEVICE_NR(device) (MINOR(device) >> 6) - -#elif (MAJOR_NR == NBD_MAJOR) - -#define DEVICE_NAME "nbd" -#define DEVICE_REQUEST do_nbd_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == MDISK_MAJOR) - -#define DEVICE_NAME "mdisk" -#define DEVICE_REQUEST mdisk_request -#define DEVICE_NR(device) (MINOR(device)) - -#elif (MAJOR_NR == DASD_MAJOR) - -#define DEVICE_NAME "dasd" -#define DEVICE_REQUEST do_dasd_request -#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) - -#elif (MAJOR_NR == I2O_MAJOR) - -#define DEVICE_NAME "I2O block" -#define DEVICE_REQUEST i2ob_request -#define DEVICE_NR(device) (MINOR(device)>>4) - -#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) - -#define DEVICE_NAME "ida" -#define TIMEOUT_VALUE (25*HZ) -#define DEVICE_REQUEST do_ida_request -#define DEVICE_NR(device) (MINOR(device) >> 4) - -#endif /* MAJOR_NR == whatever */ - -/* provide DEVICE_xxx defaults, if not explicitly defined - * above in the MAJOR_NR==xxx if-elif tree */ -#ifndef DEVICE_ON -#define DEVICE_ON(device) do {} while (0) -#endif -#ifndef DEVICE_OFF -#define DEVICE_OFF(device) do {} while (0) -#endif - -#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR) -#if !defined(IDE_DRIVER) - -#ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) -#endif -#ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) -#endif - -#ifndef DEVICE_NAME -#define DEVICE_NAME "unknown" -#endif - -#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev) - -#ifdef DEVICE_INTR -static void (*DEVICE_INTR)(void) = NULL; -#endif - -#define SET_INTR(x) (DEVICE_INTR = (x)) - -#ifdef DEVICE_REQUEST -static void (DEVICE_REQUEST)(request_queue_t *); -#endif - -#ifdef DEVICE_INTR -#define CLEAR_INTR SET_INTR(NULL) -#else -#define CLEAR_INTR -#endif - -#define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ - CLEAR_INTR; \ - return; \ - } \ - if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ - panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } - -#endif /* !defined(IDE_DRIVER) */ - - -#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */ - -#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) - -static inline void end_request(int uptodate) { - struct request *req = CURRENT; - - if (end_that_request_first(req, uptodate, DEVICE_NAME)) - return; - -#ifndef DEVICE_NO_RANDOM - add_blkdev_randomness(MAJOR(req->rq_dev)); -#endif - DEVICE_OFF(req->rq_dev); - blkdev_dequeue_request(req); - end_that_request_last(req); -} - -#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ -#endif /* LOCAL_END_REQUEST */ - -#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */ -#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */ - -#endif /* _BLK_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/highmem.h --- a/linux-2.4-xen-sparse/include/linux/highmem.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,137 +0,0 @@ -#ifndef _LINUX_HIGHMEM_H -#define _LINUX_HIGHMEM_H - -#include <linux/config.h> -#include <asm/pgalloc.h> - -#ifdef CONFIG_HIGHMEM - -extern struct page *highmem_start_page; - -#include <asm/highmem.h> - -/* declarations for linux/mm/highmem.c */ -unsigned int nr_free_highpages(void); -void kmap_flush_unused(void); - -extern struct buffer_head *create_bounce(int rw, struct buffer_head * bh_orig); - -static inline char *bh_kmap(struct buffer_head *bh) -{ - return kmap(bh->b_page) + bh_offset(bh); -} - -static inline void bh_kunmap(struct buffer_head *bh) -{ - kunmap(bh->b_page); -} - -/* - * remember to add offset! and never ever reenable interrupts between a - * bh_kmap_irq and bh_kunmap_irq!! - */ -static inline char *bh_kmap_irq(struct buffer_head *bh, unsigned long *flags) -{ - unsigned long addr; - - __save_flags(*flags); - - /* - * could be low - */ - if (!PageHighMem(bh->b_page)) - return bh->b_data; - - /* - * it's a highmem page - */ - __cli(); - addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); - - if (addr & ~PAGE_MASK) - BUG(); - - return (char *) addr + bh_offset(bh); -} - -static inline void bh_kunmap_irq(char *buffer, unsigned long *flags) -{ - unsigned long ptr = (unsigned long) buffer & PAGE_MASK; - - kunmap_atomic((void *) ptr, KM_BH_IRQ); - __restore_flags(*flags); -} - -#else /* CONFIG_HIGHMEM */ - -static inline unsigned int nr_free_highpages(void) { return 0; } -static inline void kmap_flush_unused(void) { } - -static inline void *kmap(struct page *page) { return page_address(page); } - -#define kunmap(page) do { } while (0) - -#define kmap_atomic(page,idx) kmap(page) -#define kunmap_atomic(page,idx) kunmap(page) - -#define bh_kmap(bh) ((bh)->b_data) -#define bh_kunmap(bh) do { } while (0) -#define kmap_nonblock(page) kmap(page) -#define bh_kmap_irq(bh, flags) ((bh)->b_data) -#define bh_kunmap_irq(bh, flags) do { *(flags) = 0; } while (0) - -#endif /* CONFIG_HIGHMEM */ - -/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ -static inline void clear_user_highpage(struct page *page, unsigned long vaddr) -{ - void *addr = kmap_atomic(page, KM_USER0); - clear_user_page(addr, vaddr); - kunmap_atomic(addr, KM_USER0); -} - -static inline void clear_highpage(struct page *page) -{ - clear_page(kmap(page)); - kunmap(page); -} - -/* - * Same but also flushes aliased cache contents to RAM. - */ -static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) -{ - char *kaddr; - - if (offset + size > PAGE_SIZE) - out_of_line_bug(); - kaddr = kmap(page); - memset(kaddr + offset, 0, size); - flush_dcache_page(page); - flush_page_to_ram(page); - kunmap(page); -} - -static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr) -{ - char *vfrom, *vto; - - vfrom = kmap_atomic(from, KM_USER0); - vto = kmap_atomic(to, KM_USER1); - copy_user_page(vto, vfrom, vaddr); - kunmap_atomic(vfrom, KM_USER0); - kunmap_atomic(vto, KM_USER1); -} - -static inline void copy_highpage(struct page *to, struct page *from) -{ - char *vfrom, *vto; - - vfrom = kmap_atomic(from, KM_USER0); - vto = kmap_atomic(to, KM_USER1); - copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); - kunmap_atomic(vto, KM_USER1); -} - -#endif /* _LINUX_HIGHMEM_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/irq.h --- a/linux-2.4-xen-sparse/include/linux/irq.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,80 +0,0 @@ -#ifndef __irq_h -#define __irq_h - -/* - * Please do not include this file in generic code. There is currently - * no requirement for any architecture to implement anything held - * within this file. - * - * Thanks. --rmk - */ - -#include <linux/config.h> - -#if !defined(CONFIG_ARCH_S390) - -#include <linux/cache.h> -#include <linux/spinlock.h> - -#include <asm/irq.h> -#include <asm/ptrace.h> - -/* - * IRQ line status. - */ -#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */ -#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */ -#define IRQ_PENDING 4 /* IRQ pending - replay on enable */ -#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ -#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */ -#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ -#define IRQ_LEVEL 64 /* IRQ level triggered */ -#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */ -#define IRQ_PER_CPU 256 /* IRQ is per CPU */ - -/* - * Interrupt controller descriptor. This is all we need - * to describe about the low-level hardware. - */ -struct hw_interrupt_type { - const char * typename; - unsigned int (*startup)(unsigned int irq); - void (*shutdown)(unsigned int irq); - void (*enable)(unsigned int irq); - void (*disable)(unsigned int irq); - void (*ack)(unsigned int irq); - void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, unsigned long mask); -}; - -typedef struct hw_interrupt_type hw_irq_controller; - -/* - * This is the "IRQ descriptor", which contains various information - * about the irq, including what kind of hardware handling it has, - * whether it is disabled etc etc. - * - * Pad this out to 32 bytes for cache and indexing reasons. - */ -typedef struct { - unsigned int status; /* IRQ status */ - hw_irq_controller *handler; - struct irqaction *action; /* IRQ action list */ - unsigned int depth; /* nested irq disables */ - spinlock_t lock; -} ____cacheline_aligned irq_desc_t; - -extern irq_desc_t irq_desc [NR_IRQS]; - -#include <asm/hw_irq.h> /* the arch dependent stuff */ - -extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); -extern int setup_irq(unsigned int , struct irqaction * ); -extern int teardown_irq(unsigned int , struct irqaction * ); - -extern hw_irq_controller no_irq_type; /* needed in every arch ? */ -extern void no_action(int cpl, void *dev_id, struct pt_regs *regs); - -#endif - -#endif /* __irq_h */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/mm.h --- a/linux-2.4-xen-sparse/include/linux/mm.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,727 +0,0 @@ -#ifndef _LINUX_MM_H -#define _LINUX_MM_H - -#include <linux/sched.h> -#include <linux/errno.h> - -#ifdef __KERNEL__ - -#include <linux/config.h> -#include <linux/string.h> -#include <linux/list.h> -#include <linux/mmzone.h> -#include <linux/swap.h> -#include <linux/rbtree.h> - -extern unsigned long max_mapnr; -extern unsigned long num_physpages; -extern unsigned long num_mappedpages; -extern void * high_memory; -extern int page_cluster; -/* The inactive_clean lists are per zone. */ -extern struct list_head active_list; -extern struct list_head inactive_list; - -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/atomic.h> - -/* - * Linux kernel virtual memory manager primitives. - * The idea being to have a "virtual" mm in the same way - * we have a virtual fs - giving a cleaner interface to the - * mm details, and allowing different kinds of memory mappings - * (from shared memory to executable loading to arbitrary - * mmap() functions). - */ - -/* - * This struct defines a memory VMM memory area. There is one of these - * per VM-area/task. A VM area is any part of the process virtual memory - * space that has a special rule for the page-fault handlers (ie a shared - * library, the executable area etc). - */ -struct vm_area_struct { - struct mm_struct * vm_mm; /* The address space we belong to. */ - unsigned long vm_start; /* Our start address within vm_mm. */ - unsigned long vm_end; /* The first byte after our end address - within vm_mm. */ - - /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; - - pgprot_t vm_page_prot; /* Access permissions of this VMA. */ - unsigned long vm_flags; /* Flags, listed below. */ - - rb_node_t vm_rb; - - /* - * For areas with an address space and backing store, - * one of the address_space->i_mmap{,shared} lists, - * for shm areas, the list of attaches, otherwise unused. - */ - struct vm_area_struct *vm_next_share; - struct vm_area_struct **vm_pprev_share; - - /* Function pointers to deal with this struct. */ - struct vm_operations_struct * vm_ops; - - /* Information about our backing store: */ - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ - struct file * vm_file; /* File we map to (can be NULL). */ - unsigned long vm_raend; /* XXX: put full readahead info here. */ - void * vm_private_data; /* was vm_pte (shared mem) */ -}; - -/* - * vm_flags.. - */ -#define VM_READ 0x00000001 /* currently active flags */ -#define VM_WRITE 0x00000002 -#define VM_EXEC 0x00000004 -#define VM_SHARED 0x00000008 - -#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ -#define VM_MAYWRITE 0x00000020 -#define VM_MAYEXEC 0x00000040 -#define VM_MAYSHARE 0x00000080 - -#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ -#define VM_GROWSUP 0x00000200 -#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ -#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ - -#define VM_EXECUTABLE 0x00001000 -#define VM_LOCKED 0x00002000 -#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ - - /* Used by sys_madvise() */ -#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ -#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ - -#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ -#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ - -#ifndef VM_STACK_FLAGS -#define VM_STACK_FLAGS 0x00000177 -#endif - -#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) -#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK -#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) -#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) -#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) - -/* read ahead limits */ -extern int vm_min_readahead; -extern int vm_max_readahead; - -/* - * mapping from the currently active vm_flags protection bits (the - * low four bits) to a page protection mask.. - */ -extern pgprot_t protection_map[16]; - - -/* - * These are the virtual MM functions - opening of an area, closing and - * unmapping it (needed to keep files on disk up-to-date etc), pointer - * to the functions called when a no-page or a wp-page exception occurs. - */ -struct vm_operations_struct { - void (*open)(struct vm_area_struct * area); - void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); -}; - -/* - * Each physical page in the system has a struct page associated with - * it to keep track of whatever it is we are using the page for at the - * moment. Note that we have no way to track which tasks are using - * a page. - * - * Try to keep the most commonly accessed fields in single cache lines - * here (16 bytes or greater). This ordering should be particularly - * beneficial on 32-bit processors. - * - * The first line is data used in page cache lookup, the second line - * is used for linear searches (eg. clock algorithm scans). - * - * TODO: make this structure smaller, it could be as small as 32 bytes. - */ -typedef struct page { - struct list_head list; /* ->mapping has some page lists. */ - struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ - struct page *next_hash; /* Next page sharing our hash bucket in - the pagecache hash table. */ - atomic_t count; /* Usage count, see below. */ - unsigned long flags; /* atomic flags, some possibly - updated asynchronously */ - struct list_head lru; /* Pageout list, eg. active_list; - protected by pagemap_lru_lock !! */ - struct page **pprev_hash; /* Complement to *next_hash. */ - struct buffer_head * buffers; /* Buffer maps us to a disk block. */ - - /* - * On machines where all RAM is mapped into kernel address space, - * we can simply calculate the virtual address. On machines with - * highmem some memory is mapped into kernel virtual memory - * dynamically, so we need a place to store that address. - * Note that this field could be 16 bits on x86 ... ;) - * - * Architectures with slow multiplication can define - * WANT_PAGE_VIRTUAL in asm/page.h - */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - void *virtual; /* Kernel virtual address (NULL if - not kmapped, ie. highmem) */ -#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */ -} mem_map_t; - -/* - * Methods to modify the page usage count. - * - * What counts for a page usage: - * - cache mapping (page->mapping) - * - disk mapping (page->buffers) - * - page mapped in a task's page tables, each mapping - * is counted separately - * - * Also, many kernel routines increase the page count before a critical - * routine so they can be sure the page doesn't go away from under them. - */ -#define get_page(p) atomic_inc(&(p)->count) -#define put_page(p) __free_page(p) -#define put_page_testzero(p) atomic_dec_and_test(&(p)->count) -#define page_count(p) atomic_read(&(p)->count) -#define set_page_count(p,v) atomic_set(&(p)->count, v) - -static inline struct page *nth_page(struct page *page, int n) -{ - return page + n; -} - -/* - * Various page->flags bits: - * - * PG_reserved is set for special pages, which can never be swapped - * out. Some of them might not even exist (eg empty_bad_page)... - * - * Multiple processes may "see" the same page. E.g. for untouched - * mappings of /dev/null, all processes see the same page full of - * zeroes, and text pages of executables and shared libraries have - * only one copy in memory, at most, normally. - * - * For the non-reserved pages, page->count denotes a reference count. - * page->count == 0 means the page is free. - * page->count == 1 means the page is used for exactly one purpose - * (e.g. a private data page of one process). - * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page->count is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. - * - * The other pages (we may call them "process pages") are completely - * managed by the Linux memory manager: I/O, buffers, swapping etc. - * The following discussion applies only to them. - * - * A page may belong to an inode's memory mapping. In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. - * - * A page may have buffers allocated to it. In this case, - * page->buffers is a circular list of these buffer heads. Else, - * page->buffers == NULL. - * - * For pages belonging to inodes, the page->count is the number of - * attaches, plus 1 if buffers are allocated to the page, plus one - * for the page cache itself. - * - * All pages belonging to an inode are in these doubly linked lists: - * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; - * using the page->list list_head. These fields are also used for - * freelist managemet (when page->count==0). - * - * There is also a hash table mapping (mapping,index) to the page - * in memory if present. The lists for this hash table use the fields - * page->next_hash and page->pprev_hash. - * - * All process pages can do I/O: - * - inode pages may need to be read from disk, - * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. - * During disk I/O, PG_locked is used. This bit is set before I/O - * and reset when I/O completes. page_waitqueue(page) is a wait queue of all - * tasks waiting for the I/O on this page to complete. - * PG_uptodate tells whether the page's contents is valid. - * When a read completes, the page becomes uptodate, unless a disk I/O - * error happened. - * - * For choosing which pages to swap out, inode pages carry a - * PG_referenced bit, which is set any time the system accesses - * that page through the (mapping,index) hash table. This referenced - * bit, together with the referenced bit in the page tables, is used - * to manipulate page->age and move the page across the active, - * inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the - * active, inactive_dirty and inactive_clean lists are protected by - * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit! - * - * PG_skip is used on sparc/sparc64 architectures to "skip" certain - * parts of the address space. - * - * PG_error is set to indicate that an I/O error occurred on this page. - * - * PG_arch_1 is an architecture specific page state bit. The generic - * code guarantees that this bit is cleared for a page when it first - * is entered into the page cache. - * - * PG_highmem pages are not permanently mapped into the kernel virtual - * address space, they need to be kmapped separately for doing IO on - * the pages. The struct page (these bits with information) are always - * mapped into kernel address space... - */ -#define PG_locked 0 /* Page is locked. Don't touch. */ -#define PG_error 1 -#define PG_referenced 2 -#define PG_uptodate 3 -#define PG_dirty 4 -#define PG_unused 5 -#define PG_lru 6 -#define PG_active 7 -#define PG_slab 8 -#define PG_skip 10 -#define PG_highmem 11 -#define PG_checked 12 /* kill me in 2.5.<early>. */ -#define PG_arch_1 13 -#define PG_reserved 14 -#define PG_launder 15 /* written out by VM pressure.. */ -#define PG_fs_1 16 /* Filesystem specific */ -#define PG_foreign 21 /* Page belongs to foreign allocator */ - -#ifndef arch_set_page_uptodate -#define arch_set_page_uptodate(page) -#endif - -/* Make it prettier to test the above... */ -#define UnlockPage(page) unlock_page(page) -#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) -#ifndef SetPageUptodate -#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) -#endif -#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) -#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) -#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) -#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) -#define PageLocked(page) test_bit(PG_locked, &(page)->flags) -#define LockPage(page) set_bit(PG_locked, &(page)->flags) -#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) -#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) -#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) -#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) -#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags) - -/* A foreign page uses a custom destructor rather than the buddy allocator. */ -#ifdef CONFIG_FOREIGN_PAGES -#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) -#define SetPageForeign(page, dtor) do { \ - set_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = (void *)dtor; \ -} while (0) -#define ClearPageForeign(page) do { \ - clear_bit(PG_foreign, &(page)->flags); \ - (page)->mapping = NULL; \ -} while (0) -#define PageForeignDestructor(page) \ - ( (void (*) (struct page *)) (page)->mapping ) -#else -#define PageForeign(page) 0 -#define PageForeignDestructor(page) void -#endif - -/* - * The zone field is never updated after free_area_init_core() - * sets it, so none of the operations on it need to be atomic. - */ -#define NODE_SHIFT 4 -#define ZONE_SHIFT (BITS_PER_LONG - 8) - -struct zone_struct; -extern struct zone_struct *zone_table[]; - -static inline zone_t *page_zone(struct page *page) -{ - return zone_table[page->flags >> ZONE_SHIFT]; -} - -static inline void set_page_zone(struct page *page, unsigned long zone_num) -{ - page->flags &= ~(~0UL << ZONE_SHIFT); - page->flags |= zone_num << ZONE_SHIFT; -} - -/* - * In order to avoid #ifdefs within C code itself, we define - * set_page_address to a noop for non-highmem machines, where - * the field isn't useful. - * The same is true for page_address() in arch-dependent code. - */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) - -#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ -#define set_page_address(page, address) do { } while(0) -#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -/* - * Permanent address of a page. Obviously must never be - * called on a highmem page. - */ -#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) - -#define page_address(page) ((page)->virtual) - -#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -#define page_address(page) \ - __va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT) \ - + page_zone(page)->zone_start_paddr) - -#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ - -extern void FASTCALL(set_page_dirty(struct page *)); - -/* - * The first mb is necessary to safely close the critical section opened by the - * TryLockPage(), the second mb is necessary to enforce ordering between - * the clear_bit and the read of the waitqueue (to avoid SMP races with a - * parallel wait_on_page). - */ -#define PageError(page) test_bit(PG_error, &(page)->flags) -#define SetPageError(page) set_bit(PG_error, &(page)->flags) -#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) -#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) -#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) -#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) -#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) -#define PageSlab(page) test_bit(PG_slab, &(page)->flags) -#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) -#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) -#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) - -#define PageActive(page) test_bit(PG_active, &(page)->flags) -#define SetPageActive(page) set_bit(PG_active, &(page)->flags) -#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) - -#define PageLRU(page) test_bit(PG_lru, &(page)->flags) -#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) -#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) - -#ifdef CONFIG_HIGHMEM -#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) -#else -#define PageHighMem(page) 0 /* needed to optimize away at compile time */ -#endif - -#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) - -/* - * Error return values for the *_nopage functions - */ -#define NOPAGE_SIGBUS (NULL) -#define NOPAGE_OOM ((struct page *) (-1)) - -/* The array of struct pages */ -extern mem_map_t * mem_map; - -/* - * There is only one page-allocator function, and two main namespaces to - * it. The alloc_page*() variants return 'struct page *' and as such - * can allocate highmem pages, the *get*page*() variants return - * virtual kernel addresses to the allocated page(s). - */ -extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); -extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); -extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); - -static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - /* - * Gets optimized away by the compiler. - */ - if (order >= MAX_ORDER) - return NULL; - return _alloc_pages(gfp_mask, order); -} - -#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) - -extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); - -#define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask),0) - -#define __get_dma_pages(gfp_mask, order) \ - __get_free_pages((gfp_mask) | GFP_DMA,(order)) - -/* - * The old interface name will be removed in 2.5: - */ -#define get_free_page get_zeroed_page - -/* - * There is only one 'core' page-freeing function. - */ -extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); -extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); - -#define __free_page(page) __free_pages((page), 0) -#define free_page(addr) free_pages((addr),0) - -extern void show_free_areas(void); -extern void show_free_areas_node(pg_data_t *pgdat); - -extern void clear_page_tables(struct mm_struct *, unsigned long, int); - -extern int fail_writepage(struct page *); -struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); -struct file *shmem_file_setup(char * name, loff_t size); -extern void shmem_lock(struct file * file, int lock); -extern int shmem_zero_setup(struct vm_area_struct *); - -extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); -extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); -extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); -extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); - -extern int vmtruncate(struct inode * inode, loff_t offset); -extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); -extern int make_pages_present(unsigned long addr, unsigned long end); -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); -extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len); -extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len); -extern int ptrace_attach(struct task_struct *tsk); -extern int ptrace_detach(struct task_struct *, unsigned int); -extern void ptrace_disable(struct task_struct *); -extern int ptrace_check_attach(struct task_struct *task, int kill); - -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -/* - * On a two-level page table, this ends up being trivial. Thus the - * inlining and the symmetry break with pte_alloc() that does all - * of this out-of-line. - */ -static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - if (pgd_none(*pgd)) - return __pmd_alloc(mm, pgd, address); - return pmd_offset(pgd, address); -} - -extern int pgt_cache_water[2]; -extern int check_pgt_cache(void); - -extern void free_area_init(unsigned long * zones_size); -extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, - unsigned long * zones_size, unsigned long zone_start_paddr, - unsigned long *zholes_size); -extern void mem_init(void); -extern void show_mem(void); -extern void si_meminfo(struct sysinfo * val); -extern void swapin_readahead(swp_entry_t); - -extern struct address_space swapper_space; -#define PageSwapCache(page) ((page)->mapping == &swapper_space) - -static inline int is_page_cache_freeable(struct page * page) -{ - return page_count(page) - !!page->buffers == 1; -} - -extern int FASTCALL(can_share_swap_page(struct page *)); -extern int FASTCALL(remove_exclusive_swap_page(struct page *)); - -extern void __free_pte(pte_t); - -/* mmap.c */ -extern void lock_vma_mappings(struct vm_area_struct *); -extern void unlock_vma_mappings(struct vm_area_struct *); -extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void build_mmap_rb(struct mm_struct *); -extern void exit_mmap(struct mm_struct *); - -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long pgoff); - -static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -out: - return ret; -} - -extern int do_munmap(struct mm_struct *, unsigned long, size_t); - -extern unsigned long do_brk(unsigned long, unsigned long); - -static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev) -{ - prev->vm_next = vma->vm_next; - rb_erase(&vma->vm_rb, &mm->mm_rb); - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; -} - -static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags) -{ - if (!vma->vm_file && vma->vm_flags == vm_flags) - return 1; - else - return 0; -} - -struct zone_t; -/* filemap.c */ -extern void remove_inode_page(struct page *); -extern unsigned long page_unuse(struct page *); -extern void truncate_inode_pages(struct address_space *, loff_t); - -/* generic vm_area_ops exported for stackable file systems */ -extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned int); -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int); - -/* - * GFP bitmasks.. - */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 - -/* Action modifiers - doesn't change the zoning */ -#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ -#define __GFP_HIGH 0x20 /* Should access emergency pools? */ -#define __GFP_IO 0x40 /* Can start low memory physical IO? */ -#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ -#define __GFP_FS 0x100 /* Can call down to low-level FS? */ - -#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) -#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO) -#define GFP_ATOMIC (__GFP_HIGH) -#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM) -#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) -#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) - -/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some - platforms, used as appropriate on others */ - -#define GFP_DMA __GFP_DMA - -static inline unsigned int pf_gfp_mask(unsigned int gfp_mask) -{ - /* avoid all memory balancing I/O methods if this task cannot block on I/O */ - if (current->flags & PF_NOIO) - gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS); - - return gfp_mask; -} - -/* vma is the first one with address < vma->vm_end, - * and even address < vma->vm_start. Have to extend vma. */ -static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) -{ - unsigned long grow; - - /* - * vma->vm_start/vm_end cannot change under us because the caller - * is required to hold the mmap_sem in read mode. We need the - * page_table_lock lock to serialize against concurrent expand_stacks. - */ - address &= PAGE_MASK; - spin_lock(&vma->vm_mm->page_table_lock); - - /* already expanded while we were spinning? */ - if (vma->vm_start <= address) { - spin_unlock(&vma->vm_mm->page_table_lock); - return 0; - } - - grow = (vma->vm_start - address) >> PAGE_SHIFT; - if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); - return -ENOMEM; - } - - if ((vma->vm_flags & VM_LOCKED) && - ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_MEMLOCK].rlim_cur) { - spin_unlock(&vma->vm_mm->page_table_lock); - return -ENOMEM; - } - - - vma->vm_start = address; - vma->vm_pgoff -= grow; - vma->vm_mm->total_vm += grow; - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; - spin_unlock(&vma->vm_mm->page_table_lock); - return 0; -} - -/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); -extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, - struct vm_area_struct **pprev); - -/* Look up the first VMA which intersects the interval start_addr..end_addr-1, - NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) -{ - struct vm_area_struct * vma = find_vma(mm,start_addr); - - if (vma && end_addr <= vma->vm_start) - vma = NULL; - return vma; -} - -extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); - -extern struct page * vmalloc_to_page(void *addr); - -#endif /* __KERNEL__ */ - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/sched.h --- a/linux-2.4-xen-sparse/include/linux/sched.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,971 +0,0 @@ -#ifndef _LINUX_SCHED_H -#define _LINUX_SCHED_H - -#include <asm/param.h> /* for HZ */ - -extern unsigned long event; - -#include <linux/config.h> -#include <linux/binfmts.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/times.h> -#include <linux/timex.h> -#include <linux/rbtree.h> - -#include <asm/system.h> -#include <asm/semaphore.h> -#include <asm/page.h> -#include <asm/ptrace.h> -#include <asm/mmu.h> - -#include <linux/smp.h> -#include <linux/tty.h> -#include <linux/sem.h> -#include <linux/signal.h> -#include <linux/securebits.h> -#include <linux/fs_struct.h> - -struct exec_domain; - -/* - * cloning flags: - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ -#define LOAD_FREQ (5*HZ) /* 5 sec intervals */ -#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ -#define EXP_5 2014 /* 1/exp(5sec/5min) */ -#define EXP_15 2037 /* 1/exp(5sec/15min) */ - -#define CALC_LOAD(load,exp,n) \ - load *= exp; \ - load += n*(FIXED_1-exp); \ - load >>= FSHIFT; - -#define CT_TO_SECS(x) ((x) / HZ) -#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) - -extern int nr_running, nr_threads; -extern int last_pid; - -#include <linux/fs.h> -#include <linux/time.h> -#include <linux/param.h> -#include <linux/resource.h> -#ifdef __KERNEL__ -#include <linux/timer.h> -#endif - -#include <asm/processor.h> - -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define TASK_ZOMBIE 4 -#define TASK_STOPPED 8 - -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) - -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - set_mb(current->state, (state_value)) - -/* - * Scheduling policies - */ -#define SCHED_OTHER 0 -#define SCHED_FIFO 1 -#define SCHED_RR 2 - -/* - * This is an additional bit set when we want to - * yield the CPU for one re-schedule.. - */ -#define SCHED_YIELD 0x10 - -struct sched_param { - int sched_priority; -}; - -struct completion; - -#ifdef __KERNEL__ - -#include <linux/spinlock.h> - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t runqueue_lock; -extern spinlock_t mmlist_lock; - -extern void sched_init(void); -extern void init_idle(void); -extern void show_state(void); -extern void cpu_init (void); -extern void trap_init(void); -extern void update_process_times(int user); -#ifdef CONFIG_NO_IDLE_HZ -extern void update_process_times_us(int user, int system); -#endif -extern void update_one_process(struct task_struct *p, unsigned long user, - unsigned long system, int cpu); - -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long FASTCALL(schedule_timeout(signed long timeout)); -asmlinkage void schedule(void); - -extern int schedule_task(struct tq_struct *task); -extern void flush_scheduled_tasks(void); -extern int start_context_thread(void); -extern int current_is_keventd(void); - -#if CONFIG_SMP -extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask); -#else -# define set_cpus_allowed(p, new_mask) do { } while (0) -#endif - -/* - * The default fd array needs to be at least BITS_PER_LONG, - * as this is the granularity returned by copy_fdset(). - */ -#define NR_OPEN_DEFAULT BITS_PER_LONG - -struct namespace; -/* - * Open file table structure - */ -struct files_struct { - atomic_t count; - rwlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ - int max_fds; - int max_fdset; - int next_fd; - struct file ** fd; /* current fd array */ - fd_set *close_on_exec; - fd_set *open_fds; - fd_set close_on_exec_init; - fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; -}; - -#define INIT_FILES \ -{ \ - count: ATOMIC_INIT(1), \ - file_lock: RW_LOCK_UNLOCKED, \ - max_fds: NR_OPEN_DEFAULT, \ - max_fdset: __FD_SETSIZE, \ - next_fd: 0, \ - fd: &init_files.fd_array[0], \ - close_on_exec: &init_files.close_on_exec_init, \ - open_fds: &init_files.open_fds_init, \ - close_on_exec_init: { { 0, } }, \ - open_fds_init: { { 0, } }, \ - fd_array: { NULL, } \ -} - -/* Maximum number of active map areas.. This is a random (large) number */ -#define DEFAULT_MAX_MAP_COUNT (65536) - -extern int max_map_count; - -struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ - rb_root_t mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ - pgd_t * pgd; - atomic_t mm_users; /* How many users with user space? */ - atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - int map_count; /* number of VMAs */ - struct rw_semaphore mmap_sem; - spinlock_t page_table_lock; /* Protects task page tables and mm->rss */ - - struct list_head mmlist; /* List of all active mm's. These are globally strung - * together off init_mm.mmlist, and are protected - * by mmlist_lock - */ - - unsigned long start_code, end_code, start_data, end_data; - unsigned long start_brk, brk, start_stack; - unsigned long arg_start, arg_end, env_start, env_end; - unsigned long rss, total_vm, locked_vm; - unsigned long def_flags; - unsigned long cpu_vm_mask; - unsigned long swap_address; - - unsigned dumpable:1; - - /* Architecture-specific MM context */ - mm_context_t context; -}; - -extern int mmlist_nr; - -#define INIT_MM(name) \ -{ \ - mm_rb: RB_ROOT, \ - pgd: swapper_pg_dir, \ - mm_users: ATOMIC_INIT(2), \ - mm_count: ATOMIC_INIT(1), \ - mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ - page_table_lock: SPIN_LOCK_UNLOCKED, \ - mmlist: LIST_HEAD_INIT(name.mmlist), \ -} - -struct signal_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; -}; - - -#define INIT_SIGNALS { \ - count: ATOMIC_INIT(1), \ - action: { {{0,}}, }, \ - siglock: SPIN_LOCK_UNLOCKED \ -} - -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t files; /* How many open files does this user have? */ - - /* Hash table maintenance information */ - struct user_struct *next, **pprev; - uid_t uid; -}; - -#define get_current_user() ({ \ - struct user_struct *__tmp_user = current->user; \ - atomic_inc(&__tmp_user->__count); \ - __tmp_user; }) - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - -struct task_struct { - /* - * offsets of these are hardcoded elsewhere - touch with care - */ - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ - int sigpending; - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF for user-thead - 0-0xFFFFFFFF for kernel-thread - */ - struct exec_domain *exec_domain; - volatile long need_resched; - unsigned long ptrace; - - int lock_depth; /* Lock depth */ - -/* - * offset 32 begins here on 32-bit platforms. We keep - * all fields in a single cacheline that are needed for - * the goodness() loop in schedule(). - */ - long counter; - long nice; - unsigned long policy; - struct mm_struct *mm; - int processor; - /* - * cpus_runnable is ~0 if the process is not running on any - * CPU. It's (1 << cpu) if it's running on a CPU. This mask - * is updated under the runqueue lock. - * - * To determine whether a process might run on a CPU, this - * mask is AND-ed with cpus_allowed. - */ - unsigned long cpus_runnable, cpus_allowed; - /* - * (only the 'next' pointer fits into the cacheline, but - * that's just fine.) - */ - struct list_head run_list; - unsigned long sleep_time; - - struct task_struct *next_task, *prev_task; - struct mm_struct *active_mm; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; - -/* task state */ - struct linux_binfmt *binfmt; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - /* ??? */ - unsigned long personality; - int did_exec:1; - unsigned task_dumpable:1; - pid_t pid; - pid_t pgrp; - pid_t tty_old_pgrp; - pid_t session; - pid_t tgid; - /* boolean value for session group leader */ - int leader; - /* - * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with - * p->p_pptr->pid) - */ - struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; - struct list_head thread_group; - - /* PID hash table linkage. */ - struct task_struct *pidhash_next; - struct task_struct **pidhash_pprev; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - struct completion *vfork_done; /* for vfork() */ - unsigned long rt_priority; - unsigned long it_real_value, it_prof_value, it_virt_value; - unsigned long it_real_incr, it_prof_incr, it_virt_incr; - struct timer_list real_timer; - struct tms times; - unsigned long start_time; - long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; - int swappable:1; -/* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - int ngroups; - gid_t groups[NGROUPS]; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted; - int keep_capabilities:1; - struct user_struct *user; -/* limits */ - struct rlimit rlim[RLIM_NLIMITS]; - unsigned short used_math; - char comm[16]; -/* file system info */ - int link_count, total_link_count; - struct tty_struct *tty; /* NULL if no tty */ - unsigned int locks; /* How many file locks are being held */ -/* ipc stuff */ - struct sem_undo *semundo; - struct sem_queue *semsleeping; -/* CPU-specific state of this task */ - struct thread_struct thread; -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespace */ - struct namespace *namespace; -/* signal handlers */ - spinlock_t sigmask_lock; /* Protects signal and blocked */ - struct signal_struct *sig; - - sigset_t blocked; - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; - -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty */ - spinlock_t alloc_lock; - -/* journalling filesystem info */ - void *journal_info; -}; - -/* - * Per process flags - */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ -#define PF_STARTING 0x00000002 /* being created */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ -#define PF_FREE_PAGES 0x00002000 /* per process page freeing */ -#define PF_NOIO 0x00004000 /* avoid generating further I/O */ -#define PF_FSTRANS 0x00008000 /* inside a filesystem transaction */ - -#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ - -/* - * Ptrace flags - */ - -#define PT_PTRACED 0x00000001 -#define PT_TRACESYS 0x00000002 -#define PT_DTRACE 0x00000004 /* delayed trace (used on m68k, i386) */ -#define PT_TRACESYSGOOD 0x00000008 -#define PT_PTRACE_CAP 0x00000010 /* ptracer can follow suid-exec */ - -#define is_dumpable(tsk) ((tsk)->task_dumpable && (tsk)->mm && (tsk)->mm->dumpable) - -/* - * Limit the stack by to some sane default: root can always - * increase this limit if needed.. 8MB seems reasonable. - */ -#define _STK_LIM (8*1024*1024) - -#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */ -#define MAX_COUNTER (20*HZ/100) -#define DEF_NICE (0) - -extern void yield(void); - -/* - * The default (Linux) execution domain. - */ -extern struct exec_domain default_exec_domain; - -/* - * INIT_TASK is used to set up the first task table, touch at - * your own risk!. Base=0, limit=0x1fffff (=2MB) - */ -#define INIT_TASK(tsk) \ -{ \ - state: 0, \ - flags: 0, \ - sigpending: 0, \ - addr_limit: KERNEL_DS, \ - exec_domain: &default_exec_domain, \ - lock_depth: -1, \ - counter: DEF_COUNTER, \ - nice: DEF_NICE, \ - policy: SCHED_OTHER, \ - mm: NULL, \ - active_mm: &init_mm, \ - cpus_runnable: ~0UL, \ - cpus_allowed: ~0UL, \ - run_list: LIST_HEAD_INIT(tsk.run_list), \ - next_task: &tsk, \ - prev_task: &tsk, \ - p_opptr: &tsk, \ - p_pptr: &tsk, \ - thread_group: LIST_HEAD_INIT(tsk.thread_group), \ - wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\ - real_timer: { \ - function: it_real_fn \ - }, \ - cap_effective: CAP_INIT_EFF_SET, \ - cap_inheritable: CAP_INIT_INH_SET, \ - cap_permitted: CAP_FULL_SET, \ - keep_capabilities: 0, \ - rlim: INIT_RLIMITS, \ - user: INIT_USER, \ - comm: "swapper", \ - thread: INIT_THREAD, \ - fs: &init_fs, \ - files: &init_files, \ - sigmask_lock: SPIN_LOCK_UNLOCKED, \ - sig: &init_signals, \ - pending: { NULL, &tsk.pending.head, {{0}}}, \ - blocked: {{0}}, \ - alloc_lock: SPIN_LOCK_UNLOCKED, \ - journal_info: NULL, \ -} - - -#ifndef INIT_TASK_SIZE -# define INIT_TASK_SIZE 2048*sizeof(long) -#endif - -union task_union { - struct task_struct task; - unsigned long stack[INIT_TASK_SIZE/sizeof(long)]; -}; - -extern union task_union init_task_union; - -extern struct mm_struct init_mm; -extern struct task_struct *init_tasks[NR_CPUS]; - -/* PID hashing. (shouldnt this be dynamic?) */ -#define PIDHASH_SZ (4096 >> 2) -extern struct task_struct *pidhash[PIDHASH_SZ]; - -#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) - -static inline void hash_pid(struct task_struct *p) -{ - struct task_struct **htable = &pidhash[pid_hashfn(p->pid)]; - - if((p->pidhash_next = *htable) != NULL) - (*htable)->pidhash_pprev = &p->pidhash_next; - *htable = p; - p->pidhash_pprev = htable; -} - -static inline void unhash_pid(struct task_struct *p) -{ - if(p->pidhash_next) - p->pidhash_next->pidhash_pprev = p->pidhash_pprev; - *p->pidhash_pprev = p->pidhash_next; -} - -static inline struct task_struct *find_task_by_pid(int pid) -{ - struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; - - for(p = *htable; p && p->pid != pid; p = p->pidhash_next) - ; - - return p; -} - -#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL) - -static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu) -{ - tsk->processor = cpu; - tsk->cpus_runnable = 1UL << cpu; -} - -static inline void task_release_cpu(struct task_struct *tsk) -{ - tsk->cpus_runnable = ~0UL; -} - -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(uid_t); -extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); - -#include <asm/current.h> - -extern unsigned long volatile jiffies; -extern unsigned long itimer_ticks; -extern unsigned long itimer_next; -extern struct timeval xtime; -extern void do_timer(struct pt_regs *); -#ifdef CONFIG_NO_IDLE_HZ -extern void do_timer_ticks(int ticks); -#endif - -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -#define CURRENT_TIME (xtime.tv_sec) - -extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); -extern void FASTCALL(sleep_on(wait_queue_head_t *q)); -extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, - signed long timeout)); -extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q)); -extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, - signed long timeout)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); - -#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) -#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) -#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) -#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) -#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) -#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) -#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) -#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) -asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); - -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - -extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *); -extern void sig_exit(int, int, struct siginfo *); -extern int dequeue_signal(sigset_t *, siginfo_t *); -extern void block_all_signals(int (*notifier)(void *priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int kill_pg_info(int, struct siginfo *, pid_t); -extern int kill_sl_info(int, struct siginfo *, pid_t); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern void notify_parent(struct task_struct *, int); -extern void do_notify_parent(struct task_struct *, int); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int kill_pg(pid_t, int, int); -extern int kill_sl(pid_t, int, int); -extern int kill_proc(pid_t, int, int); -extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); -extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long); - -static inline int signal_pending(struct task_struct *p) -{ - return (p->sigpending != 0); -} - -/* - * Re-calculate pending state from the set of locally pending - * signals, globally pending signals, and blocked signals. - */ -static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) -{ - unsigned long ready; - long i; - - switch (_NSIG_WORDS) { - default: - for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) - ready |= signal->sig[i] &~ blocked->sig[i]; - break; - - case 4: ready = signal->sig[3] &~ blocked->sig[3]; - ready |= signal->sig[2] &~ blocked->sig[2]; - ready |= signal->sig[1] &~ blocked->sig[1]; - ready |= signal->sig[0] &~ blocked->sig[0]; - break; - - case 2: ready = signal->sig[1] &~ blocked->sig[1]; - ready |= signal->sig[0] &~ blocked->sig[0]; - break; - - case 1: ready = signal->sig[0] &~ blocked->sig[0]; - } - return ready != 0; -} - -/* Reevaluate whether the task has signals pending delivery. - This is required every time the blocked sigset_t changes. - All callers should have t->sigmask_lock. */ - -static inline void recalc_sigpending(struct task_struct *t) -{ - t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked); -} - -/* True if we are on the alternate signal stack. */ - -static inline int on_sig_stack(unsigned long sp) -{ - return (sp - current->sas_ss_sp < current->sas_ss_size); -} - -static inline int sas_ss_flags(unsigned long sp) -{ - return (current->sas_ss_size == 0 ? SS_DISABLE - : on_sig_stack(sp) ? SS_ONSTACK : 0); -} - -extern int request_irq(unsigned int, - void (*handler)(int, void *, struct pt_regs *), - unsigned long, const char *, void *); -extern void free_irq(unsigned int, void *); - -/* - * This has now become a routine instead of a macro, it sets a flag if - * it returns true (to do BSD-style accounting where the process is flagged - * if it uses root privs). The implication of this is that you should do - * normal permissions checks first, and check suser() last. - * - * [Dec 1997 -- Chris Evans] - * For correctness, the above considerations need to be extended to - * fsuser(). This is done, along with moving fsuser() checks to be - * last. - * - * These will be removed, but in the mean time, when the SECURE_NOROOT - * flag is set, uids don't grant privilege. - */ -static inline int suser(void) -{ - if (!issecure(SECURE_NOROOT) && current->euid == 0) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -static inline int fsuser(void) -{ - if (!issecure(SECURE_NOROOT) && current->fsuid == 0) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -/* - * capable() checks for a particular capability. - * New privilege checks should use this interface, rather than suser() or - * fsuser(). See include/linux/capability.h for defined capabilities. - */ - -static inline int capable(int cap) -{ -#if 1 /* ok now */ - if (cap_raised(current->cap_effective, cap)) -#else - if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0) -#endif - { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} - -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -extern struct mm_struct * start_lazy_tlb(void); -extern void end_lazy_tlb(struct mm_struct *mm); - -/* mmdrop drops the mm and the page tables */ -extern void FASTCALL(__mmdrop(struct mm_struct *)); -static inline void mmdrop(struct mm_struct * mm) -{ - if (atomic_dec_and_test(&mm->mm_count)) - __mmdrop(mm); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(void); - -/* - * Routines for handling the fd arrays - */ -extern struct file ** alloc_fd_array(int); -extern int expand_fd_array(struct files_struct *, int nr); -extern void free_fd_array(struct file **, int); - -extern fd_set *alloc_fdset(int); -extern int expand_fdset(struct files_struct *, int nr); -extern void free_fdset(fd_set *, int); - -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); -extern void flush_thread(void); -extern void exit_thread(void); - -extern void exit_mm(struct task_struct *); -extern void exit_files(struct task_struct *); -extern void exit_sighand(struct task_struct *); - -extern void reparent_to_init(void); -extern void daemonize(void); - -extern int do_execve(char *, char **, char **, struct pt_regs *); -extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long); - -extern void set_task_comm(struct task_struct *tsk, char *from); -extern void get_task_comm(char *to, struct task_struct *tsk); - -extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); - -extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -#define __wait_event(wq, condition) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - schedule(); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event(wq, condition) \ -do { \ - if (condition) \ - break; \ - __wait_event(wq, condition); \ -} while (0) - -#define __wait_event_interruptible(wq, condition, ret) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (!signal_pending(current)) { \ - schedule(); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __wait_event_interruptible(wq, condition, __ret); \ - __ret; \ -}) - -#define REMOVE_LINKS(p) do { \ - (p)->next_task->prev_task = (p)->prev_task; \ - (p)->prev_task->next_task = (p)->next_task; \ - if ((p)->p_osptr) \ - (p)->p_osptr->p_ysptr = (p)->p_ysptr; \ - if ((p)->p_ysptr) \ - (p)->p_ysptr->p_osptr = (p)->p_osptr; \ - else \ - (p)->p_pptr->p_cptr = (p)->p_osptr; \ - } while (0) - -#define SET_LINKS(p) do { \ - (p)->next_task = &init_task; \ - (p)->prev_task = init_task.prev_task; \ - init_task.prev_task->next_task = (p); \ - init_task.prev_task = (p); \ - (p)->p_ysptr = NULL; \ - if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \ - (p)->p_osptr->p_ysptr = p; \ - (p)->p_pptr->p_cptr = p; \ - } while (0) - -#define for_each_task(p) \ - for (p = &init_task ; (p = p->next_task) != &init_task ; ) - -#define for_each_thread(task) \ - for (task = next_thread(current) ; task != current ; task = next_thread(task)) - -#define next_thread(p) \ - list_entry((p)->thread_group.next, struct task_struct, thread_group) - -#define thread_group_leader(p) (p->pid == p->tgid) - -static inline void del_from_runqueue(struct task_struct * p) -{ - nr_running--; - p->sleep_time = jiffies; - list_del(&p->run_list); - p->run_list.next = NULL; -} - -static inline int task_on_runqueue(struct task_struct *p) -{ - return (p->run_list.next != NULL); -} - -static inline void unhash_process(struct task_struct *p) -{ - if (task_on_runqueue(p)) - out_of_line_bug(); - write_lock_irq(&tasklist_lock); - nr_threads--; - unhash_pid(p); - REMOVE_LINKS(p); - list_del(&p->thread_group); - write_unlock_irq(&tasklist_lock); -} - -/* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - -/* write full pathname into buffer and return start of pathname */ -static inline char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - char *buf, int buflen) -{ - char *res; - struct vfsmount *rootmnt; - struct dentry *root; - read_lock(&current->fs->lock); - rootmnt = mntget(current->fs->rootmnt); - root = dget(current->fs->root); - read_unlock(&current->fs->lock); - spin_lock(&dcache_lock); - res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); - spin_unlock(&dcache_lock); - dput(root); - mntput(rootmnt); - return res; -} - -static inline int need_resched(void) -{ - return (unlikely(current->need_resched)); -} - -extern void __cond_resched(void); -static inline void cond_resched(void) -{ - if (need_resched()) - __cond_resched(); -} - -#endif /* __KERNEL__ */ -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/skbuff.h --- a/linux-2.4-xen-sparse/include/linux/skbuff.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1181 +0,0 @@ -/* - * Definitions for the 'struct sk_buff' memory handlers. - * - * Authors: - * Alan Cox, <gw4pts@xxxxxxxxxxxxxxx> - * Florian La Roche, <rzsfl@xxxxxxxxxxxx> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_SKBUFF_H -#define _LINUX_SKBUFF_H - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/time.h> -#include <linux/cache.h> - -#include <asm/atomic.h> -#include <asm/types.h> -#include <linux/spinlock.h> -#include <linux/mm.h> -#include <linux/highmem.h> - -#define HAVE_ALLOC_SKB /* For the drivers to know */ -#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ -#define SLAB_SKB /* Slabified skbuffs */ - -#define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 -#define CHECKSUM_UNNECESSARY 2 - -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) -#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) - -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * HW: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, - * not UNNECESSARY. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * HW: device is required to csum packet as seen by hard_start_xmit - * from skb->h.raw to the end and to record the checksum - * at skb->h.raw+skb->csum. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_NO_CSUM - loopback or reliable single hop media. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * - * Any questions? No questions, good. --ANK - */ - -#ifdef __i386__ -#define NET_CALLER(arg) (*(((void**)&arg)-1)) -#else -#define NET_CALLER(arg) __builtin_return_address(0) -#endif - -#ifdef CONFIG_NETFILTER -struct nf_conntrack { - atomic_t use; - void (*destroy)(struct nf_conntrack *); -}; - -struct nf_ct_info { - struct nf_conntrack *master; -}; -#endif - -struct sk_buff_head { - /* These two members must be first. */ - struct sk_buff * next; - struct sk_buff * prev; - - __u32 qlen; - spinlock_t lock; -}; - -struct sk_buff; - -#define MAX_SKB_FRAGS 6 - -typedef struct skb_frag_struct skb_frag_t; - -struct skb_frag_struct -{ - struct page *page; - __u16 page_offset; - __u16 size; -}; - -/* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -struct skb_shared_info { - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -struct sk_buff { - /* These two members must be first. */ - struct sk_buff * next; /* Next buffer in list */ - struct sk_buff * prev; /* Previous buffer in list */ - - struct sk_buff_head * list; /* List we are on */ - struct sock *sk; /* Socket we are owned by */ - struct timeval stamp; /* Time we arrived */ - struct net_device *dev; /* Device we arrived on/are leaving by */ - struct net_device *real_dev; /* For support of point to point protocols - (e.g. 802.3ad) over bonding, we must save the - physical device that got the packet before - replacing skb->dev with the virtual device. */ - - /* Transport layer header */ - union - { - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *icmph; - struct igmphdr *igmph; - struct iphdr *ipiph; - struct spxhdr *spxh; - unsigned char *raw; - } h; - - /* Network layer header */ - union - { - struct iphdr *iph; - struct ipv6hdr *ipv6h; - struct arphdr *arph; - struct ipxhdr *ipxh; - unsigned char *raw; - } nh; - - /* Link layer header */ - union - { - struct ethhdr *ethernet; - unsigned char *raw; - } mac; - - struct dst_entry *dst; - - /* - * This is the control buffer. It is free to use for every - * layer. Please put your private variables there. If you - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. - */ - char cb[48]; - - unsigned int len; /* Length of actual data */ - unsigned int data_len; - unsigned int csum; /* Checksum */ - unsigned char __unused, /* Dead field, may be reused */ - cloned, /* head may be cloned (check refcnt to be sure). */ - pkt_type, /* Packet class */ - ip_summed; /* Driver fed us an IP checksum */ - __u32 priority; /* Packet queueing priority */ - atomic_t users; /* User count - see datagram.c,tcp.c */ - unsigned short protocol; /* Packet protocol from driver. */ - unsigned short security; /* Security level of packet */ - unsigned int truesize; /* Buffer size */ - - unsigned char *head; /* Head of buffer */ - unsigned char *data; /* Data head pointer */ - unsigned char *tail; /* Tail pointer */ - unsigned char *end; /* End pointer */ - - void (*destructor)(struct sk_buff *); /* Destruct function */ -#ifdef CONFIG_NETFILTER - /* Can be used for communication between hooks. */ - unsigned long nfmark; - /* Cache info */ - __u32 nfcache; - /* Associated connection, if any */ - struct nf_ct_info *nfct; -#ifdef CONFIG_NETFILTER_DEBUG - unsigned int nf_debug; -#endif -#endif /*CONFIG_NETFILTER*/ - -#if defined(CONFIG_HIPPI) - union{ - __u32 ifield; - } private; -#endif - -#ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ -#endif -}; - -#ifdef __KERNEL__ -/* - * Handling routines are only of interest to the kernel - */ -#include <linux/slab.h> - -#include <asm/system.h> - -extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff * alloc_skb(unsigned int size, int priority); -extern struct sk_buff * alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority); -extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); -extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); -extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); -extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int priority); -extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); -#define dev_kfree_skb(a) kfree_skb(a) -extern void skb_over_panic(struct sk_buff *skb, int len, void *here); -extern void skb_under_panic(struct sk_buff *skb, int len, void *here); - -/* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) - -/** - * skb_queue_empty - check if a queue is empty - * @list: queue head - * - * Returns true if the queue is empty, false otherwise. - */ - -static inline int skb_queue_empty(struct sk_buff_head *list) -{ - return (list->next == (struct sk_buff *) list); -} - -/** - * skb_get - reference buffer - * @skb: buffer to reference - * - * Makes another reference to a socket buffer and returns a pointer - * to the buffer. - */ - -static inline struct sk_buff *skb_get(struct sk_buff *skb) -{ - atomic_inc(&skb->users); - return skb; -} - -/* - * If users==1, we are the only owner and are can avoid redundant - * atomic change. - */ - -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. - */ - -static inline void kfree_skb(struct sk_buff *skb) -{ - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) - return; - __kfree_skb(skb); -} - -/** - * skb_cloned - is the buffer a clone - * @skb: buffer to check - * - * Returns true if the buffer was generated with skb_clone() and is - * one of multiple shared copies of the buffer. Cloned buffers are - * shared data so must not be written to under normal circumstances. - */ - -static inline int skb_cloned(struct sk_buff *skb) -{ - return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; -} - -/** - * skb_shared - is the buffer shared - * @skb: buffer to check - * - * Returns true if more than one person has a reference to this - * buffer. - */ - -static inline int skb_shared(struct sk_buff *skb) -{ - return (atomic_read(&skb->users) != 1); -} - -/** - * skb_share_check - check if buffer is shared and if so clone it - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the buffer is shared the buffer is cloned and the old copy - * drops a reference. A new clone with a single reference is returned. - * If the buffer is not shared the original buffer is returned. When - * being called from interrupt status or with spinlocks held pri must - * be GFP_ATOMIC. - * - * NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) -{ - if (skb_shared(skb)) { - struct sk_buff *nskb; - nskb = skb_clone(skb, pri); - kfree_skb(skb); - return nskb; - } - return skb; -} - - -/* - * Copy shared buffers into a new sk_buff. We effectively do COW on - * packets to handle cases where we have a local reader and forward - * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. - */ - -/** - * skb_unshare - make a copy of a shared buffer - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the socket buffer is a clone then this function creates a new - * copy of the data, drops a reference count on the old copy and returns - * the new copy with the reference count at 1. If the buffer is not a clone - * the original buffer is returned. When called with a spinlock held or - * from interrupt state @pri must be %GFP_ATOMIC - * - * %NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) -{ - struct sk_buff *nskb; - if(!skb_cloned(skb)) - return skb; - nskb=skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ - return nskb; -} - -/** - * skb_peek - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the head element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_peek_tail - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the tail element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_queue_len - get queue length - * @list_: list to measure - * - * Return the length of an &sk_buff queue. - */ - -static inline __u32 skb_queue_len(struct sk_buff_head *list_) -{ - return(list_->qlen); -} - -static inline void skb_queue_head_init(struct sk_buff_head *list) -{ - spin_lock_init(&list->lock); - list->prev = (struct sk_buff *)list; - list->next = (struct sk_buff *)list; - list->qlen = 0; -} - -/* - * Insert an sk_buff at the start of a list. - * - * The "__skb_xxxx()" functions are the non-atomic ones that - * can only be called with interrupts disabled. - */ - -/** - * __skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - prev = (struct sk_buff *)list; - next = prev->next; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * __skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the end of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - next = (struct sk_buff *)list; - prev = next->prev; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * __skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. This function does not take any locks - * so must be used with appropriate locks held only. The head item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) -{ - struct sk_buff *next, *prev, *result; - - prev = (struct sk_buff *) list; - next = prev->next; - result = NULL; - if (next != prev) { - result = next; - next = next->next; - list->qlen--; - next->prev = prev; - prev->next = next; - result->next = NULL; - result->prev = NULL; - result->list = NULL; - } - return result; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/* - * Insert a packet on a list. - */ - -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff * prev, struct sk_buff *next, - struct sk_buff_head * list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; - newsk->list = list; - list->qlen++; -} - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -/* - * Place a packet after a given packet in a list. - */ - -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - __skb_insert(newsk, old, old->next, old->list); -} - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -/* - * remove sk_buff from list. _Must_ be called atomically, and with - * the list known.. - */ - -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - struct sk_buff * next, * prev; - - list->qlen--; - next = skb->next; - prev = skb->prev; - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - next->prev = prev; - prev->next = next; -} - -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. - */ - -static inline void skb_unlink(struct sk_buff *skb) -{ - struct sk_buff_head *list = skb->list; - - if(list) { - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - if(skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } -} - -/* XXX: more streamlined implementation */ - -/** - * __skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. This function does not take any locks - * so must be used with appropriate locks held only. The tail item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek_tail(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -static inline int skb_is_nonlinear(const struct sk_buff *skb) -{ - return skb->data_len; -} - -static inline unsigned int skb_headlen(const struct sk_buff *skb) -{ - return skb->len - skb->data_len; -} - -#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) -#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) -#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) - -/* - * Add data to an sk_buff - */ - -static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - return tmp; -} - -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. - */ - -static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - if(skb->tail>skb->end) { - skb_over_panic(skb, len, current_text_addr()); - } - return tmp; -} - -static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data-=len; - skb->len+=len; - return skb->data; -} - -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ - -static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data-=len; - skb->len+=len; - if(skb->data<skb->head) { - skb_under_panic(skb, len, current_text_addr()); - } - return skb->data; -} - -static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) -{ - skb->len-=len; - if (skb->len < skb->data_len) - out_of_line_bug(); - return skb->data+=len; -} - -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. - */ - -static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb->len) - return NULL; - return __skb_pull(skb,len); -} - -extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); - -static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) - return NULL; - skb->len -= len; - return skb->data += len; -} - -static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb->len) - return NULL; - return __pskb_pull(skb,len); -} - -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) -{ - if (len <= skb_headlen(skb)) - return 1; - if (len > skb->len) - return 0; - return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); -} - -/** - * skb_headroom - bytes at buffer head - * @skb: buffer to check - * - * Return the number of bytes of free space at the head of an &sk_buff. - */ - -static inline int skb_headroom(const struct sk_buff *skb) -{ - return skb->data-skb->head; -} - -/** - * skb_tailroom - bytes at buffer end - * @skb: buffer to check - * - * Return the number of bytes of free space at the tail of an sk_buff - */ - -static inline int skb_tailroom(const struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; -} - -/** - * skb_reserve - adjust headroom - * @skb: buffer to alter - * @len: bytes to move - * - * Increase the headroom of an empty &sk_buff by reducing the tail - * room. This is only allowed for an empty buffer. - */ - -static inline void skb_reserve(struct sk_buff *skb, unsigned int len) -{ - skb->data+=len; - skb->tail+=len; -} - -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); - -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - } else { - ___pskb_trim(skb, len, 0); - } -} - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - */ - -static inline void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) { - __skb_trim(skb, len); - } -} - - -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } else { - return ___pskb_trim(skb, len, 1); - } -} - -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (len < skb->len) - return __pskb_trim(skb, len); - return 0; -} - -/** - * skb_orphan - orphan a buffer - * @skb: buffer to orphan - * - * If a buffer currently has an owner then we call the owner's - * destructor function and make the @skb unowned. The buffer continues - * to exist but is no longer charged to its former owner. - */ - - -static inline void skb_orphan(struct sk_buff *skb) -{ - if (skb->destructor) - skb->destructor(skb); - skb->destructor = NULL; - skb->sk = NULL; -} - -/** - * skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. - */ - - -static inline void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=skb_dequeue(list))!=NULL) - kfree_skb(skb); -} - -/** - * __skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function does not take the - * list lock and the caller must hold the relevant locks to use it. - */ - - -static inline void __skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=__skb_dequeue(list))!=NULL) - kfree_skb(skb); -} - -/** - * __dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. - */ -#ifndef CONFIG_XEN -static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - int gfp_mask) -{ - struct sk_buff *skb = alloc_skb(length+16, gfp_mask); - if (skb) - skb_reserve(skb,16); - return skb; -} -#else -extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); -#endif - -/** - * dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ - -static inline struct sk_buff *dev_alloc_skb(unsigned int length) -{ - return __dev_alloc_skb(length, GFP_ATOMIC); -} - -/** - * skb_cow - copy header of skb when it is required - * @skb: buffer to cow - * @headroom: needed headroom - * - * If the skb passed lacks sufficient headroom or its data part - * is shared, data is reallocated. If reallocation fails, an error - * is returned and original skb is not changed. - * - * The result is skb with writable area skb->head...skb->tail - * and at least @headroom of space at head. - */ - -static inline int -skb_cow(struct sk_buff *skb, unsigned int headroom) -{ - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); - - if (delta < 0) - delta = 0; - - if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); - return 0; -} - -/** - * skb_padto - pad an skbuff up to a minimal size - * @skb: buffer to pad - * @len: minimal length - * - * Pads up a buffer to ensure the trailing bytes exist and are - * blanked. If the buffer already contains sufficient data it - * is untouched. Returns the buffer, which may be a replacement - * for the original, or NULL for out of memory - in which case - * the original buffer is still freed. - */ - -static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if(likely(size >= len)) - return skb; - return skb_pad(skb, len-size); -} - -/** - * skb_linearize - convert paged skb to linear one - * @skb: buffer to linarize - * @gfp: allocation mode - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. */ -int skb_linearize(struct sk_buff *skb, int gfp); - -static inline void *kmap_skb_frag(const skb_frag_t *frag) -{ -#ifdef CONFIG_HIGHMEM - if (in_irq()) - out_of_line_bug(); - - local_bh_disable(); -#endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); -} - -static inline void kunmap_skb_frag(void *vaddr) -{ - kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); -#ifdef CONFIG_HIGHMEM - local_bh_enable(); -#endif -} - -#define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ - skb=skb->next) - - -extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); -extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); -extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); -extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); -extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); - -extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); -extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); - -extern void skb_init(void); -extern void skb_add_mtu(int mtu); - -#ifdef CONFIG_NETFILTER -static inline void -nf_conntrack_put(struct nf_ct_info *nfct) -{ - if (nfct && atomic_dec_and_test(&nfct->master->use)) - nfct->master->destroy(nfct->master); -} -static inline void -nf_conntrack_get(struct nf_ct_info *nfct) -{ - if (nfct) - atomic_inc(&nfct->master->use); -} -static inline void -nf_reset(struct sk_buff *skb) -{ - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -} -#else /* CONFIG_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) {} -#endif /* CONFIG_NETFILTER */ - -#endif /* __KERNEL__ */ -#endif /* _LINUX_SKBUFF_H */ diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/include/linux/timer.h --- a/linux-2.4-xen-sparse/include/linux/timer.h Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,77 +0,0 @@ -#ifndef _LINUX_TIMER_H -#define _LINUX_TIMER_H - -#include <linux/config.h> -#include <linux/list.h> - -/* - * In Linux 2.4, static timers have been removed from the kernel. - * Timers may be dynamically created and destroyed, and should be initialized - * by a call to init_timer() upon creation. - * - * The "data" field enables use of a common timeout function for several - * timeouts. You can use this field to distinguish between the different - * invocations. - */ -struct timer_list { - struct list_head list; - unsigned long expires; - unsigned long data; - void (*function)(unsigned long); -}; - -extern void add_timer(struct timer_list * timer); -extern int del_timer(struct timer_list * timer); -#ifdef CONFIG_NO_IDLE_HZ -extern struct timer_list *next_timer_event(void); -#endif - -#ifdef CONFIG_SMP -extern int del_timer_sync(struct timer_list * timer); -extern void sync_timers(void); -#else -#define del_timer_sync(t) del_timer(t) -#define sync_timers() do { } while (0) -#endif - -/* - * mod_timer is a more efficient way to update the expire field of an - * active timer (if the timer is inactive it will be activated) - * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a). - * If the timer is known to be not pending (ie, in the handler), mod_timer - * is less efficient than a->expires = b; add_timer(a). - */ -int mod_timer(struct timer_list *timer, unsigned long expires); - -extern void it_real_fn(unsigned long); - -static inline void init_timer(struct timer_list * timer) -{ - timer->list.next = timer->list.prev = NULL; -} - -static inline int timer_pending (const struct timer_list * timer) -{ - return timer->list.next != NULL; -} - -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you wont have to - * alter your driver code. - * - * time_after(a,b) returns true if the time a is after time b. - * - * Do this with "<0" and ">=0" to only test the sign of the result. A - * good compiler would generate better code (and a really good compiler - * wouldn't care). Gcc is currently neither. - */ -#define time_after(a,b) ((long)(b) - (long)(a) < 0) -#define time_before(a,b) time_after(b,a) - -#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) -#define time_before_eq(a,b) time_after_eq(b,a) - -#endif diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/kernel/time.c --- a/linux-2.4-xen-sparse/kernel/time.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,415 +0,0 @@ -/* - * linux/kernel/time.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file contains the interface functions for the various - * time related system calls: time, stime, gettimeofday, settimeofday, - * adjtime - */ -/* - * Modification history kernel/time.c - * - * 1993-09-02 Philip Gladstone - * Created file with time related functions from sched.c and adjtimex() - * 1993-10-08 Torsten Duwe - * adjtime interface update and CMOS clock write code - * 1995-08-13 Torsten Duwe - * kernel PLL updated to 1994-12-13 specs (rfc-1589) - * 1999-01-16 Ulrich Windl - * Introduced error checking for many cases in adjtimex(). - * Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10) - * (Even though the technical memorandum forbids it) - */ - -#include <linux/mm.h> -#include <linux/timex.h> -#include <linux/smp_lock.h> - -#include <asm/uaccess.h> - -/* - * The timezone where the local system is located. Used as a default by some - * programs who obtain this value by using gettimeofday. - */ -struct timezone sys_tz; - -/* The xtime_lock is not only serializing the xtime read/writes but it's also - serializing all accesses to the global NTP variables now. */ -extern rwlock_t xtime_lock; - -#if !defined(__alpha__) && !defined(__ia64__) - -/* - * sys_time() can be implemented in user-level using - * sys_gettimeofday(). Is this for backwards compatibility? If so, - * why not move it into the appropriate arch directory (for those - * architectures that need it). - * - * XXX This function is NOT 64-bit clean! - */ -asmlinkage long sys_time(int * tloc) -{ - struct timeval now; - int i; - - do_gettimeofday(&now); - i = now.tv_sec; - if (tloc) { - if (put_user(i,tloc)) - i = -EFAULT; - } - return i; -} - -#if !defined(CONFIG_XEN) - -/* - * sys_stime() can be implemented in user-level using - * sys_settimeofday(). Is this for backwards compatibility? If so, - * why not move it into the appropriate arch directory (for those - * architectures that need it). - */ - -asmlinkage long sys_stime(int * tptr) -{ - int value; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - if (get_user(value, tptr)) - return -EFAULT; - write_lock_irq(&xtime_lock); - vxtime_lock(); - xtime.tv_sec = value; - xtime.tv_usec = 0; - vxtime_unlock(); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - write_unlock_irq(&xtime_lock); - return 0; -} - -#endif - -#endif - -asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (copy_to_user(tv, &ktv, sizeof(ktv))) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -/* - * Adjust the time obtained from the CMOS to be UTC time instead of - * local time. - * - * This is ugly, but preferable to the alternatives. Otherwise we - * would either need to write a program to do it in /etc/rc (and risk - * confusion if the program gets run more than once; it would also be - * hard to make the program warp the clock precisely n hours) or - * compile in the timezone information into the kernel. Bad, bad.... - * - * - TYT, 1992-01-01 - * - * The best thing to do is to keep the CMOS clock in universal time (UTC) - * as real UNIX machines always do it. This avoids all headaches about - * daylight saving times and warping kernel clocks. - */ -inline static void warp_clock(void) -{ - write_lock_irq(&xtime_lock); - vxtime_lock(); - xtime.tv_sec += sys_tz.tz_minuteswest * 60; - vxtime_unlock(); - write_unlock_irq(&xtime_lock); -} - -/* - * In case for some reason the CMOS clock has not already been running - * in UTC, but in some local time: The first time we set the timezone, - * we will warp the clock so that it is ticking UTC time instead of - * local time. Presumably, if someone is setting the timezone then we - * are running in an environment where the programs understand about - * timezones. This should be done at boot time in the /etc/rc script, - * as soon as possible, so that the clock can be set right. Otherwise, - * various programs will get confused when the clock gets warped. - */ - -int do_sys_settimeofday(struct timeval *tv, struct timezone *tz) -{ - static int firsttime = 1; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (tz) { - /* SMP safe, global irq locking makes it work. */ - sys_tz = *tz; - if (firsttime) { - firsttime = 0; - if (!tv) - warp_clock(); - } - } - if (tv) - { - /* SMP safe, again the code in arch/foo/time.c should - * globally block out interrupts when it runs. - */ - do_settimeofday(tv); - } - return 0; -} - -asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz) -{ - struct timeval new_tv; - struct timezone new_tz; - - if (tv) { - if (copy_from_user(&new_tv, tv, sizeof(*tv))) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&new_tz, tz, sizeof(*tz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL); -} - -long pps_offset; /* pps time offset (us) */ -long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */ - -long pps_freq; /* frequency offset (scaled ppm) */ -long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ - -long pps_valid = PPS_VALID; /* pps signal watchdog counter */ - -int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ - -long pps_jitcnt; /* jitter limit exceeded */ -long pps_calcnt; /* calibration intervals */ -long pps_errcnt; /* calibration errors */ -long pps_stbcnt; /* stability limit exceeded */ - -/* hook for a loadable hardpps kernel module */ -void (*hardpps_ptr)(struct timeval *); - -/* adjtimex mainly allows reading (and writing, if superuser) of - * kernel time-keeping variables. used by xntpd. - */ -int do_adjtimex(struct timex *txc) -{ - long ltemp, mtemp, save_adjust; - int result; - - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - - /* Now we validate the data before disabling interrupts */ - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - /* singleshot must not be used with any other mode bits */ - if (txc->modes != ADJ_OFFSET_SINGLESHOT) - return -EINVAL; - - if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) - /* adjustment Offset limited to +- .512 seconds */ - if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) - return -EINVAL; - - /* if the quartz is off by more than 10% something is VERY wrong ! */ - if (txc->modes & ADJ_TICK) - if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) - return -EINVAL; - - write_lock_irq(&xtime_lock); - result = time_state; /* mostly `TIME_OK' */ - - /* Save for later - semantics of adjtime is to return old value */ - save_adjust = time_adjust; - -#if 0 /* STA_CLOCKERR is never set yet */ - time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ -#endif - /* If there are input parameters, then process them */ - if (txc->modes) - { - if (txc->modes & ADJ_STATUS) /* only set allowed bits */ - time_status = (txc->status & ~STA_RONLY) | - (time_status & STA_RONLY); - - if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ - if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { - result = -EINVAL; - goto leave; - } - time_freq = txc->freq - pps_freq; - } - - if (txc->modes & ADJ_MAXERROR) { - if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_maxerror = txc->maxerror; - } - - if (txc->modes & ADJ_ESTERROR) { - if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { - result = -EINVAL; - goto leave; - } - time_esterror = txc->esterror; - } - - if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ - if (txc->constant < 0) { /* NTP v4 uses values > 6 */ - result = -EINVAL; - goto leave; - } - time_constant = txc->constant; - } - - if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ - if (txc->modes == ADJ_OFFSET_SINGLESHOT) { - /* adjtime() is independent from ntp_adjtime() */ - time_adjust = txc->offset; - } - else if ( time_status & (STA_PLL | STA_PPSTIME) ) { - ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == - (STA_PPSTIME | STA_PPSSIGNAL) ? - pps_offset : txc->offset; - - /* - * Scale the phase adjustment and - * clamp to the operating range. - */ - if (ltemp > MAXPHASE) - time_offset = MAXPHASE << SHIFT_UPDATE; - else if (ltemp < -MAXPHASE) - time_offset = -(MAXPHASE << SHIFT_UPDATE); - else - time_offset = ltemp << SHIFT_UPDATE; - - /* - * Select whether the frequency is to be controlled - * and in which mode (PLL or FLL). Clamp to the operating - * range. Ugly multiply/divide should be replaced someday. - */ - - if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; - if (time_status & STA_FLL) { - if (mtemp >= MINSEC) { - ltemp = (time_offset / mtemp) << (SHIFT_USEC - - SHIFT_UPDATE); - if (ltemp < 0) - time_freq -= -ltemp >> SHIFT_KH; - else - time_freq += ltemp >> SHIFT_KH; - } else /* calibration interval too short (p. 12) */ - result = TIME_ERROR; - } else { /* PLL mode */ - if (mtemp < MAXSEC) { - ltemp *= mtemp; - if (ltemp < 0) - time_freq -= -ltemp >> (time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC); - else - time_freq += ltemp >> (time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC); - } else /* calibration interval too long (p. 12) */ - result = TIME_ERROR; - } - if (time_freq > time_tolerance) - time_freq = time_tolerance; - else if (time_freq < -time_tolerance) - time_freq = -time_tolerance; - } /* STA_PLL || STA_PPSTIME */ - } /* txc->modes & ADJ_OFFSET */ - if (txc->modes & ADJ_TICK) { - /* if the quartz is off by more than 10% something is - VERY wrong ! */ - if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { - result = -EINVAL; - goto leave; - } - tick = txc->tick; - } - } /* txc->modes */ -leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 - || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 - && (time_status & STA_PPSSIGNAL) == 0) - /* p. 24, (b) */ - || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) - == (STA_PPSTIME|STA_PPSJITTER)) - /* p. 24, (c) */ - || ((time_status & STA_PPSFREQ) != 0 - && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) - /* p. 24, (d) */ - result = TIME_ERROR; - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) - txc->offset = save_adjust; - else { - if (time_offset < 0) - txc->offset = -(-time_offset >> SHIFT_UPDATE); - else - txc->offset = time_offset >> SHIFT_UPDATE; - } - txc->freq = time_freq + pps_freq; - txc->maxerror = time_maxerror; - txc->esterror = time_esterror; - txc->status = time_status; - txc->constant = time_constant; - txc->precision = time_precision; - txc->tolerance = time_tolerance; - txc->tick = tick; - txc->ppsfreq = pps_freq; - txc->jitter = pps_jitter >> PPS_AVG; - txc->shift = pps_shift; - txc->stabil = pps_stabil; - txc->jitcnt = pps_jitcnt; - txc->calcnt = pps_calcnt; - txc->errcnt = pps_errcnt; - txc->stbcnt = pps_stbcnt; - write_unlock_irq(&xtime_lock); - do_gettimeofday(&txc->time); - return(result); -} - -asmlinkage long sys_adjtimex(struct timex *txc_p) -{ - struct timex txc; /* Local copy of parameter */ - int ret; - - /* Copy the user data space into the kernel copy - * structure. But bear in mind that the structures - * may change - */ - if(copy_from_user(&txc, txc_p, sizeof(struct timex))) - return -EFAULT; - ret = do_adjtimex(&txc); - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/kernel/timer.c --- a/linux-2.4-xen-sparse/kernel/timer.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,968 +0,0 @@ -/* - * linux/kernel/timer.c - * - * Kernel internal timers, kernel timekeeping, basic process system calls - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. - * - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). - * Copyright (C) 1998 Andrea Arcangeli - * 1999-03-10 Improved NTP compatibility by Ulrich Windl - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/timex.h> -#include <linux/delay.h> -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/kernel_stat.h> - -#include <asm/uaccess.h> - -/* - * Timekeeping variables - */ - -long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ - -/* The current time */ -struct timeval xtime __attribute__ ((aligned (16))); - -/* Don't completely fail for HZ > 500. */ -int tickadj = 500/HZ ? : 1; /* microsecs */ - -DECLARE_TASK_QUEUE(tq_timer); -DECLARE_TASK_QUEUE(tq_immediate); - -/* - * phase-lock loop variables - */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (us) */ -long time_constant = 2; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ -long time_precision = 1; /* clock precision (us) */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -long time_phase; /* phase offset (scaled us) */ -long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; - /* frequency offset (scaled ppm)*/ -long time_adj; /* tick adjust (scaled 1 / HZ) */ -long time_reftime; /* time at last adjustment (s) */ - -long time_adjust; -long time_adjust_step; - -unsigned long event; - -extern int do_setitimer(int, struct itimerval *, struct itimerval *); - -unsigned long volatile jiffies; - -unsigned int * prof_buffer; -unsigned long prof_len; -unsigned long prof_shift; - -/* - * Event timer code - */ -#define TVN_BITS 6 -#define TVR_BITS 8 -#define TVN_SIZE (1 << TVN_BITS) -#define TVR_SIZE (1 << TVR_BITS) -#define TVN_MASK (TVN_SIZE - 1) -#define TVR_MASK (TVR_SIZE - 1) - -struct timer_vec { - int index; - struct list_head vec[TVN_SIZE]; -}; - -struct timer_vec_root { - int index; - struct list_head vec[TVR_SIZE]; -}; - -static struct timer_vec tv5; -static struct timer_vec tv4; -static struct timer_vec tv3; -static struct timer_vec tv2; -static struct timer_vec_root tv1; - -static struct timer_vec * const tvecs[] = { - (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 -}; - -static struct list_head * run_timer_list_running; - -#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) - -void init_timervecs (void) -{ - int i; - - for (i = 0; i < TVN_SIZE; i++) { - INIT_LIST_HEAD(tv5.vec + i); - INIT_LIST_HEAD(tv4.vec + i); - INIT_LIST_HEAD(tv3.vec + i); - INIT_LIST_HEAD(tv2.vec + i); - } - for (i = 0; i < TVR_SIZE; i++) - INIT_LIST_HEAD(tv1.vec + i); -} - -static unsigned long timer_jiffies; - -static inline void internal_add_timer(struct timer_list *timer) -{ - /* - * must be cli-ed when calling this - */ - unsigned long expires = timer->expires; - unsigned long idx = expires - timer_jiffies; - struct list_head * vec; - - if (run_timer_list_running) - vec = run_timer_list_running; - else if (idx < TVR_SIZE) { - int i = expires & TVR_MASK; - vec = tv1.vec + i; - } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { - int i = (expires >> TVR_BITS) & TVN_MASK; - vec = tv2.vec + i; - } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - vec = tv3.vec + i; - } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - vec = tv4.vec + i; - } else if ((signed long) idx < 0) { - /* can happen if you add a timer with expires == jiffies, - * or you set a timer to go off in the past - */ - vec = tv1.vec + tv1.index; - } else if (idx <= 0xffffffffUL) { - int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = tv5.vec + i; - } else { - /* Can only get here on architectures with 64-bit jiffies */ - INIT_LIST_HEAD(&timer->list); - return; - } - /* - * Timers are FIFO! - */ - list_add(&timer->list, vec->prev); -} - -/* Initialize both explicitly - let's try to have them in the same cache line */ -spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; - -#ifdef CONFIG_SMP -volatile struct timer_list * volatile running_timer; -#define timer_enter(t) do { running_timer = t; mb(); } while (0) -#define timer_exit() do { running_timer = NULL; } while (0) -#define timer_is_running(t) (running_timer == t) -#define timer_synchronize(t) while (timer_is_running(t)) barrier() -#else -#define timer_enter(t) do { } while (0) -#define timer_exit() do { } while (0) -#endif - -void add_timer(struct timer_list *timer) -{ - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - if (timer_pending(timer)) - goto bug; - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return; -bug: - spin_unlock_irqrestore(&timerlist_lock, flags); - printk("bug: kernel timer added twice at %p.\n", - __builtin_return_address(0)); -} - -static inline int detach_timer (struct timer_list *timer) -{ - if (!timer_pending(timer)) - return 0; - list_del(&timer->list); - return 1; -} - -int mod_timer(struct timer_list *timer, unsigned long expires) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - timer->expires = expires; - ret = detach_timer(timer); - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; -} - -int del_timer(struct timer_list * timer) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - ret = detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; -} - -#ifdef CONFIG_SMP -void sync_timers(void) -{ - spin_unlock_wait(&global_bh_lock); -} - -/* - * SMP specific function to delete periodic timer. - * Caller must disable by some means restarting the timer - * for new. Upon exit the timer is not queued and handler is not running - * on any CPU. It returns number of times, which timer was deleted - * (for reference counting). - */ - -int del_timer_sync(struct timer_list * timer) -{ - int ret = 0; - - for (;;) { - unsigned long flags; - int running; - - spin_lock_irqsave(&timerlist_lock, flags); - ret += detach_timer(timer); - timer->list.next = timer->list.prev = 0; - running = timer_is_running(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - - if (!running) - break; - - timer_synchronize(timer); - } - - return ret; -} -#endif - - -static inline void cascade_timers(struct timer_vec *tv) -{ - /* cascade all the timers from tv up one level */ - struct list_head *head, *curr, *next; - - head = tv->vec + tv->index; - curr = head->next; - /* - * We are removing _all_ timers from the list, so we don't have to - * detach them individually, just clear the list afterwards. - */ - while (curr != head) { - struct timer_list *tmp; - - tmp = list_entry(curr, struct timer_list, list); - next = curr->next; - list_del(curr); // not needed - internal_add_timer(tmp); - curr = next; - } - INIT_LIST_HEAD(head); - tv->index = (tv->index + 1) & TVN_MASK; -} - -static inline void run_timer_list(void) -{ - spin_lock_irq(&timerlist_lock); - while ((long)(jiffies - timer_jiffies) >= 0) { - LIST_HEAD(queued); - struct list_head *head, *curr; - if (!tv1.index) { - int n = 1; - do { - cascade_timers(tvecs[n]); - } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); - } - run_timer_list_running = &queued; -repeat: - head = tv1.vec + tv1.index; - curr = head->next; - if (curr != head) { - struct timer_list *timer; - void (*fn)(unsigned long); - unsigned long data; - - timer = list_entry(curr, struct timer_list, list); - fn = timer->function; - data= timer->data; - - detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - timer_enter(timer); - spin_unlock_irq(&timerlist_lock); - fn(data); - spin_lock_irq(&timerlist_lock); - timer_exit(); - goto repeat; - } - run_timer_list_running = NULL; - ++timer_jiffies; - tv1.index = (tv1.index + 1) & TVR_MASK; - - curr = queued.next; - while (curr != &queued) { - struct timer_list *timer; - - timer = list_entry(curr, struct timer_list, list); - curr = curr->next; - internal_add_timer(timer); - } - } - spin_unlock_irq(&timerlist_lock); -} - -#ifdef CONFIG_NO_IDLE_HZ -/* - * Find out when the next timer event is due to happen. This - * is used on S/390 to stop all activity when all cpus are idle. - * And in XenoLinux to achieve the same. - * The timerlist_lock must be acquired before calling this function. - */ -struct timer_list *next_timer_event(void) -{ - struct timer_list *nte, *tmp; - struct list_head *lst; - int i, j; - - /* Look for the next timer event in tv1. */ - i = 0; - j = tvecs[0]->index; - do { - struct list_head *head = tvecs[0]->vec + j; - if (!list_empty(head)) { - nte = list_entry(head->next, struct timer_list, list); - goto found; - } - j = (j + 1) & TVR_MASK; - } while (j != tv1.index); - - /* No event found in tv1. Check tv2-tv5. */ - for (i = 1; i < NOOF_TVECS; i++) { - j = tvecs[i]->index; - do { - nte = NULL; - list_for_each(lst, tvecs[i]->vec + j) { - tmp = list_entry(lst, struct timer_list, list); - if (nte == NULL || - time_before(tmp->expires, nte->expires)) - nte = tmp; - } - if (nte) - goto found; - j = (j + 1) & TVN_MASK; - } while (j != tvecs[i]->index); - } - return NULL; -found: - /* Found timer event in tvecs[i]->vec[j] */ - if (j < tvecs[i]->index && i < NOOF_TVECS-1) { - /* - * The search wrapped. We need to look at the next list - * from tvecs[i+1] that would cascade into tvecs[i]. - */ - list_for_each(lst, tvecs[i+1]->vec+tvecs[i+1]->index) { - tmp = list_entry(lst, struct timer_list, list); - if (time_before(tmp->expires, nte->expires)) - nte = tmp; - } - } - return nte; -} -#endif - -spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; - -void tqueue_bh(void) -{ - run_task_queue(&tq_timer); -} - -void immediate_bh(void) -{ - run_task_queue(&tq_immediate); -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@xxxxxxxx) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - * - */ -static void second_overflow(void) -{ - long ltemp; - - /* Bump the maxerror field */ - time_maxerror += time_tolerance >> SHIFT_USEC; - if ( time_maxerror > NTP_PHASE_LIMIT ) { - time_maxerror = NTP_PHASE_LIMIT; - time_status |= STA_UNSYNC; - } - - /* - * Leap second processing. If in leap-insert state at - * the end of the day, the system clock is set back one - * second; if in leap-delete state, the system clock is - * set ahead one second. The microtime() routine or - * external clock driver will insure that reported time - * is always monotonic. The ugly divides should be - * replaced. - */ - switch (time_state) { - - case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; - break; - - case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); - } - break; - - case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); - } - break; - - case TIME_OOP: - time_state = TIME_WAIT; - break; - - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - } - - /* - * Compute the phase adjustment for the next second. In - * PLL mode, the offset is reduced by a fixed factor - * times the time constant. In FLL mode the offset is - * used directly. In either mode, the maximum phase - * adjustment for each second is clamped so as to spread - * the adjustment over not more than the number of - * seconds between updates. - */ - if (time_offset < 0) { - ltemp = -time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset += ltemp; - time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } else { - ltemp = time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset -= ltemp; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } - - /* - * Compute the frequency estimate and additional phase - * adjustment due to frequency error for the next - * second. When the PPS signal is engaged, gnaw on the - * watchdog counter and update the frequency computed by - * the pll and the PPS signal. - */ - pps_valid++; - if (pps_valid == PPS_VALID) { /* PPS signal lost */ - pps_jitter = MAXTIME; - pps_stabil = MAXFREQ; - time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | - STA_PPSWANDER | STA_PPSERROR); - } - ltemp = time_freq + pps_freq; - if (ltemp < 0) - time_adj -= -ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - else - time_adj += ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - -#if HZ == 100 - /* Compensate for (HZ==100) != (1 << SHIFT_HZ). - * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) - */ - if (time_adj < 0) - time_adj -= (-time_adj >> 2) + (-time_adj >> 5); - else - time_adj += (time_adj >> 2) + (time_adj >> 5); -#endif -} - -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) -{ - if ( (time_adjust_step = time_adjust) != 0 ) { - /* We are doing an adjtime thing. - * - * Prepare time_adjust_step to be within bounds. - * Note that a positive time_adjust means we want the clock - * to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; - - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - } - xtime.tv_usec += tick + time_adjust_step; - /* - * Advance the phase, once it gets to one microsecond, then - * advance the tick more. - */ - time_phase += time_adj; - if (time_phase <= -FINEUSEC) { - long ltemp = -time_phase >> SHIFT_SCALE; - time_phase += ltemp << SHIFT_SCALE; - xtime.tv_usec -= ltemp; - } - else if (time_phase >= FINEUSEC) { - long ltemp = time_phase >> SHIFT_SCALE; - time_phase -= ltemp << SHIFT_SCALE; - xtime.tv_usec += ltemp; - } -} - -/* - * Using a loop looks inefficient, but "ticks" is - * usually just one (we shouldn't be losing ticks, - * we're doing this this way mainly for interrupt - * latency reasons, not because we think we'll - * have lots of lost timer ticks - */ -static void update_wall_time(unsigned long ticks) -{ - do { - ticks--; - update_wall_time_one_tick(); - } while (ticks); - - while (xtime.tv_usec >= 1000000) { - xtime.tv_usec -= 1000000; - xtime.tv_sec++; - second_overflow(); - } -} - -static inline void do_process_times(struct task_struct *p, - unsigned long user, unsigned long system) -{ - unsigned long psecs; - - psecs = (p->times.tms_utime += user); - psecs += (p->times.tms_stime += system); - if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) { - /* Send SIGXCPU every second.. */ - if (!(psecs % HZ)) - send_sig(SIGXCPU, p, 1); - /* and SIGKILL when we go over max.. */ - if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max) - send_sig(SIGKILL, p, 1); - } -} - -static inline void do_it_virt(struct task_struct * p, unsigned long ticks) -{ - unsigned long it_virt = p->it_virt_value; - - if (it_virt) { - it_virt -= ticks; - if (!it_virt) { - it_virt = p->it_virt_incr; - send_sig(SIGVTALRM, p, 1); - } - p->it_virt_value = it_virt; - } -} - -static inline void do_it_prof(struct task_struct *p) -{ - unsigned long it_prof = p->it_prof_value; - - if (it_prof) { - if (--it_prof == 0) { - it_prof = p->it_prof_incr; - send_sig(SIGPROF, p, 1); - } - p->it_prof_value = it_prof; - } -} - -void update_one_process(struct task_struct *p, unsigned long user, - unsigned long system, int cpu) -{ - p->per_cpu_utime[cpu] += user; - p->per_cpu_stime[cpu] += system; - do_process_times(p, user, system); - do_it_virt(p, user); - do_it_prof(p); -} - -/* - * Called from the timer interrupt handler to charge one tick to the current - * process. user_tick is 1 if the tick is user time, 0 for system. - */ -void update_process_times(int user_tick) -{ - struct task_struct *p = current; - int cpu = smp_processor_id(), system = user_tick ^ 1; - - update_one_process(p, user_tick, system, cpu); - if (p->pid) { - if (--p->counter <= 0) { - p->counter = 0; - /* - * SCHED_FIFO is priority preemption, so this is - * not the place to decide whether to reschedule a - * SCHED_FIFO task or not - Bhavesh Davda - */ - if (p->policy != SCHED_FIFO) { - p->need_resched = 1; - } - } - if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_tick; - else - kstat.per_cpu_user[cpu] += user_tick; - kstat.per_cpu_system[cpu] += system; - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system; -} - -/* - * Called from the timer interrupt handler to charge a couple of ticks - * to the current process. - */ -void update_process_times_us(int user_ticks, int system_ticks) -{ - struct task_struct *p = current; - int cpu = smp_processor_id(); - - update_one_process(p, user_ticks, system_ticks, cpu); - if (p->pid) { - p->counter -= user_ticks + system_ticks; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->nice > 0) - kstat.per_cpu_nice[cpu] += user_ticks; - else - kstat.per_cpu_user[cpu] += user_ticks; - kstat.per_cpu_system[cpu] += system_ticks; - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) - kstat.per_cpu_system[cpu] += system_ticks; -} - -/* - * Nr of active tasks - counted in fixed-point numbers - */ -static unsigned long count_active_tasks(void) -{ - struct task_struct *p; - unsigned long nr = 0; - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE))) - nr += FIXED_1; - } - read_unlock(&tasklist_lock); - return nr; -} - -/* - * Hmm.. Changed this, as the GNU make sources (load.c) seems to - * imply that avenrun[] is the standard name for this kind of thing. - * Nothing else seems to be standardized: the fractional size etc - * all seem to differ on different machines. - */ -unsigned long avenrun[3]; - -static inline void calc_load(unsigned long ticks) -{ - unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - while (count < 0) { - count += LOAD_FREQ; - active_tasks = count_active_tasks(); - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); - } -} - -/* jiffies at the most recent update of wall time */ -unsigned long wall_jiffies; - -/* - * This spinlock protect us from races in SMP while playing with xtime. -arca - */ -rwlock_t xtime_lock = RW_LOCK_UNLOCKED; - -static inline void update_times(void) -{ - unsigned long ticks; - - /* - * update_times() is run from the raw timer_bh handler so we - * just know that the irqs are locally enabled and so we don't - * need to save/restore the flags of the local CPU here. -arca - */ - write_lock_irq(&xtime_lock); - vxtime_lock(); - - ticks = jiffies - wall_jiffies; - if (ticks) { - wall_jiffies += ticks; - update_wall_time(ticks); - } - vxtime_unlock(); - write_unlock_irq(&xtime_lock); - calc_load(ticks); -} - -void timer_bh(void) -{ - update_times(); - run_timer_list(); -} - -void do_timer(struct pt_regs *regs) -{ - (*(unsigned long *)&jiffies)++; -#ifndef CONFIG_SMP - /* SMP process accounting uses the local APIC timer */ - - update_process_times(user_mode(regs)); -#endif - mark_bh(TIMER_BH); - if (TQ_ACTIVE(tq_timer)) - mark_bh(TQUEUE_BH); -} - -void do_timer_ticks(int ticks) -{ - (*(unsigned long *)&jiffies) += ticks; - mark_bh(TIMER_BH); - if (TQ_ACTIVE(tq_timer)) - mark_bh(TQUEUE_BH); -} - -#if !defined(__alpha__) && !defined(__ia64__) - -/* - * For backwards compatibility? This can be done in libc so Alpha - * and all newer ports shouldn't need it. - */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) -{ - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. */ - /* And we'd better return too much than too little anyway */ - if (it_old.it_value.tv_usec) - oldalarm++; - return oldalarm; -} - -#endif - -#ifndef __alpha__ - -/* - * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this - * should be moved into arch/i386 instead? - */ - -/** - * sys_getpid - return the thread group id of the current process - * - * Note, despite the name, this returns the tgid not the pid. The tgid and - * the pid are identical unless CLONE_THREAD was specified on clone() in - * which case the tgid is the same in all threads of the same group. - * - * This is SMP safe as current->tgid does not change. - */ -asmlinkage long sys_getpid(void) -{ - return current->tgid; -} - -/* - * This is not strictly SMP safe: p_opptr could change - * from under us. However, rather than getting any lock - * we can use an optimistic algorithm: get the parent - * pid, and go back and check that the parent is still - * the same. If it has changed (which is extremely unlikely - * indeed), we just try again.. - * - * NOTE! This depends on the fact that even if we _do_ - * get an old value of "parent", we can happily dereference - * the pointer: we just can't necessarily trust the result - * until we know that the parent pointer is valid. - * - * The "mb()" macro is a memory barrier - a synchronizing - * event. It also makes sure that gcc doesn't optimize - * away the necessary memory references.. The barrier doesn't - * have to have all that strong semantics: on x86 we don't - * really require a synchronizing instruction, for example. - * The barrier is more important for code generation than - * for any real memory ordering semantics (even if there is - * a small window for a race, using the old pointer is - * harmless for a while). - */ -asmlinkage long sys_getppid(void) -{ - int pid; - struct task_struct * me = current; - struct task_struct * parent; - - parent = me->p_opptr; - for (;;) { - pid = parent->pid; -#if CONFIG_SMP -{ - struct task_struct *old = parent; - mb(); - parent = me->p_opptr; - if (old != parent) - continue; -} -#endif - break; - } - return pid; -} - -asmlinkage long sys_getuid(void) -{ - /* Only we change this so SMP safe */ - return current->uid; -} - -asmlinkage long sys_geteuid(void) -{ - /* Only we change this so SMP safe */ - return current->euid; -} - -asmlinkage long sys_getgid(void) -{ - /* Only we change this so SMP safe */ - return current->gid; -} - -asmlinkage long sys_getegid(void) -{ - /* Only we change this so SMP safe */ - return current->egid; -} - -#endif - -/* Thread ID - the internal kernel "pid" */ -asmlinkage long sys_gettid(void) -{ - return current->pid; -} - -asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) -{ - struct timespec t; - unsigned long expire; - - if(copy_from_user(&t, rqtp, sizeof(struct timespec))) - return -EFAULT; - - if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) - return -EINVAL; - - - if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && - current->policy != SCHED_OTHER) - { - /* - * Short delay requests up to 2 ms will be handled with - * high precision by a busy wait for all real-time processes. - * - * Its important on SMP not to do this holding locks. - */ - udelay((t.tv_nsec + 999) / 1000); - return 0; - } - - expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); - - current->state = TASK_INTERRUPTIBLE; - expire = schedule_timeout(expire); - - if (expire) { - if (rmtp) { - jiffies_to_timespec(expire, &t); - if (copy_to_user(rmtp, &t, sizeof(struct timespec))) - return -EFAULT; - } - return -EINTR; - } - return 0; -} - diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mkbuildtree --- a/linux-2.4-xen-sparse/mkbuildtree Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,291 +0,0 @@ -#!/bin/bash - -# mkbuildtree <build tree> -# -# Creates symbolic links in <build tree> for the sparse tree -# in the current directory. - -# Script to determine the relative path between two directories. -# Copyright (c) D. J. Hawkey Jr. 2002 -# Fixed for Xen project by K. Fraser in 2003. -abs_to_rel () -{ - local CWD SRCPATH - - if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then - SRCPATH=${1%?} - else - SRCPATH=$1 - fi - if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then - DESTPATH=${2%?} - else - DESTPATH=$2 - fi - - CWD=$PWD - [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD - [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD - [ "$CWD" != "$PWD" ] && cd $CWD - - BASEPATH=$SRCPATH - - [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return - [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return - - while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do - BASEPATH=${BASEPATH%/*} - done - - SRCPATH=${SRCPATH#$BASEPATH} - DESTPATH=${DESTPATH#$BASEPATH} - DESTPATH=${DESTPATH#?} - while [ -n "$SRCPATH" ]; do - SRCPATH=${SRCPATH%/*} - DESTPATH="../$DESTPATH" - done - - [ -z "$BASEPATH" ] && BASEPATH="/" - [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?} -} - -# relative_lndir <target_dir> -# Creates a tree of symlinks in the current working directory that mirror -# real files in <target_dir>. <target_dir> should be relative to the current -# working directory. Symlinks in <target_dir> are ignored. Source-control files -# are ignored. -relative_lndir () -{ - local SYMLINK_DIR REAL_DIR pref i j - SYMLINK_DIR=$PWD - REAL_DIR=$1 - ( - cd $REAL_DIR - for i in `find . -type d | grep -v SCCS`; do - [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i - ( - cd $i - pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` - for j in `find . -maxdepth 1 -type f -o -type l`; do - ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j - done - ) - done - ) -} - -[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; } - -# Get absolute path to the destination directory -pushd . >/dev/null -cd ${1} || { echo "cannot cd to ${1}"; exit 1; } -AD=$PWD -popd >/dev/null - -# Get absolute path to the source directory -AS=`pwd` - -# Get path to source, relative to destination -abs_to_rel ${AD} ${AS} -RS=$DESTPATH - -# Remove old copies of files and directories at the destination -for i in `find . -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done - -# We now work from the destination directory -cd ${AD} || { echo "cannot cd to ${AD}"; exit 1; } - -# Remove old symlinks -for i in `find . -type l`; do rm -f $i; done - -# Create symlinks of files and directories which exist in the sparse source -relative_lndir ${RS} -rm -f mkbuildtree - -LINUX_26=${RS}/../linux-2.6-xen-sparse -[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } - - -# Create links to the shared definitions of the Xen interfaces. -rm -rf ${AD}/include/asm-xen/xen-public -mkdir ${AD}/include/asm-xen/xen-public -cd ${AD}/include/asm-xen/xen-public -relative_lndir ../../../${RS}/../xen/include/public - -# Create a link to the shared definitions for the control interface -cd ${AD}/include/asm-xen - -## Symlinks for files: -## - which are identical in the i386 and xen-i386 architecture-dependent -## subdirectories. -## - which are identical in the Linux 2.6 and Linux 2.4 ports. - -cd ${AD}/include/asm-xen -ln -sf ../asm-i386/a.out.h -ln -sf ../asm-i386/apicdef.h -ln -sf ../asm-i386/apic.h -ln -sf ../asm-i386/atomic.h -ln -sf ../asm-i386/bitops.h -ln -sf ../asm-i386/boot.h -ln -sf ../asm-i386/byteorder.h -ln -sf ../asm-i386/cache.h -ln -sf ../asm-i386/checksum.h -ln -sf ../asm-i386/cpufeature.h -ln -sf ../asm-i386/current.h -ln -sf ../asm-i386/debugreg.h -ln -sf ../asm-i386/delay.h -ln -sf ../asm-i386/div64.h -ln -sf ../asm-i386/dma.h -ln -sf ../asm-i386/elf.h -ln -sf ../asm-i386/errno.h -ln -sf ../asm-i386/fcntl.h -ln -sf ../asm-i386/floppy.h -ln -sf ../asm-i386/hardirq.h -ln -sf ../asm-i386/hdreg.h -ln -sf ../asm-i386/i387.h -ln -sf ../asm-i386/ide.h -ln -sf ../asm-i386/init.h -ln -sf ../asm-i386/io_apic.h -ln -sf ../asm-i386/ioctl.h -ln -sf ../asm-i386/ioctls.h -ln -sf ../asm-i386/ipcbuf.h -ln -sf ../asm-i386/ipc.h -ln -sf ../asm-i386/kmap_types.h -ln -sf ../asm-i386/ldt.h -ln -sf ../asm-i386/linux_logo.h -ln -sf ../asm-i386/locks.h -ln -sf ../asm-i386/math_emu.h -ln -sf ../asm-i386/mc146818rtc.h -ln -sf ../asm-i386/mca_dma.h -ln -sf ../asm-i386/mman.h -ln -sf ../asm-i386/mmu.h -ln -sf ../asm-i386/mmx.h -ln -sf ../asm-i386/mpspec.h -ln -sf ../asm-i386/msgbuf.h -ln -sf ../asm-i386/msr.h -ln -sf ../asm-i386/mtrr.h -ln -sf ../asm-i386/namei.h -ln -sf ../asm-i386/param.h -ln -sf ../asm-i386/parport.h -ln -sf ../asm-i386/pgtable-3level.h -ln -sf ../asm-i386/poll.h -ln -sf ../asm-i386/posix_types.h -ln -sf ../asm-i386/ptrace.h -ln -sf ../asm-i386/resource.h -ln -sf ../asm-i386/rwlock.h -ln -sf ../asm-i386/rwsem.h -ln -sf ../asm-i386/scatterlist.h -ln -sf ../asm-i386/semaphore.h -ln -sf ../asm-i386/sembuf.h -ln -sf ../asm-i386/serial.h -ln -sf ../asm-i386/setup.h -ln -sf ../asm-i386/shmbuf.h -ln -sf ../asm-i386/shmparam.h -ln -sf ../asm-i386/sigcontext.h -ln -sf ../asm-i386/siginfo.h -ln -sf ../asm-i386/signal.h -ln -sf ../asm-i386/smplock.h -ln -sf ../asm-i386/socket.h -ln -sf ../asm-i386/sockios.h -ln -sf ../asm-i386/softirq.h -ln -sf ../asm-i386/spinlock.h -ln -sf ../asm-i386/statfs.h -ln -sf ../asm-i386/stat.h -ln -sf ../asm-i386/string-486.h -ln -sf ../asm-i386/string.h -ln -sf ../asm-i386/termbits.h -ln -sf ../asm-i386/termios.h -ln -sf ../asm-i386/timex.h -ln -sf ../asm-i386/tlb.h -ln -sf ../asm-i386/types.h -ln -sf ../asm-i386/uaccess.h -ln -sf ../asm-i386/ucontext.h -ln -sf ../asm-i386/unaligned.h -ln -sf ../asm-i386/unistd.h -ln -sf ../asm-i386/user.h -ln -sf ../asm-i386/vm86.h -ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h -ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h -ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h -ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h -ln -sf ../../${LINUX_26}/include/asm-xen/hypervisor.h -ln -sf ../../${LINUX_26}/include/asm-xen/xen_proc.h -ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/synch_bitops.h -ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/hypercall.h - -mkdir -p linux-public && cd linux-public -ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/privcmd.h -ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/suspend.h - -cd ${AD}/arch/xen/kernel -ln -sf ../../i386/kernel/i387.c -ln -sf ../../i386/kernel/init_task.c -ln -sf ../../i386/kernel/pci-i386.c -ln -sf ../../i386/kernel/pci-i386.h -ln -sf ../../i386/kernel/ptrace.c -ln -sf ../../i386/kernel/semaphore.c -ln -sf ../../i386/kernel/sys_i386.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/ctrl_if.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/evtchn.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/fixup.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c - -cd ${AD}/arch/xen/lib -ln -sf ../../i386/lib/checksum.S -ln -sf ../../i386/lib/dec_and_lock.c -ln -sf ../../i386/lib/getuser.S -ln -sf ../../i386/lib/iodebug.c -ln -sf ../../i386/lib/memcpy.c -ln -sf ../../i386/lib/mmx.c -ln -sf ../../i386/lib/old-checksum.c -ln -sf ../../i386/lib/strstr.c -ln -sf ../../i386/lib/usercopy.c -ln -sf ../../../${LINUX_26}/arch/xen/kernel/xen_proc.c - -cd ${AD}/arch/xen/mm -ln -sf ../../i386/mm/extable.c -ln -sf ../../i386/mm/pageattr.c -ln -sf ../../../${LINUX_26}/arch/xen/i386/mm/hypervisor.c - -cd ${AD}/arch/xen/drivers/balloon -ln -sf ../../../../${LINUX_26}/drivers/xen/balloon/balloon.c - -cd ${AD}/arch/xen/drivers/console -ln -sf ../../../../${LINUX_26}/drivers/xen/console/console.c - -cd ${AD}/arch/xen/drivers/dom0 -ln -sf ../../../../${LINUX_26}/drivers/xen/privcmd/privcmd.c core.c - -cd ${AD}/arch/xen/drivers/evtchn -ln -sf ../../../../${LINUX_26}/drivers/xen/evtchn/evtchn.c - -cd ${AD}/arch/xen/drivers/netif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/netfront/netfront.c main.c - -cd ${AD}/arch/xen/drivers/netif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/netback/netback.c main.c - -cd ${AD}/arch/xen/drivers/blkif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/blkback.c main.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c - -cd ${AD}/arch/xen/drivers/blkif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/blkfront/blkfront.c - -cd ${AD}/arch/xen/drivers/usbif/frontend -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/usbfront.c main.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/xhci.h - -cd ${AD}/arch/xen/drivers/usbif/backend -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/common.h -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/control.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/interface.c -ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/usbback.c main.c diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mm/highmem.c --- a/linux-2.4-xen-sparse/mm/highmem.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,461 +0,0 @@ -/* - * High memory handling common code and variables. - * - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@xxxxxxx - * Gerhard Wichert, Siemens AG, Gerhard.Wichert@xxxxxxxxxxxxxx - * - * - * Redesigned the x86 32-bit VM architecture to deal with - * 64-bit physical space. With current x86 CPUs this - * means up to 64 Gigabytes physical RAM. - * - * Rewrote high memory support to move the page cache into - * high memory. Implemented permanent (schedulable) kmaps - * based on Linus' idea. - * - * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx> - */ - -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/highmem.h> -#include <linux/swap.h> -#include <linux/slab.h> - -/* - * Virtual_count is not a pure "count". - * 0 means that it is not mapped, and has not been mapped - * since a TLB flush - it is usable. - * 1 means that there are no users, but it has been mapped - * since the last TLB flush - so we can't use it. - * n means that there are (n-1) current users of it. - */ -static int pkmap_count[LAST_PKMAP]; -static unsigned int last_pkmap_nr; -static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED}; -#define kmap_lock kmap_lock_cacheline.lock - -pte_t * pkmap_page_table; - -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); - -static void flush_all_zero_pkmaps(void) -{ - int i; - - flush_cache_all(); - - for (i = 0; i < LAST_PKMAP; i++) { - struct page *page; - - /* - * zero means we don't have anything to do, - * >1 means that it is still in use. Only - * a count of 1 means that it is free but - * needs to be unmapped - */ - if (pkmap_count[i] != 1) - continue; - pkmap_count[i] = 0; - - /* sanity check */ - if (pte_none(pkmap_page_table[i])) - BUG(); - - /* - * Don't need an atomic fetch-and-clear op here; - * no-one has the page mapped, and cannot get at - * its virtual address (and hence PTE) without first - * getting the kmap_lock (which is held here). - * So no dangers, even with speculative execution. - */ - page = pte_page(pkmap_page_table[i]); - pte_clear(&pkmap_page_table[i]); - - page->virtual = NULL; - } - flush_tlb_all(); -} - -static inline unsigned long map_new_virtual(struct page *page, int nonblocking) -{ - unsigned long vaddr; - int count; - -start: - count = LAST_PKMAP; - /* Find an empty entry */ - for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { - flush_all_zero_pkmaps(); - count = LAST_PKMAP; - } - if (!pkmap_count[last_pkmap_nr]) - break; /* Found a usable entry */ - if (--count) - continue; - - if (nonblocking) - return 0; - - /* - * Sleep for somebody else to unmap their entries - */ - { - DECLARE_WAITQUEUE(wait, current); - - current->state = TASK_UNINTERRUPTIBLE; - add_wait_queue(&pkmap_map_wait, &wait); - spin_unlock(&kmap_lock); - schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); - spin_lock(&kmap_lock); - - /* Somebody else might have mapped it while we slept */ - if (page->virtual) - return (unsigned long) page->virtual; - - /* Re-start */ - goto start; - } - } - vaddr = PKMAP_ADDR(last_pkmap_nr); - set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); - - pkmap_count[last_pkmap_nr] = 1; - page->virtual = (void *) vaddr; - - return vaddr; -} - -void kmap_flush_unused(void) -{ - spin_lock(&kmap_lock); - flush_all_zero_pkmaps(); - spin_unlock(&kmap_lock); -} - -void fastcall *kmap_high(struct page *page, int nonblocking) -{ - unsigned long vaddr; - - /* - * For highmem pages, we can't trust "virtual" until - * after we have the lock. - * - * We cannot call this from interrupts, as it may block - */ - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) { - vaddr = map_new_virtual(page, nonblocking); - if (!vaddr) - goto out; - } - pkmap_count[PKMAP_NR(vaddr)]++; - if (pkmap_count[PKMAP_NR(vaddr)] < 2) - BUG(); - out: - spin_unlock(&kmap_lock); - return (void*) vaddr; -} - -void fastcall kunmap_high(struct page *page) -{ - unsigned long vaddr; - unsigned long nr; - int need_wakeup; - - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) - BUG(); - nr = PKMAP_NR(vaddr); - - /* - * A count must never go down to zero - * without a TLB flush! - */ - need_wakeup = 0; - switch (--pkmap_count[nr]) { - case 0: - BUG(); - case 1: - /* - * Avoid an unnecessary wake_up() function call. - * The common case is pkmap_count[] == 1, but - * no waiters. - * The tasks queued in the wait-queue are guarded - * by both the lock in the wait-queue-head and by - * the kmap_lock. As the kmap_lock is held here, - * no need for the wait-queue-head's lock. Simply - * test if the queue is empty. - */ - need_wakeup = waitqueue_active(&pkmap_map_wait); - } - spin_unlock(&kmap_lock); - - /* do wake-up, if needed, race-free outside of the spin lock */ - if (need_wakeup) - wake_up(&pkmap_map_wait); -} - -#define POOL_SIZE 32 - -/* - * This lock gets no contention at all, normally. - */ -static spinlock_t emergency_lock = SPIN_LOCK_UNLOCKED; - -int nr_emergency_pages; -static LIST_HEAD(emergency_pages); - -int nr_emergency_bhs; -static LIST_HEAD(emergency_bhs); - -/* - * Simple bounce buffer support for highmem pages. - * This will be moved to the block layer in 2.5. - */ - -static inline void copy_from_high_bh (struct buffer_head *to, - struct buffer_head *from) -{ - struct page *p_from; - char *vfrom; - - p_from = from->b_page; - - vfrom = kmap_atomic(p_from, KM_USER0); - memcpy(to->b_data, vfrom + bh_offset(from), to->b_size); - kunmap_atomic(vfrom, KM_USER0); -} - -static inline void copy_to_high_bh_irq (struct buffer_head *to, - struct buffer_head *from) -{ - struct page *p_to; - char *vto; - unsigned long flags; - - p_to = to->b_page; - __save_flags(flags); - __cli(); - vto = kmap_atomic(p_to, KM_BOUNCE_READ); - memcpy(vto + bh_offset(to), from->b_data, to->b_size); - kunmap_atomic(vto, KM_BOUNCE_READ); - __restore_flags(flags); -} - -static inline void bounce_end_io (struct buffer_head *bh, int uptodate) -{ - struct page *page; - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - unsigned long flags; - - bh_orig->b_end_io(bh_orig, uptodate); - - page = bh->b_page; - - spin_lock_irqsave(&emergency_lock, flags); - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irqrestore(&emergency_lock, flags); -} - -static __init int init_emergency_pool(void) -{ - struct sysinfo i; - si_meminfo(&i); - si_swapinfo(&i); - - if (!i.totalhigh) - return 0; - - spin_lock_irq(&emergency_lock); - while (nr_emergency_pages < POOL_SIZE) { - struct page * page = alloc_page(GFP_ATOMIC); - if (!page) { - printk("couldn't refill highmem emergency pages"); - break; - } - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - while (nr_emergency_bhs < POOL_SIZE) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irq(&emergency_lock); - printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", - nr_emergency_pages, nr_emergency_bhs); - - return 0; -} - -__initcall(init_emergency_pool); - -static void bounce_end_io_write (struct buffer_head *bh, int uptodate) -{ - bounce_end_io(bh, uptodate); -} - -static void bounce_end_io_read (struct buffer_head *bh, int uptodate) -{ - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - - if (uptodate) - copy_to_high_bh_irq(bh_orig, bh); - bounce_end_io(bh, uptodate); -} - -struct page *alloc_bounce_page (void) -{ - struct list_head *tmp; - struct page *page; - - page = alloc_page(GFP_NOHIGHIO); - if (page) - return page; - /* - * No luck. First, kick the VM so it doesn't idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); - -repeat_alloc: - /* - * Try to allocate from the emergency pool. - */ - tmp = &emergency_pages; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - page = list_entry(tmp->next, struct page, list); - list_del(tmp->next); - nr_emergency_pages--; - } - spin_unlock_irq(&emergency_lock); - if (page) - return page; - - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); - - yield(); - goto repeat_alloc; -} - -struct buffer_head *alloc_bounce_bh (void) -{ - struct list_head *tmp; - struct buffer_head *bh; - - bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); - if (bh) - return bh; - /* - * No luck. First, kick the VM so it doesn't idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); - -repeat_alloc: - /* - * Try to allocate from the emergency pool. - */ - tmp = &emergency_bhs; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - spin_unlock_irq(&emergency_lock); - if (bh) - return bh; - - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); - - yield(); - goto repeat_alloc; -} - -struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig) -{ - struct page *page; - struct buffer_head *bh; - - if (!PageHighMem(bh_orig->b_page)) - return bh_orig; - - bh = alloc_bounce_bh(); - /* - * This is wasteful for 1k buffers, but this is a stopgap measure - * and we are being ineffective anyway. This approach simplifies - * things immensly. On boxes with more than 4GB RAM this should - * not be an issue anyway. - */ - page = alloc_bounce_page(); - - set_bh_page(bh, page, 0); - - bh->b_next = NULL; - bh->b_blocknr = bh_orig->b_blocknr; - bh->b_size = bh_orig->b_size; - bh->b_list = -1; - bh->b_dev = bh_orig->b_dev; - bh->b_count = bh_orig->b_count; - bh->b_rdev = bh_orig->b_rdev; - bh->b_state = bh_orig->b_state; -#ifdef HIGHMEM_DEBUG - bh->b_flushtime = jiffies; - bh->b_next_free = NULL; - bh->b_prev_free = NULL; - /* bh->b_this_page */ - bh->b_reqnext = NULL; - bh->b_pprev = NULL; -#endif - /* bh->b_page */ - if (rw == WRITE) { - bh->b_end_io = bounce_end_io_write; - copy_from_high_bh(bh, bh_orig); - } else - bh->b_end_io = bounce_end_io_read; - bh->b_private = (void *)bh_orig; - bh->b_rsector = bh_orig->b_rsector; -#ifdef HIGHMEM_DEBUG - memset(&bh->b_wait, -1, sizeof(bh->b_wait)); -#endif - - return bh; -} - diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mm/memory.c --- a/linux-2.4-xen-sparse/mm/memory.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1534 +0,0 @@ -/* - * linux/mm/memory.c - * - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - */ - -/* - * demand-loading started 01.12.91 - seems it is high on the list of - * things wanted, and it should be easy to implement. - Linus - */ - -/* - * Ok, demand-loading was easy, shared pages a little bit tricker. Shared - * pages started 02.12.91, seems to work. - Linus. - * - * Tested sharing by executing about 30 /bin/sh: under the old kernel it - * would have taken more than the 6M I have free, but it worked well as - * far as I could see. - * - * Also corrected some "invalidate()"s - I wasn't doing enough of them. - */ - -/* - * Real VM (paging to/from disk) started 18.12.91. Much more work and - * thought has to go into this. Oh, well.. - * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. - * Found it. Everything seems to work now. - * 20.12.91 - Ok, making the swap-device changeable like the root. - */ - -/* - * 05.04.94 - Multi-page memory management added for v1.1. - * Idea by Alex Bligh (alex@xxxxxxxxxxxxxxx) - * - * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG - * (Gerhard.Wichert@xxxxxxxxxxxxxx) - */ - -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/swap.h> -#include <linux/smp_lock.h> -#include <linux/swapctl.h> -#include <linux/iobuf.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/module.h> - -#include <asm/pgalloc.h> -#include <asm/uaccess.h> -#include <asm/tlb.h> - -unsigned long max_mapnr; -unsigned long num_physpages; -unsigned long num_mappedpages; -void * high_memory; -struct page *highmem_start_page; - -/* - * We special-case the C-O-W ZERO_PAGE, because it's such - * a common occurrence (no need to read the page to know - * that it's zero - better for the cache and memory subsystem). - */ -static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address) -{ - if (from == ZERO_PAGE(address)) { - clear_user_highpage(to, address); - return; - } - copy_user_highpage(to, from, address); -} - -mem_map_t * mem_map; - -/* - * Called by TLB shootdown - */ -void __free_pte(pte_t pte) -{ - struct page *page = pte_page(pte); - if ((!VALID_PAGE(page)) || PageReserved(page)) - return; - if (pte_dirty(pte)) - set_page_dirty(page); - free_page_and_swap_cache(page); -} - - -/* - * Note: this doesn't free the actual pages themselves. That - * has been handled earlier when unmapping all the memory regions. - */ -static inline void free_one_pmd(pmd_t * dir) -{ - pte_t * pte; - - if (pmd_none(*dir)) - return; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return; - } - pte = pte_offset(dir, 0); - pmd_clear(dir); - pte_free(pte); -} - -static inline void free_one_pgd(pgd_t * dir) -{ - int j; - pmd_t * pmd; - - if (pgd_none(*dir)) - return; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return; - } - pmd = pmd_offset(dir, 0); - pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) { - prefetchw(pmd+j+(PREFETCH_STRIDE/16)); - free_one_pmd(pmd+j); - } - pmd_free(pmd); -} - -/* Low and high watermarks for page table cache. - The system should try to have pgt_water[0] <= cache elements <= pgt_water[1] - */ -int pgt_cache_water[2] = { 25, 50 }; - -/* Returns the number of pages freed */ -int check_pgt_cache(void) -{ - return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]); -} - - -/* - * This function clears all user-level page tables of a process - this - * is needed by execve(), so that old pages aren't in the way. - */ -void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) -{ - pgd_t * page_dir = mm->pgd; - - spin_lock(&mm->page_table_lock); - page_dir += first; - do { - free_one_pgd(page_dir); - page_dir++; - } while (--nr); - spin_unlock(&mm->page_table_lock); - - /* keep the page table cache within bounds */ - check_pgt_cache(); -} - -#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t)) -#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) - -/* - * copy one vm_area from one task to the other. Assumes the page tables - * already present in the new task to be cleared in the whole range - * covered by this vma. - * - * 08Jan98 Merged into one routine from several inline routines to reduce - * variable count and make things faster. -jj - * - * dst->page_table_lock is held on entry and exit, - * but may be dropped within pmd_alloc() and pte_alloc(). - */ -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) -{ - pgd_t * src_pgd, * dst_pgd; - unsigned long address = vma->vm_start; - unsigned long end = vma->vm_end; - unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - - src_pgd = pgd_offset(src, address)-1; - dst_pgd = pgd_offset(dst, address)-1; - - for (;;) { - pmd_t * src_pmd, * dst_pmd; - - src_pgd++; dst_pgd++; - - /* copy_pmd_range */ - - if (pgd_none(*src_pgd)) - goto skip_copy_pmd_range; - if (pgd_bad(*src_pgd)) { - pgd_ERROR(*src_pgd); - pgd_clear(src_pgd); -skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; - if (!address || (address >= end)) - goto out; - continue; - } - - src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_alloc(dst, dst_pgd, address); - if (!dst_pmd) - goto nomem; - - do { - pte_t * src_pte, * dst_pte; - - /* copy_pte_range */ - - if (pmd_none(*src_pmd)) - goto skip_copy_pte_range; - if (pmd_bad(*src_pmd)) { - pmd_ERROR(*src_pmd); - pmd_clear(src_pmd); -skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; - if (address >= end) - goto out; - goto cont_copy_pmd_range; - } - - src_pte = pte_offset(src_pmd, address); - dst_pte = pte_alloc(dst, dst_pmd, address); - if (!dst_pte) - goto nomem; - - spin_lock(&src->page_table_lock); - do { - pte_t pte = *src_pte; - struct page *ptepage; - - /* copy_one_pte */ - - if (pte_none(pte)) - goto cont_copy_pte_range_noset; - if (!pte_present(pte)) { - swap_duplicate(pte_to_swp_entry(pte)); - goto cont_copy_pte_range; - } - ptepage = pte_page(pte); - if ((!VALID_PAGE(ptepage)) || - PageReserved(ptepage)) - goto cont_copy_pte_range; - - /* If it's a COW mapping, write protect it both in the parent and the child */ - if (cow && pte_write(pte)) { - ptep_set_wrprotect(src_pte); - pte = *src_pte; - } - - /* If it's a shared mapping, mark it clean in the child */ - if (vma->vm_flags & VM_SHARED) - pte = pte_mkclean(pte); - pte = pte_mkold(pte); - get_page(ptepage); - dst->rss++; - -cont_copy_pte_range: set_pte(dst_pte, pte); -cont_copy_pte_range_noset: address += PAGE_SIZE; - if (address >= end) - goto out_unlock; - src_pte++; - dst_pte++; - } while ((unsigned long)src_pte & PTE_TABLE_MASK); - spin_unlock(&src->page_table_lock); - -cont_copy_pmd_range: src_pmd++; - dst_pmd++; - } while ((unsigned long)src_pmd & PMD_TABLE_MASK); - } -out_unlock: - spin_unlock(&src->page_table_lock); -out: - return 0; -nomem: - return -ENOMEM; -} - -/* - * Return indicates whether a page was freed so caller can adjust rss - */ -static inline void forget_pte(pte_t page) -{ - if (!pte_none(page)) { - printk("forget_pte: old mapping existed!\n"); - BUG(); - } -} - -static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) -{ - unsigned long offset; - pte_t * ptep; - int freed = 0; - - if (pmd_none(*pmd)) - return 0; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return 0; - } - ptep = pte_offset(pmd, address); - offset = address & ~PMD_MASK; - if (offset + size > PMD_SIZE) - size = PMD_SIZE - offset; - size &= PAGE_MASK; - for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { - pte_t pte = *ptep; - if (pte_none(pte)) - continue; - if (pte_present(pte)) { - struct page *page = pte_page(pte); - if (VALID_PAGE(page) && !PageReserved(page)) - freed ++; - /* This will eventually call __free_pte on the pte. */ - tlb_remove_page(tlb, ptep, address + offset); - } else { - free_swap_and_cache(pte_to_swp_entry(pte)); - pte_clear(ptep); - } - } - - return freed; -} - -static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size) -{ - pmd_t * pmd; - unsigned long end; - int freed; - - if (pgd_none(*dir)) - return 0; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return 0; - } - pmd = pmd_offset(dir, address); - end = address + size; - if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) - end = ((address + PGDIR_SIZE) & PGDIR_MASK); - freed = 0; - do { - freed += zap_pte_range(tlb, pmd, address, end - address); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); - return freed; -} - -/* - * remove user pages in a given range. - */ -void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) -{ - mmu_gather_t *tlb; - pgd_t * dir; - unsigned long start = address, end = address + size; - int freed = 0; - - dir = pgd_offset(mm, address); - - /* - * This is a long-lived spinlock. That's fine. - * There's no contention, because the page table - * lock only protects against kswapd anyway, and - * even if kswapd happened to be looking at this - * process we _want_ it to get stuck. - */ - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - flush_cache_range(mm, address, end); - tlb = tlb_gather_mmu(mm); - - do { - freed += zap_pmd_range(tlb, dir, address, end - address); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - - /* this will flush any remaining tlb entries */ - tlb_finish_mmu(tlb, start, end); - - /* - * Update rss for the mm_struct (not necessarily current->mm) - * Notice that rss is an unsigned long. - */ - if (mm->rss > freed) - mm->rss -= freed; - else - mm->rss = 0; - spin_unlock(&mm->page_table_lock); -} - -/* - * Do a quick page-table lookup for a single page. - */ -static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *ptep, pte; - - pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - goto out; - - pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - goto out; - - ptep = pte_offset(pmd, address); - if (!ptep) - goto out; - - pte = *ptep; - if (pte_present(pte)) { - if (!write || - (pte_write(pte) && pte_dirty(pte))) - return pte_page(pte); - } - -out: - return 0; -} - -/* - * Given a physical address, is there a useful struct page pointing to - * it? This may become more complex in the future if we start dealing - * with IO-aperture pages in kiobufs. - */ - -static inline struct page * get_page_map(struct page *page) -{ - if (!VALID_PAGE(page)) - return 0; - return page; -} - -/* - * Please read Documentation/cachetlb.txt before using this function, - * accessing foreign memory spaces can cause cache coherency problems. - * - * Accessing a VM_IO area is even more dangerous, therefore the function - * fails if pages is != NULL and a VM_IO area is found. - */ -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) -{ - int i; - unsigned int flags; - - /* - * Require read or write permissions. - * If 'force' is set, we only require the "MAY" flags. - */ - flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); - i = 0; - - do { - struct vm_area_struct * vma; - - vma = find_extend_vma(mm, start); - - if ( !vma || (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags) ) - return i ? : -EFAULT; - - spin_lock(&mm->page_table_lock); - do { - struct page *map; - while (!(map = follow_page(mm, start, write))) { - spin_unlock(&mm->page_table_lock); - switch (handle_mm_fault(mm, vma, start, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - if (i) return i; - return -EFAULT; - default: - if (i) return i; - return -ENOMEM; - } - spin_lock(&mm->page_table_lock); - } - if (pages) { - pages[i] = get_page_map(map); - /* FIXME: call the correct function, - * depending on the type of the found page - */ - if (!pages[i] || PageReserved(pages[i])) { - if (pages[i] != ZERO_PAGE(start)) - goto bad_page; - } else - page_cache_get(pages[i]); - } - if (vmas) - vmas[i] = vma; - i++; - start += PAGE_SIZE; - len--; - } while(len && start < vma->vm_end); - spin_unlock(&mm->page_table_lock); - } while(len); -out: - return i; - - /* - * We found an invalid page in the VMA. Release all we have - * so far and fail. - */ -bad_page: - spin_unlock(&mm->page_table_lock); - while (i--) - page_cache_release(pages[i]); - i = -EFAULT; - goto out; -} - -EXPORT_SYMBOL(get_user_pages); - -/* - * Force in an entire range of pages from the current process's user VA, - * and pin them in physical memory. - */ -#define dprintk(x...) - -int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) -{ - int pgcount, err; - struct mm_struct * mm; - - /* Make sure the iobuf is not already mapped somewhere. */ - if (iobuf->nr_pages) - return -EINVAL; - - mm = current->mm; - dprintk ("map_user_kiobuf: begin\n"); - - pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; - /* mapping 0 bytes is not permitted */ - if (!pgcount) BUG(); - err = expand_kiobuf(iobuf, pgcount); - if (err) - return err; - - iobuf->locked = 0; - iobuf->offset = va & (PAGE_SIZE-1); - iobuf->length = len; - - /* Try to fault in all of the necessary pages */ - down_read(&mm->mmap_sem); - /* rw==READ means read from disk, write into memory area */ - err = get_user_pages(current, mm, va, pgcount, - (rw==READ), 0, iobuf->maplist, NULL); - up_read(&mm->mmap_sem); - if (err < 0) { - unmap_kiobuf(iobuf); - dprintk ("map_user_kiobuf: end %d\n", err); - return err; - } - iobuf->nr_pages = err; - while (pgcount--) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(iobuf->maplist[pgcount]); - } - dprintk ("map_user_kiobuf: end OK\n"); - return 0; -} - -/* - * Mark all of the pages in a kiobuf as dirty - * - * We need to be able to deal with short reads from disk: if an IO error - * occurs, the number of bytes read into memory may be less than the - * size of the kiobuf, so we have to stop marking pages dirty once the - * requested byte count has been reached. - * - * Must be called from process context - set_page_dirty() takes VFS locks. - */ - -void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes) -{ - int index, offset, remaining; - struct page *page; - - index = iobuf->offset >> PAGE_SHIFT; - offset = iobuf->offset & ~PAGE_MASK; - remaining = bytes; - if (remaining > iobuf->length) - remaining = iobuf->length; - - while (remaining > 0 && index < iobuf->nr_pages) { - page = iobuf->maplist[index]; - - if (!PageReserved(page)) - set_page_dirty(page); - - remaining -= (PAGE_SIZE - offset); - offset = 0; - index++; - } -} - -/* - * Unmap all of the pages referenced by a kiobuf. We release the pages, - * and unlock them if they were locked. - */ - -void unmap_kiobuf (struct kiobuf *iobuf) -{ - int i; - struct page *map; - - for (i = 0; i < iobuf->nr_pages; i++) { - map = iobuf->maplist[i]; - if (map) { - if (iobuf->locked) - UnlockPage(map); - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(map); - } - } - - iobuf->nr_pages = 0; - iobuf->locked = 0; -} - - -/* - * Lock down all of the pages of a kiovec for IO. - * - * If any page is mapped twice in the kiovec, we return the error -EINVAL. - * - * The optional wait parameter causes the lock call to block until all - * pages can be locked if set. If wait==0, the lock operation is - * aborted if any locked pages are found and -EAGAIN is returned. - */ - -int lock_kiovec(int nr, struct kiobuf *iovec[], int wait) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - int doublepage = 0; - int repeat = 0; - - repeat: - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (iobuf->locked) - continue; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - - if (TryLockPage(page)) { - while (j--) { - struct page *tmp = *--ppage; - if (tmp) - UnlockPage(tmp); - } - goto retry; - } - } - iobuf->locked = 1; - } - - return 0; - - retry: - - /* - * We couldn't lock one of the pages. Undo the locking so far, - * wait on the page we got to, and try again. - */ - - unlock_kiovec(nr, iovec); - if (!wait) - return -EAGAIN; - - /* - * Did the release also unlock the page we got stuck on? - */ - if (!PageLocked(page)) { - /* - * If so, we may well have the page mapped twice - * in the IO address range. Bad news. Of - * course, it _might_ just be a coincidence, - * but if it happens more than once, chances - * are we have a double-mapped page. - */ - if (++doublepage >= 3) - return -EINVAL; - - /* Try again... */ - wait_on_page(page); - } - - if (++repeat < 16) - goto repeat; - return -EAGAIN; -} - -/* - * Unlock all of the pages of a kiovec after IO. - */ - -int unlock_kiovec(int nr, struct kiobuf *iovec[]) -{ - struct kiobuf *iobuf; - int i, j; - struct page *page, **ppage; - - for (i = 0; i < nr; i++) { - iobuf = iovec[i]; - - if (!iobuf->locked) - continue; - iobuf->locked = 0; - - ppage = iobuf->maplist; - for (j = 0; j < iobuf->nr_pages; ppage++, j++) { - page = *ppage; - if (!page) - continue; - UnlockPage(page); - } - } - return 0; -} - -static inline void zeromap_pte_range(pte_t * pte, unsigned long address, - unsigned long size, pgprot_t prot) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot)); - pte_t oldpage = ptep_get_and_clear(pte); - set_pte(pte, zero_pte); - forget_pte(oldpage); - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t prot) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - pte_t * pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - zeromap_pte_range(pte, address, end - address, prot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long beg = address; - unsigned long end = address + size; - struct mm_struct *mm = current->mm; - - dir = pgd_offset(mm, address); - flush_cache_range(mm, beg, end); - if (address >= end) - BUG(); - - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - error = -ENOMEM; - if (!pmd) - break; - error = zeromap_pmd_range(mm, pmd, address, end - address, prot); - if (error) - break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(mm, beg, end); - return error; -} - -/* - * maps a range of physical memory into the requested pages. the old - * mappings are removed. any references to nonexistent pages results - * in null mappings (currently treated as "copy-on-access") - */ -static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t prot) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - struct page *page; - pte_t oldpage; - oldpage = ptep_get_and_clear(pte); - - page = virt_to_page(__va(phys_addr)); - if ((!VALID_PAGE(page)) || PageReserved(page)) - set_pte(pte, mk_pte_phys(phys_addr, prot)); - forget_pte(oldpage); - address += PAGE_SIZE; - phys_addr += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t prot) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - do { - pte_t * pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - remap_pte_range(pte, address, end - address, address + phys_addr, prot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -/* Note: this is only safe if the mm semaphore is held when called. */ -int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long beg = from; - unsigned long end = from + size; - struct mm_struct *mm = current->mm; - - phys_addr -= from; - dir = pgd_offset(mm, from); - flush_cache_range(mm, beg, end); - if (from >= end) - BUG(); - - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, from); - error = -ENOMEM; - if (!pmd) - break; - error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); - if (error) - break; - from = (from + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (from && (from < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_range(mm, beg, end); - return error; -} - -/* - * Establish a new mapping: - * - flush the old one - * - update the page tables - * - inform the TLB about the new one - * - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock - */ -static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) -{ -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) { - HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL); - } else { - set_pte(page_table, entry); - flush_tlb_page(vma, address); - } -#else - set_pte(page_table, entry); - flush_tlb_page(vma, address); -#endif - update_mmu_cache(vma, address, entry); -} - -/* - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock - */ -static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, - pte_t *page_table) -{ - flush_page_to_ram(new_page); - flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); -} - -/* - * This routine handles present pages, when users try to write - * to a shared page. It is done by copying the page to a new address - * and decrementing the shared-page counter for the old page. - * - * Goto-purists beware: the only reason for goto's here is that it results - * in better assembly code.. The "default" path will see no jumps at all. - * - * Note that this routine assumes that the protection checks have been - * done by the caller (the low-level page fault routine in most cases). - * Thus we can safely just mark it writable once we've done any necessary - * COW. - * - * We also mark the page dirty at this point even though the page will - * change only once the write actually happens. This avoids a few races, - * and potentially makes it more efficient. - * - * We hold the mm semaphore and the page_table_lock on entry and exit - * with the page_table_lock released. - */ -static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, pte_t *page_table, pte_t pte) -{ - struct page *old_page, *new_page; - - old_page = pte_page(pte); - if (!VALID_PAGE(old_page)) - goto bad_wp_page; - - if (!TryLockPage(old_page)) { - int reuse = can_share_swap_page(old_page); - unlock_page(old_page); - if (reuse) { - flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); - spin_unlock(&mm->page_table_lock); - return 1; /* Minor fault */ - } - } - - /* - * Ok, we need to copy. Oh, well.. - */ - page_cache_get(old_page); - spin_unlock(&mm->page_table_lock); - - new_page = alloc_page(GFP_HIGHUSER); - if (!new_page) - goto no_mem; - copy_cow_page(old_page,new_page,address); - - /* - * Re-check the pte - we dropped the lock - */ - spin_lock(&mm->page_table_lock); - if (pte_same(*page_table, pte)) { - if (PageReserved(old_page)) - ++mm->rss; - break_cow(vma, new_page, address, page_table); - if (vm_anon_lru) - lru_cache_add(new_page); - - /* Free the old page.. */ - new_page = old_page; - } - spin_unlock(&mm->page_table_lock); - page_cache_release(new_page); - page_cache_release(old_page); - return 1; /* Minor fault */ - -bad_wp_page: - spin_unlock(&mm->page_table_lock); - printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); - return -1; -no_mem: - page_cache_release(old_page); - return -1; -} - -static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff) -{ - do { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long start = mpnt->vm_start; - unsigned long end = mpnt->vm_end; - unsigned long len = end - start; - unsigned long diff; - - /* mapping wholly truncated? */ - if (mpnt->vm_pgoff >= pgoff) { - zap_page_range(mm, start, len); - continue; - } - - /* mapping wholly unaffected? */ - len = len >> PAGE_SHIFT; - diff = pgoff - mpnt->vm_pgoff; - if (diff >= len) - continue; - - /* Ok, partially affected.. */ - start += diff << PAGE_SHIFT; - len = (len - diff) << PAGE_SHIFT; - zap_page_range(mm, start, len); - } while ((mpnt = mpnt->vm_next_share) != NULL); -} - -/* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - unsigned long pgoff; - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - inode->i_size = offset; - spin_lock(&mapping->i_shared_lock); - if (!mapping->i_mmap && !mapping->i_mmap_shared) - goto out_unlock; - - pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (mapping->i_mmap != NULL) - vmtruncate_list(mapping->i_mmap, pgoff); - if (mapping->i_mmap_shared != NULL) - vmtruncate_list(mapping->i_mmap_shared, pgoff); - -out_unlock: - spin_unlock(&mapping->i_shared_lock); - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - inode->i_size = offset; - -out_truncate: - if (inode->i_op && inode->i_op->truncate) { - lock_kernel(); - inode->i_op->truncate(inode); - unlock_kernel(); - } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -/* - * Primitive swap readahead code. We simply read an aligned block of - * (1 << page_cluster) entries in the swap area. This method is chosen - * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... - */ -void swapin_readahead(swp_entry_t entry) -{ - int i, num; - struct page *new_page; - unsigned long offset; - - /* - * Get the number of handles we should do readahead io to. - */ - num = valid_swaphandles(entry, &offset); - for (i = 0; i < num; offset++, i++) { - /* Ok, do the async read-ahead now */ - new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset)); - if (!new_page) - break; - page_cache_release(new_page); - } - return; -} - -/* - * We hold the mm semaphore and the page_table_lock on entry and - * should release the pagetable lock on exit.. - */ -static int do_swap_page(struct mm_struct * mm, - struct vm_area_struct * vma, unsigned long address, - pte_t * page_table, pte_t orig_pte, int write_access) -{ - struct page *page; - swp_entry_t entry = pte_to_swp_entry(orig_pte); - pte_t pte; - int ret = 1; - - spin_unlock(&mm->page_table_lock); - page = lookup_swap_cache(entry); - if (!page) { - swapin_readahead(entry); - page = read_swap_cache_async(entry); - if (!page) { - /* - * Back out if somebody else faulted in this pte while - * we released the page table lock. - */ - int retval; - spin_lock(&mm->page_table_lock); - retval = pte_same(*page_table, orig_pte) ? -1 : 1; - spin_unlock(&mm->page_table_lock); - return retval; - } - - /* Had to read the page from swap area: Major fault */ - ret = 2; - } - - mark_page_accessed(page); - - lock_page(page); - - /* - * Back out if somebody else faulted in this pte while we - * released the page table lock. - */ - spin_lock(&mm->page_table_lock); - if (!pte_same(*page_table, orig_pte)) { - spin_unlock(&mm->page_table_lock); - unlock_page(page); - page_cache_release(page); - return 1; - } - - /* The page isn't present yet, go ahead with the fault. */ - - swap_free(entry); - if (vm_swap_full()) - remove_exclusive_swap_page(page); - - mm->rss++; - pte = mk_pte(page, vma->vm_page_prot); - if (write_access && can_share_swap_page(page)) - pte = pte_mkdirty(pte_mkwrite(pte)); - unlock_page(page); - - flush_page_to_ram(page); - flush_icache_page(vma, page); -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(address, pte, 0); - else - set_pte(page_table, pte); -#else - set_pte(page_table, pte); -#endif - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, pte); - spin_unlock(&mm->page_table_lock); - return ret; -} - -/* - * We are called with the MM semaphore and page_table_lock - * spinlock held to protect against concurrent faults in - * multithreaded programs. - */ -static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) -{ - pte_t entry; - - /* Read-only mapping of ZERO_PAGE. */ - entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); - - /* ..except if it's a write access */ - if (write_access) { - struct page *page; - - /* Allocate our own private page. */ - spin_unlock(&mm->page_table_lock); - - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto no_mem; - clear_user_highpage(page, addr); - - spin_lock(&mm->page_table_lock); - if (!pte_none(*page_table)) { - page_cache_release(page); - spin_unlock(&mm->page_table_lock); - return 1; - } - mm->rss++; - flush_page_to_ram(page); - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - if (vm_anon_lru) - lru_cache_add(page); - mark_page_accessed(page); - } - -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(addr, entry, 0); - else - set_pte(page_table, entry); -#else - set_pte(page_table, entry); -#endif - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, entry); - spin_unlock(&mm->page_table_lock); - return 1; /* Minor fault */ - -no_mem: - return -1; -} - -/* - * do_no_page() tries to create a new page mapping. It aggressively - * tries to share with existing pages, but makes a separate copy if - * the "write_access" parameter is true in order to avoid the next - * page fault. - * - * As this is called only for pages that do not currently exist, we - * do not need to flush old virtual caches or the TLB. - * - * This is called with the MM semaphore held and the page table - * spinlock held. Exit with the spinlock released. - */ -static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, - unsigned long address, int write_access, pte_t *page_table) -{ - struct page * new_page; - pte_t entry; - - if (!vma->vm_ops || !vma->vm_ops->nopage) - return do_anonymous_page(mm, vma, page_table, write_access, address); - spin_unlock(&mm->page_table_lock); - - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); - - if (new_page == NULL) /* no page was available -- SIGBUS */ - return 0; - if (new_page == NOPAGE_OOM) - return -1; - - /* - * Should we do an early C-O-W break? - */ - if (write_access && !(vma->vm_flags & VM_SHARED)) { - struct page * page = alloc_page(GFP_HIGHUSER); - if (!page) { - page_cache_release(new_page); - return -1; - } - copy_user_highpage(page, new_page, address); - page_cache_release(new_page); - if (vm_anon_lru) - lru_cache_add(page); - new_page = page; - } - - spin_lock(&mm->page_table_lock); - /* - * This silly early PAGE_DIRTY setting removes a race - * due to the bad i386 page protection. But it's valid - * for other architectures too. - * - * Note that if write_access is true, we either now have - * an exclusive copy of the page, or this is a shared mapping, - * so we can make it writable and dirty to avoid having to - * handle that later. - */ - /* Only go through if we didn't race with anybody else... */ - if (pte_none(*page_table)) { - if (!PageReserved(new_page)) - ++mm->rss; - flush_page_to_ram(new_page); - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) - entry = pte_mkwrite(pte_mkdirty(entry)); -#ifdef CONFIG_XEN - if ( likely(vma->vm_mm == current->mm) ) - HYPERVISOR_update_va_mapping(address, entry, 0); - else - set_pte(page_table, entry); -#else - set_pte(page_table, entry); -#endif - } else { - /* One of our sibling threads was faster, back out. */ - page_cache_release(new_page); - spin_unlock(&mm->page_table_lock); - return 1; - } - - /* no need to invalidate: a not-present page shouldn't be cached */ - update_mmu_cache(vma, address, entry); - spin_unlock(&mm->page_table_lock); - return 2; /* Major fault */ -} - -/* - * These routines also need to handle stuff like marking pages dirty - * and/or accessed for architectures that don't do it in hardware (most - * RISC architectures). The early dirtying is also good on the i386. - * - * There is also a hook called "update_mmu_cache()" that architectures - * with external mmu caches can use to update those (ie the Sparc or - * PowerPC hashed page tables that act as extended TLBs). - * - * Note the "page_table_lock". It is to protect against kswapd removing - * pages from under us. Note that kswapd only ever _removes_ pages, never - * adds them. As such, once we have noticed that the page is not present, - * we can drop the lock early. - * - * The adding of pages is protected by the MM semaphore (which we hold), - * so we don't need to worry about a page being suddenly been added into - * our VM. - * - * We enter with the pagetable spinlock held, we are supposed to - * release it when done. - */ -static inline int handle_pte_fault(struct mm_struct *mm, - struct vm_area_struct * vma, unsigned long address, - int write_access, pte_t * pte) -{ - pte_t entry; - - entry = *pte; - if (!pte_present(entry)) { - /* - * If it truly wasn't present, we know that kswapd - * and the PTE updates will not touch it later. So - * drop the lock. - */ - if (pte_none(entry)) - return do_no_page(mm, vma, address, write_access, pte); - return do_swap_page(mm, vma, address, pte, entry, write_access); - } - - if (write_access) { - if (!pte_write(entry)) - return do_wp_page(mm, vma, address, pte, entry); - - entry = pte_mkdirty(entry); - } - entry = pte_mkyoung(entry); - establish_pte(vma, address, pte, entry); - spin_unlock(&mm->page_table_lock); - return 1; -} - -/* - * By the time we get here, we already hold the mm semaphore - */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, int write_access) -{ - pgd_t *pgd; - pmd_t *pmd; - - current->state = TASK_RUNNING; - pgd = pgd_offset(mm, address); - - /* - * We need the page table lock to synchronize with kswapd - * and the SMP-safe atomic PTE updates. - */ - spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, address); - - if (pmd) { - pte_t * pte = pte_alloc(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte); - } - spin_unlock(&mm->page_table_lock); - return -1; -} - -/* - * Allocate page middle directory. - * - * We've already handled the fast-path in-line, and we own the - * page table lock. - * - * On a two-level page table, this ends up actually being entirely - * optimized away. - */ -pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - pmd_t *new; - - /* "fast" allocation can happen without dropping the lock.. */ - new = pmd_alloc_one_fast(mm, address); - if (!new) { - spin_unlock(&mm->page_table_lock); - new = pmd_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (!pgd_none(*pgd)) { - pmd_free(new); - check_pgt_cache(); - goto out; - } - } - pgd_populate(mm, pgd, new); -out: - return pmd_offset(pgd, address); -} - -/* - * Allocate the page table directory. - * - * We've already handled the fast-path in-line, and we own the - * page table lock. - */ -pte_t fastcall *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) -{ - if (pmd_none(*pmd)) { - pte_t *new; - - /* "fast" allocation can happen without dropping the lock.. */ - new = pte_alloc_one_fast(mm, address); - if (!new) { - spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (!pmd_none(*pmd)) { - pte_free(new); - check_pgt_cache(); - goto out; - } - } - pmd_populate(mm, pmd, new); - } -out: - return pte_offset(pmd, address); -} - -int make_pages_present(unsigned long addr, unsigned long end) -{ - int ret, len, write; - struct vm_area_struct * vma; - - vma = find_vma(current->mm, addr); - write = (vma->vm_flags & VM_WRITE) != 0; - if (addr >= end) - BUG(); - if (end > vma->vm_end) - BUG(); - len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; - ret = get_user_pages(current, current->mm, addr, - len, write, 0, NULL, NULL); - return ret == len ? 0 : -1; -} - -struct page * vmalloc_to_page(void * vmalloc_addr) -{ - unsigned long addr = (unsigned long) vmalloc_addr; - struct page *page = NULL; - pmd_t *pmd; - pte_t *pte; - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); - if (!pmd_none(*pmd)) { - pte = pte_offset(pmd, addr); - if (pte_present(*pte)) { - page = pte_page(*pte); - } - } - } - return page; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mm/mprotect.c --- a/linux-2.4-xen-sparse/mm/mprotect.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,344 +0,0 @@ -/* - * linux/mm/mprotect.c - * - * (C) Copyright 1994 Linus Torvalds - */ -#include <linux/slab.h> -#include <linux/smp_lock.h> -#include <linux/shm.h> -#include <linux/mman.h> - -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> - -static inline void change_pte_range(pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t newprot) -{ - pte_t * pte; - unsigned long end; - - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - pte = pte_offset(pmd, address); - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - if (pte_present(*pte)) { - pte_t entry; - - /* Avoid an SMP race with hardware updated dirty/clean - * bits by wiping the pte and then setting the new pte - * into place. - */ - entry = ptep_get_and_clear(pte); - set_pte(pte, pte_modify(entry, newprot)); - } - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline void change_pmd_range(pgd_t * pgd, unsigned long address, - unsigned long size, pgprot_t newprot) -{ - pmd_t * pmd; - unsigned long end; - - if (pgd_none(*pgd)) - return; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - return; - } - pmd = pmd_offset(pgd, address); - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - change_pte_range(pmd, address, end - address, newprot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); -} - -static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot) -{ - pgd_t *dir; - unsigned long beg = start; - - dir = pgd_offset(current->mm, start); - flush_cache_range(current->mm, beg, end); - if (start >= end) - BUG(); - spin_lock(&current->mm->page_table_lock); - do { - change_pmd_range(dir, start, end - start, newprot); - start = (start + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (start && (start < end)); - spin_unlock(&current->mm->page_table_lock); - flush_tlb_range(current->mm, beg, end); - return; -} - -static inline int mprotect_fixup_all(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * prev = *pprev; - struct mm_struct * mm = vma->vm_mm; - - if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) && - !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = vma->vm_end; - __vma_unlink(mm, vma, prev); - spin_unlock(&mm->page_table_lock); - - kmem_cache_free(vm_area_cachep, vma); - mm->map_count--; - - return 0; - } - - spin_lock(&mm->page_table_lock); - vma->vm_flags = newflags; - vma->vm_page_prot = prot; - spin_unlock(&mm->page_table_lock); - - *pprev = vma; - - return 0; -} - -static inline int mprotect_fixup_start(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long end, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * n, * prev = *pprev; - - *pprev = vma; - - if (prev && prev->vm_end == vma->vm_start && can_vma_merge(prev, newflags) && - !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&vma->vm_mm->page_table_lock); - prev->vm_end = end; - vma->vm_start = end; - spin_unlock(&vma->vm_mm->page_table_lock); - - return 0; - } - n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!n) - return -ENOMEM; - *n = *vma; - n->vm_end = end; - n->vm_flags = newflags; - n->vm_raend = 0; - n->vm_page_prot = prot; - if (n->vm_file) - get_file(n->vm_file); - if (n->vm_ops && n->vm_ops->open) - n->vm_ops->open(n); - vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT; - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_start = end; - __insert_vm_struct(current->mm, n); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - return 0; -} - -static inline int mprotect_fixup_end(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * n; - - n = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - if (!n) - return -ENOMEM; - *n = *vma; - n->vm_start = start; - n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; - n->vm_flags = newflags; - n->vm_raend = 0; - n->vm_page_prot = prot; - if (n->vm_file) - get_file(n->vm_file); - if (n->vm_ops && n->vm_ops->open) - n->vm_ops->open(n); - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_end = start; - __insert_vm_struct(current->mm, n); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - *pprev = n; - - return 0; -} - -static inline int mprotect_fixup_middle(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, unsigned long end, - int newflags, pgprot_t prot) -{ - struct vm_area_struct * left, * right; - - left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!left) - return -ENOMEM; - right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!right) { - kmem_cache_free(vm_area_cachep, left); - return -ENOMEM; - } - *left = *vma; - *right = *vma; - left->vm_end = start; - right->vm_start = end; - right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; - left->vm_raend = 0; - right->vm_raend = 0; - if (vma->vm_file) - atomic_add(2,&vma->vm_file->f_count); - if (vma->vm_ops && vma->vm_ops->open) { - vma->vm_ops->open(left); - vma->vm_ops->open(right); - } - vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT; - vma->vm_raend = 0; - vma->vm_page_prot = prot; - lock_vma_mappings(vma); - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_start = start; - vma->vm_end = end; - vma->vm_flags = newflags; - __insert_vm_struct(current->mm, left); - __insert_vm_struct(current->mm, right); - spin_unlock(&vma->vm_mm->page_table_lock); - unlock_vma_mappings(vma); - - *pprev = right; - - return 0; -} - -static int mprotect_fixup(struct vm_area_struct * vma, struct vm_area_struct ** pprev, - unsigned long start, unsigned long end, unsigned int newflags) -{ - pgprot_t newprot; - int error; - - if (newflags == vma->vm_flags) { - *pprev = vma; - return 0; - } - newprot = protection_map[newflags & 0xf]; - if (start == vma->vm_start) { - if (end == vma->vm_end) - error = mprotect_fixup_all(vma, pprev, newflags, newprot); - else - error = mprotect_fixup_start(vma, pprev, end, newflags, newprot); - } else if (end == vma->vm_end) - error = mprotect_fixup_end(vma, pprev, start, newflags, newprot); - else - error = mprotect_fixup_middle(vma, pprev, start, end, newflags, newprot); - - if (error) - return error; - - change_protection(start, end, newprot); - return 0; -} - -asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -{ - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * next, * prev; - int error = -EINVAL; - - if (start & ~PAGE_MASK) - return -EINVAL; - len = PAGE_ALIGN(len); - end = start + len; - if (end < start) - return -ENOMEM; - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) - return -EINVAL; - if (end == start) - return 0; - - down_write(&current->mm->mmap_sem); - - vma = find_vma_prev(current->mm, start, &prev); - error = -ENOMEM; - if (!vma || vma->vm_start > start) - goto out; - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - /* mprotect() unsupported for I/O mappings in Xenolinux. */ - error = -EINVAL; - if (vma->vm_flags & VM_IO) - goto out; -#endif - - for (nstart = start ; ; ) { - unsigned int newflags; - int last = 0; - - /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ - - newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC)); - if ((newflags & ~(newflags >> 4)) & 0xf) { - error = -EACCES; - goto out; - } - - if (vma->vm_end > end) { - error = mprotect_fixup(vma, &prev, nstart, end, newflags); - goto out; - } - if (vma->vm_end == end) - last = 1; - - tmp = vma->vm_end; - next = vma->vm_next; - error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); - if (error) - goto out; - if (last) - break; - nstart = tmp; - vma = next; - if (!vma || vma->vm_start != nstart) { - error = -ENOMEM; - goto out; - } - } - if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) && - !prev->vm_file && !(prev->vm_flags & VM_SHARED)) { - spin_lock(&prev->vm_mm->page_table_lock); - prev->vm_end = next->vm_end; - __vma_unlink(prev->vm_mm, next, prev); - spin_unlock(&prev->vm_mm->page_table_lock); - - kmem_cache_free(vm_area_cachep, next); - prev->vm_mm->map_count--; - } -out: - up_write(&current->mm->mmap_sem); - return error; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mm/mremap.c --- a/linux-2.4-xen-sparse/mm/mremap.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,390 +0,0 @@ -/* - * linux/mm/remap.c - * - * (C) Copyright 1996 Linus Torvalds - */ - -#include <linux/slab.h> -#include <linux/smp_lock.h> -#include <linux/shm.h> -#include <linux/mman.h> -#include <linux/swap.h> - -#include <asm/uaccess.h> -#include <asm/pgalloc.h> - -extern int vm_enough_memory(long pages); - -static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) -{ - pgd_t * pgd; - pmd_t * pmd; - pte_t * pte = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) - goto end; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - goto end; - } - - pmd = pmd_offset(pgd, addr); - if (pmd_none(*pmd)) - goto end; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto end; - } - - pte = pte_offset(pmd, addr); - if (pte_none(*pte)) - pte = NULL; -end: - return pte; -} - -static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr) -{ - pmd_t * pmd; - pte_t * pte = NULL; - - pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); - if (pmd) - pte = pte_alloc(mm, pmd, addr); - return pte; -} - -static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) -{ - int error = 0; - pte_t pte; - - if (!pte_none(*src)) { - pte = ptep_get_and_clear(src); - if (!dst) { - /* No dest? We must put it back. */ - dst = src; - error++; - } - set_pte(dst, pte); - } - return error; -} - -static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr) -{ - int error = 0; - pte_t * src, * dst; - - spin_lock(&mm->page_table_lock); - src = get_one_pte(mm, old_addr); - if (src) { - dst = alloc_one_pte(mm, new_addr); - src = get_one_pte(mm, old_addr); - if (src) - error = copy_one_pte(mm, src, dst); - } - spin_unlock(&mm->page_table_lock); - return error; -} - -static int move_page_tables(struct mm_struct * mm, - unsigned long new_addr, unsigned long old_addr, unsigned long len) -{ - unsigned long offset = len; - - flush_cache_range(mm, old_addr, old_addr + len); - - /* - * This is not the clever way to do this, but we're taking the - * easy way out on the assumption that most remappings will be - * only a few pages.. This also makes error recovery easier. - */ - while (offset) { - offset -= PAGE_SIZE; - if (move_one_page(mm, old_addr + offset, new_addr + offset)) - goto oops_we_failed; - } - flush_tlb_range(mm, old_addr, old_addr + len); - return 0; - - /* - * Ok, the move failed because we didn't have enough pages for - * the new page table tree. This is unlikely, but we have to - * take the possibility into account. In that case we just move - * all the pages back (this will work, because we still have - * the old page tables) - */ -oops_we_failed: - flush_cache_range(mm, new_addr, new_addr + len); - while ((offset += PAGE_SIZE) < len) - move_one_page(mm, new_addr + offset, old_addr + offset); - zap_page_range(mm, new_addr, len); - return -1; -} - -static inline unsigned long move_vma(struct vm_area_struct * vma, - unsigned long addr, unsigned long old_len, unsigned long new_len, - unsigned long new_addr) -{ - struct mm_struct * mm = vma->vm_mm; - struct vm_area_struct * new_vma, * next, * prev; - int allocated_vma; - - new_vma = NULL; - next = find_vma_prev(mm, new_addr, &prev); - if (next) { - if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = new_addr + new_len; - spin_unlock(&mm->page_table_lock); - new_vma = prev; - if (next != prev->vm_next) - BUG(); - if (prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = next->vm_end; - __vma_unlink(mm, next, prev); - spin_unlock(&mm->page_table_lock); - - mm->map_count--; - kmem_cache_free(vm_area_cachep, next); - } - } else if (next->vm_start == new_addr + new_len && - can_vma_merge(next, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - next->vm_start = new_addr; - spin_unlock(&mm->page_table_lock); - new_vma = next; - } - } else { - prev = find_vma(mm, new_addr-1); - if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { - spin_lock(&mm->page_table_lock); - prev->vm_end = new_addr + new_len; - spin_unlock(&mm->page_table_lock); - new_vma = prev; - } - } - - allocated_vma = 0; - if (!new_vma) { - new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!new_vma) - goto out; - allocated_vma = 1; - } - - if (!move_page_tables(current->mm, new_addr, addr, old_len)) { - unsigned long vm_locked = vma->vm_flags & VM_LOCKED; - - if (allocated_vma) { - *new_vma = *vma; - new_vma->vm_start = new_addr; - new_vma->vm_end = new_addr+new_len; - new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT; - new_vma->vm_raend = 0; - if (new_vma->vm_file) - get_file(new_vma->vm_file); - if (new_vma->vm_ops && new_vma->vm_ops->open) - new_vma->vm_ops->open(new_vma); - insert_vm_struct(current->mm, new_vma); - } - - /* XXX: possible errors masked, mapping might remain */ - do_munmap(current->mm, addr, old_len); - - current->mm->total_vm += new_len >> PAGE_SHIFT; - if (vm_locked) { - current->mm->locked_vm += new_len >> PAGE_SHIFT; - if (new_len > old_len) - make_pages_present(new_addr + old_len, - new_addr + new_len); - } - return new_addr; - } - if (allocated_vma) - kmem_cache_free(vm_area_cachep, new_vma); - out: - return -ENOMEM; -} - -/* - * Expand (or shrink) an existing mapping, potentially moving it at the - * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) - * - * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise - * This option implies MREMAP_MAYMOVE. - */ -unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) -{ - struct vm_area_struct *vma; - unsigned long ret = -EINVAL; - - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) - goto out; - - if (addr & ~PAGE_MASK) - goto out; - - old_len = PAGE_ALIGN(old_len); - new_len = PAGE_ALIGN(new_len); - - if (old_len > TASK_SIZE || addr > TASK_SIZE - old_len) - goto out; - - if (addr >= TASK_SIZE) - goto out; - - /* new_addr is only valid if MREMAP_FIXED is specified */ - if (flags & MREMAP_FIXED) { - if (new_addr & ~PAGE_MASK) - goto out; - if (!(flags & MREMAP_MAYMOVE)) - goto out; - - if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) - goto out; - - if (new_addr >= TASK_SIZE) - goto out; - - /* - * Allow new_len == 0 only if new_addr == addr - * to preserve truncation in place (that was working - * safe and some app may depend on it). - */ - if (unlikely(!new_len && new_addr != addr)) - goto out; - - /* Check if the location we're moving into overlaps the - * old location at all, and fail if it does. - */ - if ((new_addr <= addr) && (new_addr+new_len) > addr) - goto out; - - if ((addr <= new_addr) && (addr+old_len) > new_addr) - goto out; - - ret = do_munmap(current->mm, new_addr, new_len); - if (ret && new_len) - goto out; - } - - /* - * Always allow a shrinking remap: that just unmaps - * the unnecessary pages.. - */ - if (old_len >= new_len) { - ret = do_munmap(current->mm, addr+new_len, old_len - new_len); - if (ret && old_len != new_len) - goto out; - ret = addr; - if (!(flags & MREMAP_FIXED) || (new_addr == addr)) - goto out; - } - - /* - * Ok, we need to grow.. or relocate. - */ - ret = -EFAULT; - vma = find_vma(current->mm, addr); - if (!vma || vma->vm_start > addr) - goto out; - /* We can't remap across vm area boundaries */ - if (old_len > vma->vm_end - addr) - goto out; - if (vma->vm_flags & VM_DONTEXPAND) { - if (new_len > old_len) - goto out; - } - if (vma->vm_flags & VM_LOCKED) { - unsigned long locked = current->mm->locked_vm << PAGE_SHIFT; - locked += new_len - old_len; - ret = -EAGAIN; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) - goto out; - } - ret = -ENOMEM; - if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) - > current->rlim[RLIMIT_AS].rlim_cur) - goto out; - /* Private writable mapping? Check memory availability.. */ - if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory((new_len - old_len) >> PAGE_SHIFT)) - goto out; - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) - /* mremap() unsupported for I/O mappings in Xenolinux. */ - ret = -EINVAL; - if (vma->vm_flags & VM_IO) - goto out; -#endif - - /* old_len exactly to the end of the area.. - * And we're not relocating the area. - */ - if (old_len == vma->vm_end - addr && - !((flags & MREMAP_FIXED) && (addr != new_addr)) && - (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { - unsigned long max_addr = TASK_SIZE; - if (vma->vm_next) - max_addr = vma->vm_next->vm_start; - /* can we just expand the current mapping? */ - if (max_addr - addr >= new_len) { - int pages = (new_len - old_len) >> PAGE_SHIFT; - spin_lock(&vma->vm_mm->page_table_lock); - vma->vm_end = addr + new_len; - spin_unlock(&vma->vm_mm->page_table_lock); - current->mm->total_vm += pages; - if (vma->vm_flags & VM_LOCKED) { - current->mm->locked_vm += pages; - make_pages_present(addr + old_len, - addr + new_len); - } - ret = addr; - goto out; - } - } - - /* - * We weren't able to just expand or shrink the area, - * we need to create a new one and move it.. - */ - ret = -ENOMEM; - if (flags & MREMAP_MAYMOVE) { - if (!(flags & MREMAP_FIXED)) { - unsigned long map_flags = 0; - if (vma->vm_flags & VM_SHARED) - map_flags |= MAP_SHARED; - - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags); - ret = new_addr; - if (new_addr & ~PAGE_MASK) - goto out; - } - ret = move_vma(vma, addr, old_len, new_len, new_addr); - } -out: - return ret; -} - -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) -{ - unsigned long ret; - - down_write(&current->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up_write(&current->mm->mmap_sem); - return ret; -} diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/mm/page_alloc.c --- a/linux-2.4-xen-sparse/mm/page_alloc.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,972 +0,0 @@ -/* - * linux/mm/page_alloc.c - * - * Manages the free list, the system allocates free pages here. - * Note that kmalloc() lives in slab.c - * - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * Swap reorganised 29.12.95, Stephen Tweedie - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999 - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 - * Zone balancing, Kanoj Sarcar, SGI, Jan 2000 - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/swapctl.h> -#include <linux/interrupt.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/slab.h> -#include <linux/module.h> - -int nr_swap_pages; -int nr_active_pages; -int nr_inactive_pages; -LIST_HEAD(inactive_list); -LIST_HEAD(active_list); -pg_data_t *pgdat_list; - -/* - * - * The zone_table array is used to look up the address of the - * struct zone corresponding to a given zone number (ZONE_DMA, - * ZONE_NORMAL, or ZONE_HIGHMEM). - */ -zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES]; -EXPORT_SYMBOL(zone_table); - -static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; -static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, }; -static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; -static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; -static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; - -int vm_gfp_debug = 0; - -static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order)); - -static spinlock_t free_pages_ok_no_irq_lock = SPIN_LOCK_UNLOCKED; -struct page * free_pages_ok_no_irq_head; - -static void do_free_pages_ok_no_irq(void * arg) -{ - struct page * page, * __page; - - spin_lock_irq(&free_pages_ok_no_irq_lock); - - page = free_pages_ok_no_irq_head; - free_pages_ok_no_irq_head = NULL; - - spin_unlock_irq(&free_pages_ok_no_irq_lock); - - while (page) { - __page = page; - page = page->next_hash; - __free_pages_ok(__page, __page->index); - } -} - -static struct tq_struct free_pages_ok_no_irq_task = { - .routine = do_free_pages_ok_no_irq, -}; - - -/* - * Temporary debugging check. - */ -#define BAD_RANGE(zone, page) \ -( \ - (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \ - || (((page) - mem_map) < (zone)->zone_start_mapnr) \ - || ((zone) != page_zone(page)) \ -) - -/* - * Freeing function for a buddy system allocator. - * Contrary to prior comments, this is *NOT* hairy, and there - * is no reason for anyone not to understand it. - * - * The concept of a buddy system is to maintain direct-mapped tables - * (containing bit values) for memory blocks of various "orders". - * The bottom level table contains the map for the smallest allocatable - * units of memory (here, pages), and each level above it describes - * pairs of units from the levels below, hence, "buddies". - * At a high level, all that happens here is marking the table entry - * at the bottom level available, and propagating the changes upward - * as necessary, plus some accounting needed to play nicely with other - * parts of the VM system. - * At each level, we keep one bit for each pair of blocks, which - * is set to 1 iff only one of the pair is allocated. So when we - * are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. - * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. - * - * -- wli - */ - -static void fastcall __free_pages_ok (struct page *page, unsigned int order) -{ - unsigned long index, page_idx, mask, flags; - free_area_t *area; - struct page *base; - zone_t *zone; - - if (PageForeign(page)) - return (PageForeignDestructor(page))(page); - - /* - * Yes, think what happens when other parts of the kernel take - * a reference to a page in order to pin it for io. -ben - */ - if (PageLRU(page)) { - if (unlikely(in_interrupt())) { - unsigned long flags; - - spin_lock_irqsave(&free_pages_ok_no_irq_lock, flags); - page->next_hash = free_pages_ok_no_irq_head; - free_pages_ok_no_irq_head = page; - page->index = order; - - spin_unlock_irqrestore(&free_pages_ok_no_irq_lock, flags); - - schedule_task(&free_pages_ok_no_irq_task); - return; - } - - lru_cache_del(page); - } - - if (page->buffers) - BUG(); - if (page->mapping) - BUG(); - if (!VALID_PAGE(page)) - BUG(); - if (PageLocked(page)) - BUG(); - if (PageActive(page)) - BUG(); - ClearPageReferenced(page); - ClearPageDirty(page); - - if (current->flags & PF_FREE_PAGES) - goto local_freelist; - back_local_freelist: - - zone = page_zone(page); - - mask = (~0UL) << order; - base = zone->zone_mem_map; - page_idx = page - base; - if (page_idx & ~mask) - BUG(); - index = page_idx >> (1 + order); - - area = zone->free_area + order; - - spin_lock_irqsave(&zone->lock, flags); - - zone->free_pages -= mask; - - while (mask + (1 << (MAX_ORDER-1))) { - struct page *buddy1, *buddy2; - - if (area >= zone->free_area + MAX_ORDER) - BUG(); - if (!__test_and_change_bit(index, area->map)) - /* - * the buddy page is still allocated. - */ - break; - /* - * Move the buddy up one level. - * This code is taking advantage of the identity: - * -mask = 1+~mask - */ - buddy1 = base + (page_idx ^ -mask); - buddy2 = base + page_idx; - if (BAD_RANGE(zone,buddy1)) - BUG(); - if (BAD_RANGE(zone,buddy2)) - BUG(); - - list_del(&buddy1->list); - mask <<= 1; - area++; - index >>= 1; - page_idx &= mask; - } - list_add(&(base + page_idx)->list, &area->free_list); - - spin_unlock_irqrestore(&zone->lock, flags); - return; - - local_freelist: - if (current->nr_local_pages) - goto back_local_freelist; - if (in_interrupt()) - goto back_local_freelist; - - list_add(&page->list, &current->local_pages); - page->index = order; - current->nr_local_pages++; -} - -#define MARK_USED(index, order, area) \ - __change_bit((index) >> (1+(order)), (area)->map) - -static inline struct page * expand (zone_t *zone, struct page *page, - unsigned long index, int low, int high, free_area_t * area) -{ - unsigned long size = 1 << high; - - while (high > low) { - if (BAD_RANGE(zone,page)) - BUG(); - area--; - high--; - size >>= 1; - list_add(&(page)->list, &(area)->free_list); - MARK_USED(index, high, area); - index += size; - page += size; - } - if (BAD_RANGE(zone,page)) - BUG(); - return page; -} - -static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order)); -static struct page * fastcall rmqueue(zone_t *zone, unsigned int order) -{ - free_area_t * area = zone->free_area + order; - unsigned int curr_order = order; - struct list_head *head, *curr; - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&zone->lock, flags); - do { - head = &area->free_list; - curr = head->next; - - if (curr != head) { - unsigned int index; - - page = list_entry(curr, struct page, list); - if (BAD_RANGE(zone,page)) - BUG(); - list_del(curr); - index = page - zone->zone_mem_map; - if (curr_order != MAX_ORDER-1) - MARK_USED(index, curr_order, area); - zone->free_pages -= 1UL << order; - - page = expand(zone, page, index, order, curr_order, area); - spin_unlock_irqrestore(&zone->lock, flags); - - set_page_count(page, 1); - if (BAD_RANGE(zone,page)) - BUG(); - if (PageLRU(page)) - BUG(); - if (PageActive(page)) - BUG(); - return page; - } - curr_order++; - area++; - } while (curr_order < MAX_ORDER); - spin_unlock_irqrestore(&zone->lock, flags); - - return NULL; -} - -#ifndef CONFIG_DISCONTIGMEM -struct page * fastcall _alloc_pages(unsigned int gfp_mask, unsigned int order) -{ - return __alloc_pages(gfp_mask, order, - contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK)); -} -#endif - -static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *)); -static struct page * fastcall balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) -{ - struct page * page = NULL; - int __freed; - - if (in_interrupt()) - BUG(); - - current->allocation_order = order; - current->flags |= PF_MEMALLOC | PF_FREE_PAGES; - - __freed = try_to_free_pages_zone(classzone, gfp_mask); - - current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES); - - if (current->nr_local_pages) { - struct list_head * entry, * local_pages; - struct page * tmp; - int nr_pages; - - local_pages = &current->local_pages; - - if (likely(__freed)) { - /* pick from the last inserted so we're lifo */ - entry = local_pages->next; - do { - tmp = list_entry(entry, struct page, list); - if (tmp->index == order && memclass(page_zone(tmp), classzone)) { - list_del(entry); - current->nr_local_pages--; - set_page_count(tmp, 1); - page = tmp; - - if (page->buffers) - BUG(); - if (page->mapping) - BUG(); - if (!VALID_PAGE(page)) - BUG(); - if (PageLocked(page)) - BUG(); - if (PageLRU(page)) - BUG(); - if (PageActive(page)) - BUG(); - if (PageDirty(page)) - BUG(); - - break; - } - } while ((entry = entry->next) != local_pages); - } - - nr_pages = current->nr_local_pages; - /* free in reverse order so that the global order will be lifo */ - while ((entry = local_pages->prev) != local_pages) { - list_del(entry); - tmp = list_entry(entry, struct page, list); - __free_pages_ok(tmp, tmp->index); - if (!nr_pages--) - BUG(); - } - current->nr_local_pages = 0; - } - - *freed = __freed; - return page; -} - -static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order) -{ - long free = zone->free_pages - (1UL << order); - return free >= 0 ? free : 0; -} - -/* - * This is the 'heart' of the zoned buddy allocator: - */ -struct page * fastcall __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist) -{ - zone_t **zone, * classzone; - struct page * page; - int freed, class_idx; - - zone = zonelist->zones; - classzone = *zone; - class_idx = zone_idx(classzone); - - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].low) { - page = rmqueue(z, order); - if (page) - return page; - } - } - - classzone->need_balance = 1; - mb(); - if (waitqueue_active(&kswapd_wait)) - wake_up_interruptible(&kswapd_wait); - - zone = zonelist->zones; - for (;;) { - unsigned long min; - zone_t *z = *(zone++); - if (!z) - break; - - min = z->watermarks[class_idx].min; - if (!(gfp_mask & __GFP_WAIT)) - min >>= 2; - if (zone_free_pages(z, order) > min) { - page = rmqueue(z, order); - if (page) - return page; - } - } - - /* here we're in the low on memory slow path */ - - if ((current->flags & PF_MEMALLOC) && - (!in_interrupt() || (current->flags & PF_MEMDIE))) { - zone = zonelist->zones; - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - page = rmqueue(z, order); - if (page) - return page; - } - return NULL; - } - - /* Atomic allocations - we can't balance anything */ - if (!(gfp_mask & __GFP_WAIT)) - goto out; - - rebalance: - page = balance_classzone(classzone, gfp_mask, order, &freed); - if (page) - return page; - - zone = zonelist->zones; - if (likely(freed)) { - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].min) { - page = rmqueue(z, order); - if (page) - return page; - } - } - goto rebalance; - } else { - /* - * Check that no other task is been killed meanwhile, - * in such a case we can succeed the allocation. - */ - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; - - if (zone_free_pages(z, order) > z->watermarks[class_idx].high) { - page = rmqueue(z, order); - if (page) - return page; - } - } - } - - out: - printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n", - order, gfp_mask, !!(current->flags & PF_MEMALLOC)); - if (unlikely(vm_gfp_debug)) - dump_stack(); - return NULL; -} - -/* - * Common helper functions. - */ -fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order) -{ - struct page * page; - - page = alloc_pages(gfp_mask, order); - if (!page) - return 0; - return (unsigned long) page_address(page); -} - -fastcall unsigned long get_zeroed_page(unsigned int gfp_mask) -{ - struct page * page; - - page = alloc_pages(gfp_mask, 0); - if (page) { - void *address = page_address(page); - clear_page(address); - return (unsigned long) address; - } - return 0; -} - -fastcall void __free_pages(struct page *page, unsigned int order) -{ - if (!PageReserved(page) && put_page_testzero(page)) - __free_pages_ok(page, order); -} - -fastcall void free_pages(unsigned long addr, unsigned int order) -{ - if (addr != 0) - __free_pages(virt_to_page(addr), order); -} - -/* - * Total amount of free (allocatable) RAM: - */ -unsigned int nr_free_pages (void) -{ - unsigned int sum = 0; - zone_t *zone; - - for_each_zone(zone) - sum += zone->free_pages; - - return sum; -} - -/* - * Amount of free RAM allocatable as buffer memory: - */ -unsigned int nr_free_buffer_pages (void) -{ - pg_data_t *pgdat; - unsigned int sum = 0; - zonelist_t *zonelist; - zone_t **zonep, *zone; - - for_each_pgdat(pgdat) { - int class_idx; - zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); - zonep = zonelist->zones; - zone = *zonep; - class_idx = zone_idx(zone); - - sum += zone->nr_cache_pages; - for (; zone; zone = *zonep++) { - int free = zone->free_pages - zone->watermarks[class_idx].high; - if (free <= 0) - continue; - sum += free; - } - } - - return sum; -} - -#if CONFIG_HIGHMEM -unsigned int nr_free_highpages (void) -{ - pg_data_t *pgdat; - unsigned int pages = 0; - - for_each_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; - - return pages; -} - -unsigned int freeable_lowmem(void) -{ - unsigned int pages = 0; - pg_data_t *pgdat; - - for_each_pgdat(pgdat) { - pages += pgdat->node_zones[ZONE_DMA].free_pages; - pages += pgdat->node_zones[ZONE_DMA].nr_active_pages; - pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages; - pages += pgdat->node_zones[ZONE_NORMAL].free_pages; - pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages; - pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages; - } - - return pages; -} -#endif - -#define K(x) ((x) << (PAGE_SHIFT-10)) - -/* - * Show free area list (used inside shift_scroll-lock stuff) - * We also calculate the percentage fragmentation. We do this by counting the - * memory on each free list with the exception of the first item on the list. - */ -void show_free_areas_core(pg_data_t *pgdat) -{ - unsigned int order; - unsigned type; - pg_data_t *tmpdat = pgdat; - - printk("Free pages: %6dkB (%6dkB HighMem)\n", - K(nr_free_pages()), - K(nr_free_highpages())); - - while (tmpdat) { - zone_t *zone; - for (zone = tmpdat->node_zones; - zone < tmpdat->node_zones + MAX_NR_ZONES; zone++) - printk("Zone:%s freepages:%6lukB\n", - zone->name, - K(zone->free_pages)); - - tmpdat = tmpdat->node_next; - } - - printk("( Active: %d, inactive: %d, free: %d )\n", - nr_active_pages, - nr_inactive_pages, - nr_free_pages()); - - for (type = 0; type < MAX_NR_ZONES; type++) { - struct list_head *head, *curr; - zone_t *zone = pgdat->node_zones + type; - unsigned long nr, total, flags; - - total = 0; - if (zone->size) { - spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order < MAX_ORDER; order++) { - head = &(zone->free_area + order)->free_list; - curr = head; - nr = 0; - for (;;) { - if ((curr = curr->next) == head) - break; - nr++; - } - total += nr * (1 << order); - printk("%lu*%lukB ", nr, K(1UL) << order); - } - spin_unlock_irqrestore(&zone->lock, flags); - } - printk("= %lukB)\n", K(total)); - } - -#ifdef SWAP_CACHE_INFO - show_swap_cache_info(); -#endif -} - -void show_free_areas(void) -{ - show_free_areas_core(pgdat_list); -} - -/* - * Builds allocation fallback zone lists. - */ -static inline void build_zonelists(pg_data_t *pgdat) -{ - int i, j, k; - - for (i = 0; i <= GFP_ZONEMASK; i++) { - zonelist_t *zonelist; - zone_t *zone; - - zonelist = pgdat->node_zonelists + i; - memset(zonelist, 0, sizeof(*zonelist)); - - j = 0; - k = ZONE_NORMAL; - if (i & __GFP_HIGHMEM) - k = ZONE_HIGHMEM; - if (i & __GFP_DMA) - k = ZONE_DMA; - - switch (k) { - default: - BUG(); - /* - * fallthrough: - */ - case ZONE_HIGHMEM: - zone = pgdat->node_zones + ZONE_HIGHMEM; - if (zone->size) { -#ifndef CONFIG_HIGHMEM - BUG(); -#endif - zonelist->zones[j++] = zone; - } - case ZONE_NORMAL: - zone = pgdat->node_zones + ZONE_NORMAL; - if (zone->size) - zonelist->zones[j++] = zone; - case ZONE_DMA: - zone = pgdat->node_zones + ZONE_DMA; - if (zone->size) - zonelist->zones[j++] = zone; - } - zonelist->zones[j++] = NULL; - } -} - -/* - * Helper functions to size the waitqueue hash table. - * Essentially these want to choose hash table sizes sufficiently - * large so that collisions trying to wait on pages are rare. - * But in fact, the number of active page waitqueues on typical - * systems is ridiculously low, less than 200. So this is even - * conservative, even though it seems large. - * - * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to - * waitqueues, i.e. the size of the waitq table given the number of pages. - */ -#define PAGES_PER_WAITQUEUE 256 - -static inline unsigned long wait_table_size(unsigned long pages) -{ - unsigned long size = 1; - - pages /= PAGES_PER_WAITQUEUE; - - while (size < pages) - size <<= 1; - - /* - * Once we have dozens or even hundreds of threads sleeping - * on IO we've got bigger problems than wait queue collision. - * Limit the size of the wait table to a reasonable size. - */ - size = min(size, 4096UL); - - return size; -} - -/* - * This is an integer logarithm so that shifts can be used later - * to extract the more random high bits from the multiplicative - * hash function before the remainder is taken. - */ -static inline unsigned long wait_table_bits(unsigned long size) -{ - return ffz(~size); -} - -#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) - -/* - * Set up the zone data structures: - * - mark all pages reserved - * - mark all memory queues empty - * - clear the memory bitmaps - */ -void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, - unsigned long *zones_size, unsigned long zone_start_paddr, - unsigned long *zholes_size, struct page *lmem_map) -{ - unsigned long i, j; - unsigned long map_size; - unsigned long totalpages, offset, realtotalpages; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); - - if (zone_start_paddr & ~PAGE_MASK) - BUG(); - - totalpages = 0; - for (i = 0; i < MAX_NR_ZONES; i++) { - unsigned long size = zones_size[i]; - totalpages += size; - } - realtotalpages = totalpages; - if (zholes_size) - for (i = 0; i < MAX_NR_ZONES; i++) - realtotalpages -= zholes_size[i]; - - printk("On node %d totalpages: %lu\n", nid, realtotalpages); - - /* - * Some architectures (with lots of mem and discontinous memory - * maps) have to search for a good mem_map area: - * For discontigmem, the conceptual mem map array starts from - * PAGE_OFFSET, we need to align the actual array onto a mem map - * boundary, so that MAP_NR works. - */ - map_size = (totalpages + 1)*sizeof(struct page); - if (lmem_map == (struct page *)0) { - lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size); - lmem_map = (struct page *)(PAGE_OFFSET + - MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET)); - } - *gmap = pgdat->node_mem_map = lmem_map; - pgdat->node_size = totalpages; - pgdat->node_start_paddr = zone_start_paddr; - pgdat->node_start_mapnr = (lmem_map - mem_map); - pgdat->nr_zones = 0; - - offset = lmem_map - mem_map; - for (j = 0; j < MAX_NR_ZONES; j++) { - zone_t *zone = pgdat->node_zones + j; - unsigned long mask; - unsigned long size, realsize; - int idx; - - zone_table[nid * MAX_NR_ZONES + j] = zone; - realsize = size = zones_size[j]; - if (zholes_size) - realsize -= zholes_size[j]; - - printk("zone(%lu): %lu pages.\n", j, size); - zone->size = size; - zone->realsize = realsize; - zone->name = zone_names[j]; - zone->lock = SPIN_LOCK_UNLOCKED; - zone->zone_pgdat = pgdat; - zone->free_pages = 0; - zone->need_balance = 0; - zone->nr_active_pages = zone->nr_inactive_pages = 0; - - - if (!size) - continue; - - /* - * The per-page waitqueue mechanism uses hashed waitqueues - * per zone. - */ - zone->wait_table_size = wait_table_size(size); - zone->wait_table_shift = - BITS_PER_LONG - wait_table_bits(zone->wait_table_size); - zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, zone->wait_table_size - * sizeof(wait_queue_head_t)); - - for(i = 0; i < zone->wait_table_size; ++i) - init_waitqueue_head(zone->wait_table + i); - - pgdat->nr_zones = j+1; - - mask = (realsize / zone_balance_ratio[j]); - if (mask < zone_balance_min[j]) - mask = zone_balance_min[j]; - else if (mask > zone_balance_max[j]) - mask = zone_balance_max[j]; - zone->watermarks[j].min = mask; - zone->watermarks[j].low = mask*2; - zone->watermarks[j].high = mask*3; - /* now set the watermarks of the lower zones in the "j" classzone */ - for (idx = j-1; idx >= 0; idx--) { - zone_t * lower_zone = pgdat->node_zones + idx; - unsigned long lower_zone_reserve; - if (!lower_zone->size) - continue; - - mask = lower_zone->watermarks[idx].min; - lower_zone->watermarks[j].min = mask; - lower_zone->watermarks[j].low = mask*2; - lower_zone->watermarks[j].high = mask*3; - - /* now the brainer part */ - lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx]; - lower_zone->watermarks[j].min += lower_zone_reserve; - lower_zone->watermarks[j].low += lower_zone_reserve; - lower_zone->watermarks[j].high += lower_zone_reserve; - - realsize += lower_zone->realsize; - } - - zone->zone_mem_map = mem_map + offset; - zone->zone_start_mapnr = offset; - zone->zone_start_paddr = zone_start_paddr; - - if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1)) - printk("BUG: wrong zone alignment, it will crash\n"); - - /* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. Non-atomic initialization, single-pass. - */ - for (i = 0; i < size; i++) { - struct page *page = mem_map + offset + i; - set_page_zone(page, nid * MAX_NR_ZONES + j); - set_page_count(page, 0); - SetPageReserved(page); - INIT_LIST_HEAD(&page->list); - if (j != ZONE_HIGHMEM) - set_page_address(page, __va(zone_start_paddr)); - zone_start_paddr += PAGE_SIZE; - } - - offset += size; - for (i = 0; ; i++) { - unsigned long bitmap_size; - - INIT_LIST_HEAD(&zone->free_area[i].free_list); - if (i == MAX_ORDER-1) { - zone->free_area[i].map = NULL; - break; - } - - /* - * Page buddy system uses "index >> (i+1)", - * where "index" is at most "size-1". - * - * The extra "+3" is to round down to byte - * size (8 bits per byte assumption). Thus - * we get "(size-1) >> (i+4)" as the last byte - * we can access. - * - * The "+1" is because we want to round the - * byte allocation up rather than down. So - * we should have had a "+7" before we shifted - * down by three. Also, we have to add one as - * we actually _use_ the last bit (it's [0,n] - * inclusive, not [0,n[). - * - * So we actually had +7+1 before we shift - * down by 3. But (n+8) >> 3 == (n >> 3) + 1 - * (modulo overflows, which we do not have). - * - * Finally, we LONG_ALIGN because all bitmap - * operations are on longs. - */ - bitmap_size = (size-1) >> (i+4); - bitmap_size = LONG_ALIGN(bitmap_size+1); - zone->free_area[i].map = - (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size); - } - } - build_zonelists(pgdat); -} - -void __init free_area_init(unsigned long *zones_size) -{ - free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0); -} - -static int __init setup_mem_frac(char *str) -{ - int j = 0; - - while (get_option(&str, &zone_balance_ratio[j++]) == 2); - printk("setup_mem_frac: "); - for (j = 0; j < MAX_NR_ZONES; j++) printk("%d ", zone_balance_ratio[j]); - printk("\n"); - return 1; -} - -__setup("memfrac=", setup_mem_frac); - -static int __init setup_lower_zone_reserve(char *str) -{ - int j = 0; - - while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2); - printk("setup_lower_zone_reserve: "); - for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d ", lower_zone_reserve_ratio[j]); - printk("\n"); - return 1; -} - -__setup("lower_zone_reserve=", setup_lower_zone_reserve); diff -r bf07490fab19 -r f1abe953e401 linux-2.4-xen-sparse/net/core/skbuff.c --- a/linux-2.4-xen-sparse/net/core/skbuff.c Fri Oct 7 22:22:35 2005 +++ /dev/null Fri Oct 7 22:36:26 2005 @@ -1,1309 +0,0 @@ -/* - * Routines having to do with the 'struct sk_buff' memory handlers. - * - * Authors: Alan Cox <iiitac@xxxxxxxxxxxxxx> - * Florian La Roche <rzsfl@xxxxxxxxxxxx> - * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * - * Fixes: - * Alan Cox : Fixed the worst of the load balancer bugs. - * Dave Platt : Interrupt stacking fix. - * Richard Kooijman : Timestamp fixes. - * Alan Cox : Changed buffer format. - * Alan Cox : destructor hook for AF_UNIX etc. - * Linus Torvalds : Better skb_clone. - * Alan Cox : Added skb_copy. - * Alan Cox : Added all the changed routines Linus - * only put in the headers - * Ray VanTassle : Fixed --skb->lock in free - * Alan Cox : skb_copy copy arp field - * Andi Kleen : slabified it. - * - * NOTE: - * The __skb_ routines should be called with interrupts - * disabled, or you better be *real* sure that the operation is atomic - * with respect to whatever list is being frobbed (e.g. via lock_sock() - * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/cache.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <linux/highmem.h> - -#include <net/protocol.h> -#include <net/dst.h> -#include <net/sock.h> -#include <net/checksum.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -int sysctl_hot_list_len = 128; - -static kmem_cache_t *skbuff_head_cache; - -static union { - struct sk_buff_head list; - char pad[SMP_CACHE_BYTES]; -} skb_head_pool[NR_CPUS]; - -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. - */ - -/** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. - */ - -void skb_over_panic(struct sk_buff *skb, int sz, void *here) -{ - printk("skput:over: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - - -void skb_under_panic(struct sk_buff *skb, int sz, void *here) -{ - printk("skput:under: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); -} - -static __inline__ struct sk_buff *skb_head_from_pool(void) -{ - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - - if (skb_queue_len(list)) { - struct sk_buff *skb; - unsigned long flags; - - local_irq_save(flags); - skb = __skb_dequeue(list); - local_irq_restore(flags); - return skb; - } - return NULL; -} - -static __inline__ void skb_head_to_pool(struct sk_buff *skb) -{ - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - - if (skb_queue_len(list) < sysctl_hot_list_len) { - unsigned long flags; - - local_irq_save(flags); - __skb_queue_head(list, skb); - local_irq_restore(flags); - - return; - } - kmem_cache_free(skbuff_head_cache, skb); -} - - -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - -/** - * alloc_skb - allocate a network buffer - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ - -struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. Size must match skb_add_mtu(). */ - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; -} - -/** - * alloc_skb_from_cache - allocate a network buffer - * @cp: kmem_cache from which to allocate the data area - * (object size must be big enough for @size bytes + skb overheads) - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ - -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. */ - size = SKB_DATA_ALIGN(size); - data = kmem_cache_alloc(cp, gfp_mask); - if (data == NULL) - goto nodata; - - /* XXX: does not include slab overhead */ - skb->truesize = size + sizeof(struct sk_buff); - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; -} - - -/* - * Slab constructor for a skb head. - */ -static inline void skb_headerinit(void *p, kmem_cache_t *cache, - unsigned long flags) -{ - struct sk_buff *skb = p; - - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - skb->sk = NULL; - skb->stamp.tv_sec=0; /* No idea about time */ - skb->dev = NULL; - skb->real_dev = NULL; - skb->dst = NULL; - memset(skb->cb, 0, sizeof(skb->cb)); - skb->pkt_type = PACKET_HOST; /* Default type */ - skb->ip_summed = 0; - skb->priority = 0; - skb->security = 0; /* By default packets are insecure */ - skb->destructor = NULL; - -#ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfcache = 0; - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -#endif -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif -} - -static void skb_drop_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - skb_shinfo(skb)->frag_list = NULL; - - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); -} - -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) - skb_get(list); -} - -static void skb_release_data(struct sk_buff *skb) -{ - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - kfree(skb->head); - } -} - -/* - * Free an skbuff by memory without cleaning the state. - */ -void kfree_skbmem(struct sk_buff *skb) -{ - skb_release_data(skb); - skb_head_to_pool(skb); -} - -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) -{ - if (skb->list) { - printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", NET_CALLER(skb)); - BUG(); - } - - dst_release(skb->dst); - if(skb->destructor) { - if (in_irq()) { - printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", - NET_CALLER(skb)); - } - skb->destructor(skb); - } -#ifdef CONFIG_NETFILTER - nf_conntrack_put(skb->nfct); -#endif - skb_headerinit(skb, NULL, 0); /* clean state */ - kfree_skbmem(skb); -} - -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. - */ - -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - n = skb_head_from_pool(); - if (!n) { - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; - } - -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->list = NULL; - n->sk = NULL; - C(stamp); - C(dev); - C(real_dev); - C(h); - C(nh); - C(mac); - C(dst); - dst_clone(n->dst); - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(csum); - n->cloned = 1; - C(pkt_type); - C(ip_summed); - C(priority); - atomic_set(&n->users, 1); - C(protocol); - C(security); - C(truesize); - C(head); - C(data); - C(tail); - C(end); - n->destructor = NULL; -#ifdef CONFIG_NETFILTER - C(nfmark); - C(nfcache); - C(nfct); -#ifdef CONFIG_NETFILTER_DEBUG - C(nf_debug); -#endif -#endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#endif - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; -#ifdef CONFIG_NETFILTER - nf_conntrack_get(skb->nfct); -#endif - return n; -} - -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; - - new->list=NULL; - new->sk=NULL; - new->dev=old->dev; - new->real_dev=old->real_dev; - new->priority=old->priority; - new->protocol=old->protocol; - new->dst=dst_clone(old->dst); - new->h.raw=old->h.raw+offset; - new->nh.raw=old->nh.raw+offset; - new->mac.raw=old->mac.raw+offset; - memcpy(new->cb, old->cb, sizeof(old->cb)); - atomic_set(&new->users, 1); - new->pkt_type=old->pkt_type; - new->stamp=old->stamp; - new->destructor = NULL; - new->security=old->security; -#ifdef CONFIG_NETFILTER - new->nfmark=old->nfmark; - new->nfcache=old->nfcache; - new->nfct=old->nfct; - nf_conntrack_get(new->nfct); -#ifdef CONFIG_NETFILTER_DEBUG - new->nf_debug=old->nf_debug; -#endif -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#endif -} - -/** - * skb_copy - create private copy of an sk_buff - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data. This is used when the - * caller wishes to modify the data and needs a private copy of the - * data to alter. Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * As by-product this function converts non-linear &sk_buff to linear - * one, so that &sk_buff becomes completely private and caller is allowed - * to modify all the data of returned buffer. This means that this - * function is not recommended for use in circumstances when only - * header is going to be modified. Use pskb_copy() instead. - */ - -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - int headerlen = skb->data-skb->head; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,headerlen); - /* Set the tail pointer and length */ - skb_put(n,skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) - BUG(); - - copy_skb_header(n, skb); - - return n; -} - -/* Keep head the same: replace data */ -int skb_linearize(struct sk_buff *skb, int gfp_mask) -{ - unsigned int size; - u8 *data; - long offset; - int headerlen = skb->data - skb->head; - int expand = (skb->tail+skb->data_len) - skb->end; - - if (skb_shared(skb)) - BUG(); - - if (expand <= 0) - expand = 0; - - size = (skb->end - skb->head + expand); - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - return -ENOMEM; - - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) - BUG(); - - /* Offset between the two in bytes */ - offset = data - skb->head; - - /* Free old data. */ - skb_release_data(skb); - - skb->head = data; - skb->end = data + size; - - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; - - /* Set up shinfo */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; - - skb->tail += skb->data_len; - skb->data_len = 0; - return 0; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. - */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,skb->data-skb->head); - /* Set the tail pointer and length */ - skb_put(n,skb_headlen(skb)); - /* Copy the bytes */ - memcpy(n->data, skb->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); - - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) -{ - int i; - u8 *data; - int size = nhead + (skb->end - skb->head) + ntail; - long off; - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. */ - memcpy(data+nhead, skb->head, skb->tail-skb->head); - memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data+nhead) - skb->head; - - skb->head = data; - skb->end = data+size; - - skb->data += off; - skb->tail += off; - skb->mac.raw += off; - skb->h.raw += off; - skb->nh.raw += off; - skb->cloned = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff * -skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - return pskb_copy(skb, GFP_ATOMIC); - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL || - !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - return skb2; - - kfree_skb(skb2); - return NULL; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. - * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - */ - - -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - - n=alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - if(n==NULL) - return NULL; - - skb_reserve(n,newheadroom); - - /* Set the tail pointer and length */ - skb_put(n,skb->len); - - /* Copy the data only. */ - if (skb_copy_bits(skb, 0, n->data, skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} - -/** - * skb_pad - zero pad the tail of an skb - * @skb: buffer to pad - * @pad: space to pad - * - * Ensure that a buffer is followed by a padding area that is zero - * filled. Used by network drivers which may DMA or transfer data - * beyond the buffer end onto the wire. - * - * May return NULL in out of memory cases. - */ - -struct sk_buff *skb_pad(struct sk_buff *skb, int pad) -{ - struct sk_buff *nskb; - - /* If the skbuff is non linear tailroom is always zero.. */ - if(skb_tailroom(skb) >= pad) - { - memset(skb->data+skb->len, 0, pad); - return skb; - } - - nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); - kfree_skb(skb); - if(nskb) - memset(nskb->data+nskb->len, 0, pad); - return nskb; -} - -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). - */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) -{ - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - - for (i=0; i<nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (!realloc) - BUG(); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len-offset; - } - } - offset = end; - } - - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } - } - - return 0; -} - -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes a sense only on a fragmented &sk_buff, - * it expands header moving its tail forward and copying necessary - * data from fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or value of new tail of skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. - */ -unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) -{ - int i, k, eat; - - /* If skb has not enough free space at tail, get new one - * plus 128 bytes for future expansions. If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - eat = (skb->tail+delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) - BUG(); - - /* Optimization: no fragments, no reasons to preestimate - * size of pulled pages. Superb. - */ - if (skb_shinfo(skb)->frag_list == NULL) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, - * but taking into account that pulling is expected to - * be very rare operation, it is worth to fight against - * further bloating skb head and crucify ourselves here instead. - * Pure masohism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - if (list == NULL) - BUG(); - - if (list->len <= eat) { - /* Eaten as whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (clone == NULL) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (pskb_pull(list, eat) == NULL) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. */ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb->tail; -} - -/* Copy some data bits from skb to kernel buffer. */ - -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -{ - int i, copy; - int start = skb->len - skb->data_len; - - if (offset > (int)skb->len-len) - goto fault; - - /* Copy header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - memcpy(to, skb->data + offset, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ - offset-start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset-start, to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - } - if (len == 0) - return 0; - -fault: - return -EFAULT; -} - -/* Checksum skb data. */ - -unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Checksum header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data+offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset-start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - unsigned int csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset-start, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -/* Both of above in one bottle. */ - -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Copy header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + - offset-start, to, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - unsigned int csum2; - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - unsigned int csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_HW) - csstart = skb->h.raw - skb->data; - else - csstart = skb->len - skb->data_len; - - if (csstart > skb->len - skb->data_len) - BUG(); - - memcpy(to, skb->data, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, - skb->len-csstart, 0); - - if (skb->ip_summed == CHECKSUM_HW) { - long csstuff = csstart + skb->csum; - - *((unsigned short *)(to + csstuff)) = csum_fold(csum); - } -} - -#if 0 -/* - * Tune the memory allocator for a new MTU size. - */ -void skb_add_mtu(int mtu) -{ - /* Must match allocation in alloc_skb */ - mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); - - kmem_add_cache_size(mtu); -} -#endif - -void __init skb_init(void) -{ - int i; - - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN, - skb_headerinit, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - - for (i=0; i<NR_CPUS; i++) - skb_queue_head_init(&skb_head_pool[i].list); -} _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.