[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Temporarily remove NetBSD and FreeBSD sparse trees to avoid user confusion.
# HG changeset patch # User cl349@xxxxxxxxxxxxxxxxxxxx # Node ID 0255f48b757fc4a69846356e8f42e9a4ed410c8c # Parent 64cd054aa1432b44c66e72c0c0179827aa5772a9 Temporarily remove NetBSD and FreeBSD sparse trees to avoid user confusion. The NetBSD and FreeBSD currently don't build against the final Xen 3.0 API. Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> diff -r 64cd054aa143 -r 0255f48b757f buildconfigs/mk.netbsd-2.0-xenU --- a/buildconfigs/mk.netbsd-2.0-xenU Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,45 +0,0 @@ - -OS = netbsd - -NETBSD_RELEASE ?= 2.0 -NETBSD_CVSSNAP ?= 20050309 - -EXTRAVERSION = xenU - -FULLVERSION = $(NETBSD_VER)-$(EXTRAVERSION) - -NETBSD_DIR = $(OS)-$(FULLVERSION) - -.PHONY: build clean mrproper mkpatch - -include buildconfigs/Rules.mk - -build: $(OS)-$(EXTRAVERSION) - -netbsd-%-tools.tar.bz2: - @echo "Cannot find netbsd-$(NETBSD_VER)-tools.tar.gz in path $(NETBSD_SRC_PATH)" - wget http://www.cl.cam.ac.uk/Research/SRG/netos/xen/downloads/netbsd-$*-tools.tar.bz2 -O./$@ - -netbsd-%-tools: netbsd-%-tools.tar.bz2 - tar -jxf $< - touch $@ # update timestamp to avoid rebuild - -$(NETBSD_DIR)/.valid: ref-$(OS)-$(NETBSD_VER)/.valid-ref - $(RM) -rf $(NETBSD_DIR) - cp -al $(<D) $(NETBSD_DIR) - # Apply arch-xen patches - ( cd netbsd-$(NETBSD_VER)-xen-sparse ; \ - ./mkbuildtree ../$(NETBSD_DIR) ) - @touch $(NETBSD_DIR)/.valid - -# build the specified netbsd tree -netbsd-xen%: $(NETBSD_DIR)/.valid netbsd-$(NETBSD_RELEASE)-tools - $(MAKE) -C netbsd-$(FULLVERSION) config - $(MAKE) -C netbsd-$(FULLVERSION) netbsd - $(MAKE) -C netbsd-$(FULLVERSION) INSTALL_PATH=$(DESTDIR) INSTALL_NAME=boot/netbsd-$(NETBSD_VER)-xen$* install - -clean:: - $(MAKE) -C netbsd-$(FULLVERSION) clean - -delete: - rm -rf tmp-$(OS)-$(NETBSD_VER) $(NETBSD_DIR) diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/Makefile.i386-xen --- a/freebsd-5.3-xen-sparse/conf/Makefile.i386-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 
2005 @@ -1,51 +0,0 @@ -# Makefile.i386 -- with config changes. -# Copyright 1990 W. Jolitz -# from: @(#)Makefile.i386 7.1 5/10/91 -# $FreeBSD: src/sys/conf/Makefile.i386,v 1.259 2003/04/15 21:29:11 phk Exp $ -# -# Makefile for FreeBSD -# -# This makefile is constructed from a machine description: -# config machineid -# Most changes should be made in the machine description -# /sys/i386/conf/``machineid'' -# after which you should do -# config machineid -# Generic makefile changes should be made in -# /sys/conf/Makefile.i386 -# after which config should be rerun for all machines. -# - -# Which version of config(8) is required. -%VERSREQ= 500013 - -STD8X16FONT?= iso - - - -.if !defined(S) -.if exists(./@/.) -S= ./@ -.else -S= ../../.. -.endif -.endif -.include "$S/conf/kern.pre.mk" -M= i386-xen -MKMODULESENV+= MACHINE=i386-xen -INCLUDES+= -I../../include/xen-public -%BEFORE_DEPEND - -%OBJS - -%FILES.c - -%FILES.s - -%FILES.m - -%CLEAN - -%RULES - -.include "$S/conf/kern.post.mk" diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/files.i386-xen --- a/freebsd-5.3-xen-sparse/conf/files.i386-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,295 +0,0 @@ -# This file tells config what files go into building a kernel, -# files marked standard are always included. -# -# $FreeBSD: src/sys/conf/files.i386,v 1.457 2003/12/03 23:06:30 imp Exp $ -# -# The long compile-with and dependency lines are required because of -# limitations in config: backslash-newline doesn't work in strings, and -# dependency lines other than the first are silently ignored. 
-# -linux_genassym.o optional compat_linux \ - dependency "$S/i386/linux/linux_genassym.c" \ - compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ - no-obj no-implicit-rule \ - clean "linux_genassym.o" -# -linux_assym.h optional compat_linux \ - dependency "$S/kern/genassym.sh linux_genassym.o" \ - compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ - no-obj no-implicit-rule before-depend \ - clean "linux_assym.h" -# -svr4_genassym.o optional compat_svr4 \ - dependency "$S/i386/svr4/svr4_genassym.c" \ - compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ - no-obj no-implicit-rule \ - clean "svr4_genassym.o" -# -svr4_assym.h optional compat_svr4 \ - dependency "$S/kern/genassym.sh svr4_genassym.o" \ - compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ - no-obj no-implicit-rule before-depend \ - clean "svr4_assym.h" -# -font.h optional sc_dflt_font \ - compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ - no-obj no-implicit-rule before-depend \ - clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" -# -atkbdmap.h optional atkbd_dflt_keymap \ - compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ - no-obj no-implicit-rule before-depend \ - clean "atkbdmap.h" -# -ukbdmap.h optional ukbd_dflt_keymap \ - compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static 
accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ - no-obj no-implicit-rule before-depend \ - clean "ukbdmap.h" -# -msysosak.o optional fla \ - dependency "$S/contrib/dev/fla/i386/msysosak.o.uu" \ - compile-with "uudecode < $S/contrib/dev/fla/i386/msysosak.o.uu" \ - no-implicit-rule -# -trlld.o optional oltr \ - dependency "$S/contrib/dev/oltr/i386-elf.trlld.o.uu" \ - compile-with "uudecode < $S/contrib/dev/oltr/i386-elf.trlld.o.uu" \ - no-implicit-rule -# -hal.o optional ath_hal \ - dependency "$S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \ - compile-with "uudecode < $S/contrib/dev/ath/freebsd/i386-elf.hal.o.uu" \ - no-implicit-rule -# -# -compat/linux/linux_file.c optional compat_linux -compat/linux/linux_getcwd.c optional compat_linux -compat/linux/linux_ioctl.c optional compat_linux -compat/linux/linux_ipc.c optional compat_linux -compat/linux/linux_mib.c optional compat_linux -compat/linux/linux_misc.c optional compat_linux -compat/linux/linux_signal.c optional compat_linux -compat/linux/linux_socket.c optional compat_linux -compat/linux/linux_stats.c optional compat_linux -compat/linux/linux_sysctl.c optional compat_linux -compat/linux/linux_uid16.c optional compat_linux -compat/linux/linux_util.c optional compat_linux -compat/pecoff/imgact_pecoff.c optional pecoff_support -compat/svr4/imgact_svr4.c optional compat_svr4 -compat/svr4/svr4_fcntl.c optional compat_svr4 -compat/svr4/svr4_filio.c optional compat_svr4 -compat/svr4/svr4_ioctl.c optional compat_svr4 -compat/svr4/svr4_ipc.c optional compat_svr4 -compat/svr4/svr4_misc.c optional compat_svr4 -compat/svr4/svr4_resource.c optional compat_svr4 -compat/svr4/svr4_signal.c optional compat_svr4 -compat/svr4/svr4_socket.c optional compat_svr4 -compat/svr4/svr4_sockio.c optional compat_svr4 -compat/svr4/svr4_stat.c optional compat_svr4 -compat/svr4/svr4_stream.c optional compat_svr4 -compat/svr4/svr4_syscallnames.c optional compat_svr4 -compat/svr4/svr4_sysent.c optional compat_svr4 
-compat/svr4/svr4_sysvec.c optional compat_svr4 -compat/svr4/svr4_termios.c optional compat_svr4 -compat/svr4/svr4_ttold.c optional compat_svr4 -contrib/dev/fla/fla.c optional fla -contrib/dev/oltr/if_oltr.c optional oltr -contrib/dev/oltr/trlldbm.c optional oltr -contrib/dev/oltr/trlldhm.c optional oltr -contrib/dev/oltr/trlldmac.c optional oltr -bf_enc.o optional ipsec ipsec_esp \ - dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ - compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ - no-implicit-rule -crypto/des/arch/i386/des_enc.S optional ipsec ipsec_esp -crypto/des/des_ecb.c optional netsmbcrypto -crypto/des/arch/i386/des_enc.S optional netsmbcrypto -crypto/des/des_setkey.c optional netsmbcrypto -bf_enc.o optional crypto \ - dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ - compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ - no-implicit-rule -crypto/des/arch/i386/des_enc.S optional crypto -crypto/des/des_ecb.c optional crypto -crypto/des/des_setkey.c optional crypto -dev/ar/if_ar.c optional ar -dev/ar/if_ar_pci.c optional ar pci -dev/cx/csigma.c optional cx -dev/cx/cxddk.c optional cx -dev/cx/if_cx.c optional cx -dev/dgb/dgb.c count dgb -dev/fb/fb.c optional fb -dev/fb/fb.c optional vga -dev/fb/splash.c optional splash -dev/fb/vga.c optional vga -dev/kbd/atkbd.c optional atkbd -dev/kbd/atkbdc.c optional atkbdc -dev/kbd/kbd.c optional atkbd -dev/kbd/kbd.c optional kbd -dev/kbd/kbd.c optional sc -dev/kbd/kbd.c optional ukbd -dev/kbd/kbd.c optional vt -dev/mem/memutil.c standard -dev/random/nehemiah.c standard -dev/ppc/ppc.c optional ppc -dev/ppc/ppc_puc.c optional ppc puc pci -dev/sio/sio.c optional sio -dev/sio/sio_isa.c optional sio isa -dev/syscons/schistory.c optional sc -dev/syscons/scmouse.c optional 
sc -dev/syscons/scterm.c optional sc -dev/syscons/scterm-dumb.c optional sc -dev/syscons/scterm-sc.c optional sc -dev/syscons/scvesactl.c optional sc vga vesa -dev/syscons/scvgarndr.c optional sc vga -dev/syscons/scvidctl.c optional sc -dev/syscons/scvtb.c optional sc -dev/syscons/syscons.c optional sc -dev/syscons/sysmouse.c optional sc -dev/uart/uart_cpu_i386.c optional uart -geom/geom_bsd.c standard -geom/geom_bsd_enc.c standard -geom/geom_mbr.c standard -geom/geom_mbr_enc.c standard -i386/acpica/OsdEnvironment.c optional acpi -i386/acpica/acpi_machdep.c optional acpi -i386/acpica/acpi_wakeup.c optional acpi -acpi_wakecode.h optional acpi \ - dependency "$S/i386/acpica/acpi_wakecode.S" \ - compile-with "${MAKE} -f $S/i386/acpica/Makefile MAKESRCPATH=$S/i386/acpica" \ - no-obj no-implicit-rule before-depend \ - clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin" -# -i386/acpica/madt.c optional acpi apic -i386/bios/mca_machdep.c optional mca -i386/bios/smapi.c optional smapi -i386/bios/smapi_bios.S optional smapi -i386/bios/smbios.c optional smbios -i386/bios/vpd.c optional vpd -i386/i386/apic_vector.s optional apic -i386/i386/atomic.c standard \ - compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" -i386/i386/autoconf.c standard -i386/i386/busdma_machdep.c standard -i386-xen/i386-xen/critical.c standard -i386/i386/db_disasm.c optional ddb -i386-xen/i386-xen/db_interface.c optional ddb -i386/i386/db_trace.c optional ddb -i386/i386/i386-gdbstub.c optional ddb -i386/i386/dump_machdep.c standard -i386/i386/elf_machdep.c standard -i386-xen/i386-xen/exception.s standard -i386-xen/i386-xen/i686_mem.c standard -i386/i386/identcpu.c standard -i386/i386/in_cksum.c optional inet -i386-xen/i386-xen/initcpu.c standard -i386-xen/i386-xen/intr_machdep.c standard -i386-xen/i386-xen/io_apic.c optional apic -i386/i386/legacy.c standard -i386-xen/i386-xen/locore.s standard no-obj -i386-xen/i386-xen/machdep.c standard -i386/i386/mem.c 
standard -i386-xen/i386-xen/mp_clock.c optional smp -i386-xen/i386-xen/mp_machdep.c optional smp -i386/i386/mpboot.s optional smp -i386-xen/i386-xen/mptable.c optional apic -i386-xen/i386-xen/local_apic.c optional apic -i386/i386/mptable_pci.c optional apic pci -i386/i386/nexus.c standard -i386/i386/uio_machdep.c standard -i386/i386/perfmon.c optional perfmon -i386/i386/perfmon.c optional perfmon profiling-routine -i386-xen/i386-xen/pmap.c standard -i386-xen/i386-xen/support.s standard -i386-xen/i386-xen/swtch.s standard -i386-xen/i386-xen/sys_machdep.c standard -i386-xen/i386-xen/trap.c standard -i386/i386/tsc.c standard -i386-xen/i386-xen/vm_machdep.c standard -i386-xen/i386-xen/clock.c standard - -# xen specific arch-dep files -i386-xen/i386-xen/hypervisor.c standard -i386-xen/i386-xen/xen_machdep.c standard -i386-xen/i386-xen/xen_bus.c standard -i386-xen/i386-xen/evtchn.c standard -i386-xen/i386-xen/ctrl_if.c standard -i386-xen/i386-xen/gnttab.c standard - - -i386/isa/asc.c count asc -i386/isa/ctx.c optional ctx -i386/isa/cy.c count cy -i386/isa/elink.c optional ep -i386/isa/elink.c optional ie -i386/isa/gpib.c optional gp -i386/isa/gsc.c count gsc -i386/isa/istallion.c optional stli nowerror -i386/isa/loran.c optional loran -i386/isa/mse.c optional mse -i386/isa/nmi.c standard - -# drivers -i386-xen/xen/misc/npx.c optional npx -i386-xen/xen/misc/evtchn_dev.c standard -i386-xen/xen/char/console.c standard -i386-xen/xen/netfront/xn_netfront.c standard -i386-xen/xen/blkfront/xb_blkfront.c standard - - - -i386/isa/pcf.c optional pcf -i386/isa/pcvt/pcvt_drv.c optional vt -i386/isa/pcvt/pcvt_ext.c optional vt -i386/isa/pcvt/pcvt_kbd.c optional vt -i386/isa/pcvt/pcvt_out.c optional vt -i386/isa/pcvt/pcvt_sup.c optional vt -i386/isa/pcvt/pcvt_vtf.c optional vt -i386/isa/pmtimer.c optional pmtimer -i386/isa/prof_machdep.c optional profiling-routine -i386/isa/spic.c optional spic -i386/isa/spigot.c count spigot -i386/isa/spkr.c optional speaker -i386/isa/stallion.c 
optional stl nowerror -i386/isa/vesa.c optional vga vesa -i386/isa/wt.c count wt -i386/linux/imgact_linux.c optional compat_linux -i386/linux/linux_dummy.c optional compat_linux -i386/linux/linux_locore.s optional compat_linux \ - dependency "linux_assym.h" -i386/linux/linux_machdep.c optional compat_linux -i386/linux/linux_ptrace.c optional compat_linux -i386/linux/linux_sysent.c optional compat_linux -i386/linux/linux_sysvec.c optional compat_linux -i386/pci/pci_cfgreg.c optional pci -i386/pci/pci_bus.c optional pci -i386/svr4/svr4_locore.s optional compat_svr4 \ - dependency "svr4_assym.h" \ - warning "COMPAT_SVR4 is broken and should be avoided" -i386/svr4/svr4_machdep.c optional compat_svr4 -isa/atkbd_isa.c optional atkbd -isa/atkbdc_isa.c optional atkbdc -isa/fd.c optional fdc -isa/psm.c optional psm -isa/syscons_isa.c optional sc -isa/vga_isa.c optional vga -kern/imgact_aout.c optional compat_aout -kern/imgact_gzip.c optional gzip -libkern/divdi3.c standard -libkern/moddi3.c standard -libkern/qdivrem.c standard -libkern/ucmpdi2.c standard -libkern/udivdi3.c standard -libkern/umoddi3.c standard -libkern/flsl.c standard -libkern/ffsl.c standard - -pci/cy_pci.c optional cy pci -pci/agp_intel.c optional agp -pci/agp_via.c optional agp -pci/agp_sis.c optional agp -pci/agp_ali.c optional agp -pci/agp_amd.c optional agp -pci/agp_i810.c optional agp -pci/agp_nvidia.c optional agp - diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/kern.mk --- a/freebsd-5.3-xen-sparse/conf/kern.mk Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,100 +0,0 @@ -# $FreeBSD: src/sys/conf/kern.mk,v 1.42 2004/05/14 13:35:46 cognet Exp $ - -# -# Warning flags for compiling the kernel and components of the kernel. -# -# Note that the newly added -Wcast-qual is responsible for generating -# most of the remaining warnings. Warnings introduced with -Wall will -# also pop up, but are easier to fix. 
-.if ${CC} == "icc" -#CWARNFLAGS= -w2 # use this if you are terribly bored -CWARNFLAGS= -.else -CWARNFLAGS?= -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes \ - -Wmissing-prototypes -Wpointer-arith -Winline -Wcast-qual \ - -fformat-extensions -.endif -# -std=c99 anonymous unions are non-compliant -# -# The following flags are next up for working on: -# -W - -# -# On the i386, do not align the stack to 16-byte boundaries. Otherwise GCC -# 2.95 adds code to the entry and exit point of every function to align the -# stack to 16-byte boundaries -- thus wasting approximately 12 bytes of stack -# per function call. While the 16-byte alignment may benefit micro benchmarks, -# it is probably an overall loss as it makes the code bigger (less efficient -# use of code cache tag lines) and uses more stack (less efficient use of data -# cache tag lines) -# -.if ${MACHINE_ARCH} == "i386" && ${CC} != "icc" -CFLAGS+= -mno-align-long-strings -mpreferred-stack-boundary=2 -INLINE_LIMIT?= 8000 -.endif - -# -# On the alpha, make sure that we don't use floating-point registers and -# allow the use of BWX etc instructions (only needed for low-level i/o). -# Also, reserve register t7 to point at per-cpu global variables. -# -.if ${MACHINE_ARCH} == "alpha" -CFLAGS+= -mno-fp-regs -ffixed-8 -Wa,-mev6 -INLINE_LIMIT?= 15000 -.endif - -.if ${MACHINE_ARCH} == "arm" -INLINE_LIMIT?= 8000 -.endif -# -# For IA-64, we use r13 for the kernel globals pointer and we only use -# a very small subset of float registers for integer divides. -# -.if ${MACHINE_ARCH} == "ia64" -CFLAGS+= -ffixed-r13 -mfixed-range=f32-f127 -mno-sdata -INLINE_LIMIT?= 15000 -.endif - -# -# For sparc64 we want medlow code model, and we tell gcc to use floating -# point emulation. This avoids using floating point registers for integer -# operations which it has a tendency to do. 
-# -.if ${MACHINE_ARCH} == "sparc64" -CFLAGS+= -mcmodel=medlow -msoft-float -INLINE_LIMIT?= 15000 -.endif - -# -# For AMD64, use a medium model for now. We'll switch to "kernel" -# once pmap is ready. Be excessively careful to not generate FPU code. -# -.if ${MACHINE_ARCH} == "amd64" -CFLAGS+= -mcmodel=kernel -mno-red-zone \ - -mfpmath=387 -mno-sse -mno-sse2 -mno-mmx -mno-3dnow \ - -msoft-float -fno-asynchronous-unwind-tables -INLINE_LIMIT?= 8000 -.endif - -# -# For PowerPC we tell gcc to use floating point emulation. This avoids using -# floating point registers for integer operations which it has a tendency to do. -# -.if ${MACHINE_ARCH} == "powerpc" -CFLAGS+= -msoft-float -INLINE_LIMIT?= 15000 -.endif - -# -# GCC 3.0 and above like to do certain optimizations based on the -# assumption that the program is linked against libc. Stop this. -# -.if ${CC} == "icc" -CFLAGS+= -nolib_inline -.else -CFLAGS+= -ffreestanding -.endif - -.if ${CC} == "icc" -CFLAGS+= -restrict -.endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/ldscript.i386-xen --- a/freebsd-5.3-xen-sparse/conf/ldscript.i386-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,134 +0,0 @@ -/* $FreeBSD: src/sys/conf/ldscript.i386,v 1.9 2003/12/03 07:40:03 phk Exp $ */ -OUTPUT_FORMAT("elf32-i386-freebsd", "elf32-i386-freebsd", "elf32-i386-freebsd") -OUTPUT_ARCH(i386) -ENTRY(btext) -SEARCH_DIR(/usr/lib); -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - . 
= kernbase + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - .rel.text : - { *(.rel.text) *(.rel.gnu.linkonce.t*) } - .rela.text : - { *(.rela.text) *(.rela.gnu.linkonce.t*) } - .rel.data : - { *(.rel.data) *(.rel.gnu.linkonce.d*) } - .rela.data : - { *(.rela.data) *(.rela.gnu.linkonce.d*) } - .rel.rodata : - { *(.rel.rodata) *(.rel.gnu.linkonce.r*) } - .rela.rodata : - { *(.rela.rodata) *(.rela.gnu.linkonce.r*) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.init : { *(.rel.init) } - .rela.init : { *(.rela.init) } - .rel.fini : { *(.rel.fini) } - .rela.fini : { *(.rela.fini) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } - .init : { *(.init) } =0x9090 - .plt : { *(.plt) } - .text : - { - *(.text) - *(.stub) - /* .gnu.warning sections are handled specially by elf32.em. */ - *(.gnu.warning) - *(.gnu.linkonce.t*) - } =0x9090 - _etext = .; - PROVIDE (etext = .); - .fini : { *(.fini) } =0x9090 - .rodata : { *(.rodata) *(.gnu.linkonce.r*) } - .rodata1 : { *(.rodata1) } - /* Adjust the address for the data segment. We want to adjust up to - the same address within the page on the next page up. */ - . = ALIGN(0x1000) + (. & (0x1000 - 1)) ; - .data : - { - *(.data) - *(.gnu.linkonce.d*) - CONSTRUCTORS - } - .data1 : { *(.data1) } - . 
= ALIGN(32 / 8); - _start_ctors = .; - PROVIDE (start_ctors = .); - .ctors : - { - *(.ctors) - } - _stop_ctors = .; - PROVIDE (stop_ctors = .); - .dtors : - { - *(.dtors) - } - .got : { *(.got.plt) *(.got) } - .dynamic : { *(.dynamic) } - /* We want the small data sections together, so single-instruction offsets - can access them all, and initialized data all before uninitialized, so - we can shorten the on-disk segment size. */ - .sdata : { *(.sdata) } - _edata = .; - PROVIDE (edata = .); - __bss_start = .; - .sbss : { *(.sbss) *(.scommon) } - .bss : - { - *(.dynbss) - *(.bss) - *(COMMON) - } - . = ALIGN(32 / 8); - _end = . ; - PROVIDE (end = .); - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . 
*/ -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/conf/options.i386-xen --- a/freebsd-5.3-xen-sparse/conf/options.i386-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,162 +0,0 @@ -# $FreeBSD: src/sys/conf/options.i386,v 1.204 2003/12/03 23:06:30 imp Exp $ -# Options specific to the i386 platform kernels - -AUTO_EOI_1 opt_auto_eoi.h -AUTO_EOI_2 opt_auto_eoi.h -BROKEN_KEYBOARD_RESET opt_reset.h -COMPAT_OLDISA -I586_PMC_GUPROF opt_i586_guprof.h -MAXMEM -MPTABLE_FORCE_HTT -NO_MIXED_MODE -PERFMON -DISABLE_PSE opt_pmap.h -DISABLE_PG_G opt_pmap.h -PMAP_SHPGPERPROC opt_pmap.h -PPC_PROBE_CHIPSET opt_ppc.h -PPC_DEBUG opt_ppc.h -POWERFAIL_NMI opt_trap.h -MP_WATCHDOG opt_mp_watchdog.h - - - -# Options for emulators. These should only be used at config time, so -# they are handled like options for static filesystems -# (see src/sys/conf/options), except for broken debugging options. -COMPAT_AOUT opt_dontuse.h -IBCS2 opt_dontuse.h -COMPAT_LINUX opt_dontuse.h -COMPAT_SVR4 opt_dontuse.h -DEBUG_SVR4 opt_svr4.h -PECOFF_SUPPORT opt_dontuse.h -PECOFF_DEBUG opt_pecoff.h - -# Change KVM size. Changes things all over the kernel. -KVA_PAGES opt_global.h -XEN opt_global.h -XENDEV opt_xen.h -NOXENDEBUG opt_xen.h -# Physical address extensions and support for >4G ram. As above. 
-PAE opt_global.h - -CLK_CALIBRATION_LOOP opt_clock.h -CLK_USE_I8254_CALIBRATION opt_clock.h -CLK_USE_TSC_CALIBRATION opt_clock.h -TIMER_FREQ opt_clock.h - -CPU_ATHLON_SSE_HACK opt_cpu.h -CPU_BLUELIGHTNING_3X opt_cpu.h -CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h -CPU_BTB_EN opt_cpu.h -CPU_CYRIX_NO_LOCK opt_cpu.h -CPU_DIRECT_MAPPED_CACHE opt_cpu.h -CPU_DISABLE_5X86_LSSER opt_cpu.h -CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* -CPU_DISABLE_SSE opt_cpu.h -CPU_ELAN opt_cpu.h -CPU_ELAN_XTAL opt_cpu.h -CPU_ELAN_PPS opt_cpu.h -CPU_ENABLE_SSE opt_cpu.h -CPU_FASTER_5X86_FPU opt_cpu.h -CPU_GEODE opt_cpu.h -CPU_I486_ON_386 opt_cpu.h -CPU_IORT opt_cpu.h -CPU_L2_LATENCY opt_cpu.h -CPU_LOOP_EN opt_cpu.h -CPU_PPRO2CELERON opt_cpu.h -CPU_RSTK_EN opt_cpu.h -CPU_SOEKRIS opt_cpu.h -CPU_SUSP_HLT opt_cpu.h -CPU_UPGRADE_HW_CACHE opt_cpu.h -CPU_WT_ALLOC opt_cpu.h -CYRIX_CACHE_REALLY_WORKS opt_cpu.h -CYRIX_CACHE_WORKS opt_cpu.h -NO_F00F_HACK opt_cpu.h -NO_MEMORY_HOLE opt_cpu.h - -# The CPU type affects the endian conversion functions all over the kernel. 
-I386_CPU opt_global.h -I486_CPU opt_global.h -I586_CPU opt_global.h -I686_CPU opt_global.h - -VGA_ALT_SEQACCESS opt_vga.h -VGA_DEBUG opt_vga.h -VGA_NO_FONT_LOADING opt_vga.h -VGA_NO_MODE_CHANGE opt_vga.h -VGA_SLOW_IOACCESS opt_vga.h -VGA_WIDTH90 opt_vga.h - -VESA -VESA_DEBUG opt_vesa.h - -PSM_HOOKRESUME opt_psm.h -PSM_RESETAFTERSUSPEND opt_psm.h -PSM_DEBUG opt_psm.h - -ATKBD_DFLT_KEYMAP opt_atkbd.h - -# pcvt(4) has a bunch of options -FAT_CURSOR opt_pcvt.h -XSERVER opt_pcvt.h -PCVT_24LINESDEF opt_pcvt.h -PCVT_CTRL_ALT_DEL opt_pcvt.h -PCVT_META_ESC opt_pcvt.h -PCVT_NSCREENS opt_pcvt.h -PCVT_PRETTYSCRNS opt_pcvt.h -PCVT_SCANSET opt_pcvt.h -PCVT_SCREENSAVER opt_pcvt.h -PCVT_USEKBDSEC opt_pcvt.h -PCVT_VT220KEYB opt_pcvt.h -PCVT_GREENSAVER opt_pcvt.h - -# Video spigot -SPIGOT_UNSECURE opt_spigot.h - -# Enables NETGRAPH support for Cronyx adapters -NETGRAPH_CRONYX opt_ng_cronyx.h - -# ------------------------------- -# isdn4bsd: passive ISA cards -# ------------------------------- -TEL_S0_8 opt_i4b.h -TEL_S0_16 opt_i4b.h -TEL_S0_16_3 opt_i4b.h -AVM_A1 opt_i4b.h -USR_STI opt_i4b.h -ITKIX1 opt_i4b.h -ELSA_PCC16 opt_i4b.h -# ------------------------------- -# isdn4bsd: passive ISA PnP cards -# ------------------------------- -CRTX_S0_P opt_i4b.h -DRN_NGO opt_i4b.h -TEL_S0_16_3_P opt_i4b.h -SEDLBAUER opt_i4b.h -DYNALINK opt_i4b.h -ASUSCOM_IPAC opt_i4b.h -ELSA_QS1ISA opt_i4b.h -SIEMENS_ISURF2 opt_i4b.h -EICON_DIVA opt_i4b.h -COMPAQ_M610 opt_i4b.h -# ------------------------------- -# isdn4bsd: passive PCI cards -# ------------------------------- -ELSA_QS1PCI opt_i4b.h -# ------------------------------- -# isdn4bsd: misc options -# ------------------------------- -# temporary workaround for SMP machines -I4B_SMP_WORKAROUND opt_i4b.h -# enable VJ compression code for ipr i/f -IPR_VJ opt_i4b.h -IPR_LOG opt_i4b.h - -# Device options -DEV_ACPI opt_acpi.h -DEV_APIC opt_apic.h -DEV_NPX opt_npx.h - -# ------------------------------- -# EOF -# ------------------------------- diff -r 
64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/fbsdxensetup --- a/freebsd-5.3-xen-sparse/fbsdxensetup Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,44 +0,0 @@ -#!/bin/csh -f - -setenv XENROOT `pwd` -cd $XENROOT -if ( ! -d freebsd-5.3-xen-sparse ) then - echo "Please run this script from the root of the Xen source tree" - exit 1 -endif -rm -rf $XENROOT/fbsdtmp $XENROOT/freebsd-5.3-xenU -mkdir -p $XENROOT/fbsdtmp -cd $XENROOT/fbsdtmp -echo "step 1" -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aa -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ab -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ac -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ad -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ae -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.af -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ag -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ah -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ai -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.aj -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.ak -wget ftp://ftp.freebsd.org/pub/FreeBSD/releases/i386/5.3-RELEASE/src/ssys.al -mkdir -p foo -cat ssys.?? 
| tar --unlink -xpzf - -C foo/ -mkdir -p $XENROOT/freebsd-5.3-xenU -mv foo/sys/* $XENROOT/freebsd-5.3-xenU -cd $XENROOT -rm -rf $XENROOT/fbsdtmp -echo "step 2" -mkdir -p $XENROOT/freebsd-5.3-xenU/i386-xen/include -cd $XENROOT/freebsd-5.3-xenU/i386-xen/include/ -foreach file (../../i386/include/*) - ln -s $file -end -echo "step 3" -cd $XENROOT/freebsd-5.3-xen-sparse -echo "step 4" -./mkbuildtree ../freebsd-5.3-xenU -echo "step 5" -cd $XENROOT/freebsd-5.3-xenU/i386-xen/include -ln -s $XENROOT/xen/include/public xen-public -echo "done" diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/Makefile --- a/freebsd-5.3-xen-sparse/i386-xen/Makefile Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,40 +0,0 @@ -# $FreeBSD: src/sys/i386/Makefile,v 1.11 2002/06/21 06:18:02 mckusick Exp $ -# @(#)Makefile 8.1 (Berkeley) 6/11/93 - -# Makefile for i386 links, tags file - -# SYS is normally set in Make.tags.inc -# SYS=/sys -SYS=/nsys - -TAGDIR= i386 - -.include "../kern/Make.tags.inc" - -all: - @echo "make links or tags only" - -# Directories in which to place i386 tags links -DI386= apm i386 ibcs2 include isa linux - -links:: - -for i in ${COMMDIR1}; do \ - (cd $$i && { rm -f tags; ln -s ../${TAGDIR}/tags tags; }) done - -for i in ${COMMDIR2}; do \ - (cd $$i && { rm -f tags; ln -s ../../${TAGDIR}/tags tags; }) done - -for i in ${DI386}; do \ - (cd $$i && { rm -f tags; ln -s ../tags tags; }) done - -SI386= ${SYS}/i386/apm/*.[ch] \ - ${SYS}/i386/i386/*.[ch] ${SYS}/i386/ibcs2/*.[ch] \ - ${SYS}/i386/include/*.[ch] ${SYS}/i386/isa/*.[ch] \ - ${SYS}/i386/linux/*.[ch] -AI386= ${SYS}/i386/i386/*.s - -tags:: - -ctags -wdt ${COMM} ${SI386} - egrep "^ENTRY(.*)|^ALTENTRY(.*)" ${AI386} | \ - sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3 \1 /^\2(\3\4$$/;" \ - >> tags - sort -o tags tags - chmod 444 tags diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC --- a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC Sun Dec 4 17:24:24 2005 
+++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,273 +0,0 @@ -# -# GENERIC -- Generic kernel configuration file for FreeBSD/i386 -# -# For more information on this file, please read the handbook section on -# Kernel Configuration Files: -# -# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html -# -# The handbook is also available locally in /usr/share/doc/handbook -# if you've installed the doc distribution, otherwise always see the -# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the -# latest information. -# -# An exhaustive list of options and more detailed explanations of the -# device lines is also present in the ../../conf/NOTES and NOTES files. -# If you are in doubt as to the purpose or necessity of a line, check first -# in NOTES. -# -# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $ - -machine i386 -cpu I486_CPU -cpu I586_CPU -cpu I686_CPU -ident GENERIC - -#To statically compile in device wiring instead of /boot/device.hints -#hints "GENERIC.hints" #Default places to look for devices. - -#makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols - -options SCHED_4BSD #4BSD scheduler -options INET #InterNETworking -options INET6 #IPv6 communications protocols -options FFS #Berkeley Fast Filesystem -options SOFTUPDATES #Enable FFS soft updates support -options UFS_ACL #Support for access control lists -options UFS_DIRHASH #Improve performance on big directories -options MD_ROOT #MD is a potential root device -options NFSCLIENT #Network Filesystem Client -options NFSSERVER #Network Filesystem Server -options NFS_ROOT #NFS usable as /, requires NFSCLIENT -options MSDOSFS #MSDOS Filesystem -options CD9660 #ISO 9660 Filesystem -options PROCFS #Process filesystem (requires PSEUDOFS) -options PSEUDOFS #Pseudo-filesystem framework -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] 
-options COMPAT_FREEBSD4 #Compatible with FreeBSD4 -options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI -options KTRACE #ktrace(1) support -options SYSVSHM #SYSV-style shared memory -options SYSVMSG #SYSV-style message queues -options SYSVSEM #SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions -options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options AHC_REG_PRETTY_PRINT # Print register bitfields in debug - # output. Adds ~128k to driver. -options AHD_REG_PRETTY_PRINT # Print register bitfields in debug - # output. Adds ~215k to driver. -options PFIL_HOOKS # pfil(9) framework - -# Debugging for use in -current -#options DDB #Enable the kernel debugger -#options INVARIANTS #Enable calls of extra sanity checking -options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS -#options WITNESS #Enable checks to detect deadlocks and cycles -#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed - -# To make an SMP kernel, the next two are needed -options SMP # Symmetric MultiProcessor Kernel -device apic # I/O APIC - -device isa -device eisa -device pci - -# Floppy drives -device fdc - -# ATA and ATAPI devices -device ata -device atadisk # ATA disk drives -device ataraid # ATA RAID drives -device atapicd # ATAPI CDROM drives -device atapifd # ATAPI floppy drives -device atapist # ATAPI tape drives -options ATA_STATIC_ID #Static device numbering - -# SCSI Controllers -device ahb # EISA AHA1742 family -device ahc # AHA2940 and onboard AIC7xxx devices -device ahd # AHA39320/29320 and onboard AIC79xx devices -device amd # AMD 53C974 (Tekram DC-390(T)) -device isp # Qlogic family -device mpt # LSI-Logic MPT-Fusion -#device ncr # NCR/Symbios Logic -device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') -device trm # Tekram DC395U/UW/F DC315U adapters - -device adv # Advansys SCSI adapters -device adw # Advansys wide SCSI adapters -device aha # Adaptec 154x SCSI 
adapters -device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. -device bt # Buslogic/Mylex MultiMaster SCSI adapters - -device ncv # NCR 53C500 -device nsp # Workbit Ninja SCSI-3 -device stg # TMC 18C30/18C50 - -# SCSI peripherals -device scbus # SCSI bus (required for SCSI) -device ch # SCSI media changers -device da # Direct Access (disks) -device sa # Sequential Access (tape etc) -device cd # CD -device pass # Passthrough device (direct SCSI access) -device ses # SCSI Environmental Services (and SAF-TE) - -# RAID controllers interfaced to the SCSI subsystem -device amr # AMI MegaRAID -device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID -device ciss # Compaq Smart RAID 5* -device dpt # DPT Smartcache III, IV - See NOTES for options -device iir # Intel Integrated RAID -device ips # IBM (Adaptec) ServeRAID -device mly # Mylex AcceleRAID/eXtremeRAID - -# RAID controllers -device aac # Adaptec FSA RAID -device aacp # SCSI passthrough for aac (requires CAM) -device ida # Compaq Smart RAID -device mlx # Mylex DAC960 family -device pst # Promise Supertrak SX6000 -device twe # 3ware ATA RAID - -# atkbdc0 controls both the keyboard and the PS/2 mouse -device atkbdc # AT keyboard controller -device atkbd # AT keyboard -device psm # PS/2 mouse - -device vga # VGA video card driver - -device splash # Splash screen and screen saver support - -# syscons is the default console driver, resembling an SCO console -device sc - -# Enable this for the pcvt (VT220 compatible) console driver -#device vt -#options XSERVER # support for X server on a vt console -#options FAT_CURSOR # start with block cursor - -device agp # support several AGP chipsets - -# Floating point support - do not disable. -device npx - -# Power management support (see NOTES for more options) -#device apm -# Add suspend/resume support for the i8254. 
-device pmtimer - -# PCCARD (PCMCIA) support -# Pcmcia and cardbus bridge support -device cbb # cardbus (yenta) bridge -#device pcic # ExCA ISA and PCI bridges -device pccard # PC Card (16-bit) bus -device cardbus # CardBus (32-bit) bus - -# Serial (COM) ports -device sio # 8250, 16[45]50 based serial ports - -# Parallel port -device ppc -device ppbus # Parallel port bus (required) -device lpt # Printer -device plip # TCP/IP over parallel -device ppi # Parallel port interface device -#device vpo # Requires scbus and da - -# If you've got a "dumb" serial or parallel PCI card that is -# supported by the puc(4) glue driver, uncomment the following -# line to enable it (connects to the sio and/or ppc drivers): -#device puc - -# PCI Ethernet NICs. -device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel PRO/1000 adapter Gigabit Ethernet Card -device txp # 3Com 3cR990 (``Typhoon'') -device vx # 3Com 3c590, 3c595 (``Vortex'') - -# PCI Ethernet NICs that use the common MII bus controller code. -# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! -device miibus # MII bus support -device bfe # Broadcom BCM440x 10/100 ethernet -device bge # Broadcom BCM570xx Gigabit Ethernet -device dc # DEC/Intel 21143 and various workalikes -device fxp # Intel EtherExpress PRO/100B (82557, 82558) -device pcn # AMD Am79C97x PCI 10/100 (precedence over 'lnc') -device re # RealTek 8139C+/8169/8169S/8110S -device rl # RealTek 8129/8139 -device sf # Adaptec AIC-6915 (``Starfire'') -device sis # Silicon Integrated Systems SiS 900/SiS 7016 -device sk # SysKonnect SK-984x and SK-982x gigabit ethernet -device ste # Sundance ST201 (D-Link DFE-550TX) -device ti # Alteon Networks Tigon I/II gigabit ethernet -device tl # Texas Instruments ThunderLAN -device tx # SMC EtherPower II (83c170 ``EPIC'') -device vr # VIA Rhine, Rhine II -device wb # Winbond W89C840F -device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') - -# ISA Ethernet NICs. pccard nics included. 
-device cs # Crystal Semiconductor CS89x0 NIC -# 'device ed' requires 'device miibus' -device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards -device ex # Intel EtherExpress Pro/10 and Pro/10+ -device ep # Etherlink III based cards -device fe # Fujitsu MB8696x based cards -device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc. -device lnc # NE2100, NE32-VL Lance Ethernet cards -device sn # SMC's 9000 series of ethernet chips -device xe # Xircom pccard ethernet - -# ISA devices that use the old ISA shims -#device le - -# Wireless NIC cards -device wlan # 802.11 support -device an # Aironet 4500/4800 802.11 wireless NICs. -device awi # BayStack 660 and others -device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. -#device wl # Older non 802.11 Wavelan wireless NIC. - -# Pseudo devices - the number indicates how many units to allocate. -device random # Entropy device -device loop # Network loopback -device ether # Ethernet support -device sl # Kernel SLIP -device ppp # Kernel PPP -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! 
-device bpf # Berkeley packet filter - -# USB support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device usb # USB Bus (required) -#device udbp # USB Double Bulk Pipe devices -device ugen # Generic -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device ulpt # Printer -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse -device urio # Diamond Rio 500 MP3 player -device uscanner # Scanners -# USB Ethernet, requires mii -device aue # ADMtek USB ethernet -device axe # ASIX Electronics USB ethernet -device cue # CATC USB ethernet -device kue # Kawasaki LSI USB ethernet - -# FireWire support -device firewire # FireWire bus code -device sbp # SCSI over FireWire (Requires scbus and da) -device fwe # Ethernet over FireWire (non-standard!) diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints --- a/freebsd-5.3-xen-sparse/i386-xen/conf/GENERIC.hints Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,93 +0,0 @@ -# $FreeBSD: src/sys/i386/conf/GENERIC.hints,v 1.11 2002/12/05 22:49:47 jhb Exp $ -hint.fdc.0.at="isa" -hint.fdc.0.port="0x3F0" -hint.fdc.0.irq="6" -hint.fdc.0.drq="2" -hint.fd.0.at="fdc0" -hint.fd.0.drive="0" -hint.fd.1.at="fdc0" -hint.fd.1.drive="1" -hint.ata.0.at="isa" -hint.ata.0.port="0x1F0" -hint.ata.0.irq="14" -hint.ata.1.at="isa" -hint.ata.1.port="0x170" -hint.ata.1.irq="15" -hint.adv.0.at="isa" -hint.adv.0.disabled="1" -hint.bt.0.at="isa" -hint.bt.0.disabled="1" -hint.aha.0.at="isa" -hint.aha.0.disabled="1" -hint.aic.0.at="isa" -hint.aic.0.disabled="1" -hint.atkbdc.0.at="isa" -hint.atkbdc.0.port="0x060" -hint.atkbd.0.at="atkbdc" -hint.atkbd.0.irq="1" -hint.atkbd.0.flags="0x1" -hint.psm.0.at="atkbdc" -hint.psm.0.irq="12" -hint.vga.0.at="isa" -hint.sc.0.at="isa" -hint.sc.0.flags="0x100" -hint.vt.0.at="isa" -hint.vt.0.disabled="1" -hint.apm.0.disabled="1" -hint.apm.0.flags="0x20" -hint.pcic.0.at="isa" -# hint.pcic.0.irq="10" # Default 
to polling -hint.pcic.0.port="0x3e0" -hint.pcic.0.maddr="0xd0000" -hint.pcic.1.at="isa" -hint.pcic.1.irq="11" -hint.pcic.1.port="0x3e2" -hint.pcic.1.maddr="0xd4000" -hint.pcic.1.disabled="1" -hint.sio.0.at="isa" -hint.sio.0.port="0x3F8" -hint.sio.0.flags="0x10" -hint.sio.0.irq="4" -hint.sio.1.at="isa" -hint.sio.1.port="0x2F8" -hint.sio.1.irq="3" -hint.sio.2.at="isa" -hint.sio.2.disabled="1" -hint.sio.2.port="0x3E8" -hint.sio.2.irq="5" -hint.sio.3.at="isa" -hint.sio.3.disabled="1" -hint.sio.3.port="0x2E8" -hint.sio.3.irq="9" -hint.ppc.0.at="isa" -hint.ppc.0.irq="7" -hint.ed.0.at="isa" -hint.ed.0.disabled="1" -hint.ed.0.port="0x280" -hint.ed.0.irq="10" -hint.ed.0.maddr="0xd8000" -hint.cs.0.at="isa" -hint.cs.0.disabled="1" -hint.cs.0.port="0x300" -hint.sn.0.at="isa" -hint.sn.0.disabled="1" -hint.sn.0.port="0x300" -hint.sn.0.irq="10" -hint.ie.0.at="isa" -hint.ie.0.disabled="1" -hint.ie.0.port="0x300" -hint.ie.0.irq="10" -hint.ie.0.maddr="0xd0000" -hint.fe.0.at="isa" -hint.fe.0.disabled="1" -hint.fe.0.port="0x300" -hint.le.0.at="isa" -hint.le.0.disabled="1" -hint.le.0.port="0x300" -hint.le.0.irq="5" -hint.le.0.maddr="0xd0000" -hint.lnc.0.at="isa" -hint.lnc.0.disabled="1" -hint.lnc.0.port="0x280" -hint.lnc.0.irq="10" -hint.lnc.0.drq="0" diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/Makefile --- a/freebsd-5.3-xen-sparse/i386-xen/conf/Makefile Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,3 +0,0 @@ -# $FreeBSD: src/sys/i386/conf/Makefile,v 1.9 2003/02/26 23:36:58 ru Exp $ - -.include "${.CURDIR}/../../conf/makeLINT.mk" diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/NOTES --- a/freebsd-5.3-xen-sparse/i386-xen/conf/NOTES Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1115 +0,0 @@ -# -# NOTES -- Lines that can be cut/pasted into kernel and hints configs. -# -# This file contains machine dependent kernel configuration notes. 
For -# machine independent notes, look in /sys/conf/NOTES. -# -# $FreeBSD: src/sys/i386/conf/NOTES,v 1.1108 2003/12/04 19:57:56 phk Exp $ -# - -# -# This directive is mandatory; it defines the architecture to be -# configured for; in this case, the 386 family based IBM-PC and -# compatibles. -# -machine i386 - -# -# We want LINT to cover profiling as well -profile 2 - - -##################################################################### -# SMP OPTIONS: -# -# The apic device enables the use of the I/O APIC for interrupt delivery. -# The apic device can be used in both UP and SMP kernels, but is required -# for SMP kernels. Thus, the apic device is not strictly an SMP option, -# but it is a prerequisite for SMP. -# -# Notes: -# -# Be sure to disable 'cpu I386_CPU' for SMP kernels. -# -# By default, mixed mode is used to route IRQ0 from the AT timer via -# the 8259A master PIC through the ExtINT pin on the first I/O APIC. -# This can be disabled via the NO_MIXED_MODE option. In that case, -# IRQ0 will be routed via an intpin on the first I/O APIC. Not all -# motherboards hook IRQ0 up to the first I/O APIC even though their -# MP table or MADT may claim to do so. That is why mixed mode is -# enabled by default. -# -# HTT CPUs should only be used if they are enabled in the BIOS. For -# the ACPI case, ACPI only correctly tells us about any HTT CPUs if -# they are enabled. However, most HTT systems do not list HTT CPUs -# in the MP Table if they are enabled, thus we guess at the HTT CPUs -# for the MP Table case. However, we shouldn't try to guess and use -# these CPUs if HTTT is disabled. Thus, HTT guessing is only enabled -# for the MP Table if the user explicitly asks for it via the -# MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT -# disabled in your BIOS. 
-# - -# Mandatory: -device apic # I/O apic - -# Optional: -options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP Table -options NO_MIXED_MODE # Disable use of mixed mode - - -##################################################################### -# CPU OPTIONS - -# -# You must specify at least one CPU (the one you intend to run on); -# deleting the specification for CPUs you don't need to use may make -# parts of the system run faster. -# I386_CPU is mutually exclusive with the other CPU types. -# -#cpu I386_CPU -cpu I486_CPU -cpu I586_CPU # aka Pentium(tm) -cpu I686_CPU # aka Pentium Pro(tm) - -# -# Options for CPU features. -# -# CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has -# forgotten to enable them. -# -# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM -# BlueLightning CPU. It works only with Cyrix FPU, and this option -# should not be used with Intel FPU. -# -# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning -# CPU if CPU supports it. The default is double-clock mode on -# BlueLightning CPU box. -# -# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). -# -# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct -# mapped mode. Default is 2-way set associative mode. -# -# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space -# of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. -# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) -# -# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables -# reorder). This option should not be used if you use memory mapped -# I/O device(s). -# -# CPU_ELAN enables support for AMDs ElanSC520 CPU. -# CPU_ELAN_XTAL sets the clock crystal frequency in Hz -# CPU_ELAN_PPS enables precision timestamp code. -# -# CPU_SOEKRIS enables support www.soekris.com hardware. -# -# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. This is default -# on I686_CPU and above. 
-# CPU_DISABLE_SSE explicitly prevent I686_CPU from turning on SSE. -# -# CPU_FASTER_5X86_FPU enables faster FPU exception handler. -# -# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products -# for i386 machines. -# -# CPU_IORT defines I/O clock delay time (NOTE 1). Default values of -# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively -# (no clock delay). -# -# CPU_L2_LATENCY specifed the L2 cache latency value. This option is used -# only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected. -# The default value is 5. -# -# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination -# of a jump is already present in the prefetch buffer on Cyrix 5x86(NOTE -# 1). -# -# CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option -# is useful when you use Socket 8 to Socket 370 converter, because most Pentium -# Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs. -# -# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). -# -# CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU -# enters suspend mode following execution of HALT instruction. -# -# CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s). -# -# CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD -# K5/K6/K6-2 cpus. -# -# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache -# flush at hold state. -# -# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs -# without cache flush at hold state, and (2) write-back CPU cache on -# Cyrix 6x86 whose revision < 2.7 (NOTE 2). -# -# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY -# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is -# executed. This option is only needed if I586_CPU is also defined, -# and should be included for any non-Pentium CPU that defines it. 
-# -# NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors -# which indicates that the 15-16MB range is *definitely* not being -# occupied by an ISA memory hole. -# -# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32 -# machines. VmWare seems to emulate this instruction poorly, causing -# the guest OS to run very slowly. Enabling this with a SMP kernel -# will cause the kernel to be unusable. -# -# NOTE 1: The options, CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, -# CPU_LOOP_EN and CPU_RSTK_EN should not be used because of CPU bugs. -# These options may crash your system. -# -# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled -# in write-through mode when revision < 2.7. If revision of Cyrix -# 6x86 >= 2.7, CPU cache is always enabled in write-back mode. -# -# NOTE 3: This option may cause failures for software that requires -# locked cycles in order to operate correctly. -# -options CPU_ATHLON_SSE_HACK -options CPU_BLUELIGHTNING_FPU_OP_CACHE -options CPU_BLUELIGHTNING_3X -options CPU_BTB_EN -options CPU_DIRECT_MAPPED_CACHE -options CPU_DISABLE_5X86_LSSER -options CPU_ELAN -options CPU_SOEKRIS -options CPU_ELAN_XTAL=32768000 -options CPU_ELAN_PPS -options CPU_ENABLE_SSE -#options CPU_DISABLE_SSE -options CPU_FASTER_5X86_FPU -options CPU_I486_ON_386 -options CPU_IORT -options CPU_L2_LATENCY=5 -options CPU_LOOP_EN -options CPU_PPRO2CELERON -options CPU_RSTK_EN -options CPU_SUSP_HLT -options CPU_UPGRADE_HW_CACHE -options CPU_WT_ALLOC -options CYRIX_CACHE_WORKS -options CYRIX_CACHE_REALLY_WORKS -#options NO_F00F_HACK -options CPU_DISABLE_CMPXCHG - -# Debug options -options NPX_DEBUG # enable npx debugging (FPU/math emu) - #new math emulator - -# -# PERFMON causes the driver for Pentium/Pentium Pro performance counters -# to be compiled. See perfmon(4) for more information. 
-# -options PERFMON - - -##################################################################### -# NETWORKING OPTIONS - -# -# DEVICE_POLLING adds support for mixed interrupt-polling handling -# of network device drivers, which has significant benefits in terms -# of robustness to overloads and responsivity, as well as permitting -# accurate scheduling of the CPU time between kernel network processing -# and other activities. The drawback is a moderate (up to 1/HZ seconds) -# potential increase in response times. -# It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING -# to achieve smoother behaviour. -# Additionally, you can enable/disable polling at runtime with the -# sysctl variable kern.polling.enable (defaults off), and select -# the CPU fraction reserved to userland with the sysctl variable -# kern.polling.user_frac (default 50, range 0..100). -# -# Only the "dc" "fxp" and "sis" devices support this mode of operation at -# the time of this writing. - -options DEVICE_POLLING - - -##################################################################### -# CLOCK OPTIONS - -# The following options are used for debugging clock behavior only, and -# should not be used for production systems. -# -# CLK_CALIBRATION_LOOP will run the clock calibration loop at startup -# until the user presses a key. - -options CLK_CALIBRATION_LOOP - -# The following two options measure the frequency of the corresponding -# clock relative to the RTC (onboard mc146818a). - -options CLK_USE_I8254_CALIBRATION -options CLK_USE_TSC_CALIBRATION - - -##################################################################### -# MISCELLANEOUS DEVICES AND OPTIONS - -device speaker #Play IBM BASIC-style noises out your speaker -hint.speaker.0.at="isa" -hint.speaker.0.port="0x61" -device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT! 
-device apm_saver # Requires APM - - -##################################################################### -# HARDWARE BUS CONFIGURATION - -# -# ISA bus -# -device isa - -# -# Options for `isa': -# -# AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A -# interrupt controller. This saves about 0.7-1.25 usec for each interrupt. -# This option breaks suspend/resume on some portables. -# -# AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A -# interrupt controller. This saves about 0.7-1.25 usec for each interrupt. -# Automatic EOI is documented not to work for for the slave with the -# original i8259A, but it works for some clones and some integrated -# versions. -# -# MAXMEM specifies the amount of RAM on the machine; if this is not -# specified, FreeBSD will first read the amount of memory from the CMOS -# RAM, so the amount of memory will initially be limited to 64MB or 16MB -# depending on the BIOS. If the BIOS reports 64MB, a memory probe will -# then attempt to detect the installed amount of RAM. If this probe -# fails to detect >64MB RAM you will have to use the MAXMEM option. -# The amount is in kilobytes, so for a machine with 128MB of RAM, it would -# be 131072 (128 * 1024). -# -# BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to -# reset the CPU for reboot. This is needed on some systems with broken -# keyboard controllers. - -options COMPAT_OLDISA #Use ISA shims and glue for old drivers -options AUTO_EOI_1 -#options AUTO_EOI_2 - -options MAXMEM=(128*1024) -#options BROKEN_KEYBOARD_RESET - -# -# EISA bus -# -# The EISA bus device is `eisa'. It provides auto-detection and -# configuration support for all devices on the EISA bus. - -device eisa - -# By default, only 10 EISA slots are probed, since the slot numbers -# above clash with the configuration address space of the PCI subsystem, -# and the EISA probe is not very smart about this. 
This is sufficient -# for most machines, but in particular the HP NetServer LC series comes -# with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, -# thus you need to bump this figure to 12 for them. -options EISA_SLOTS=12 - -# -# MCA bus: -# -# The MCA bus device is `mca'. It provides auto-detection and -# configuration support for all devices on the MCA bus. -# No hints are required for MCA. - -device mca - -# -# PCI bus & PCI options: -# -device pci - -# -# AGP GART support -device agp - - -##################################################################### -# HARDWARE DEVICE CONFIGURATION - -# -# Mandatory devices: -# - -# To include support for VGA VESA video modes -options VESA - -# Turn on extra debugging checks and output for VESA support. -options VESA_DEBUG - -# The pcvt console driver (vt220 compatible). -device vt -hint.vt.0.at="isa" -options XSERVER # support for running an X server on vt -options FAT_CURSOR # start with block cursor -# This PCVT option is for keyboards such as those used on really old ThinkPads -options PCVT_SCANSET=2 -# Other PCVT options are documented in pcvt(4). -options PCVT_24LINESDEF -options PCVT_CTRL_ALT_DEL -options PCVT_META_ESC -options PCVT_NSCREENS=9 -options PCVT_PRETTYSCRNS -options PCVT_SCREENSAVER -options PCVT_USEKBDSEC -options PCVT_VT220KEYB -options PCVT_GREENSAVER - -# -# The Numeric Processing eXtension driver. In addition to this, you -# may configure a math emulator (see above). If your machine has a -# hardware FPU and the kernel configuration includes the npx device -# *and* a math emulator compiled into the kernel, the hardware FPU -# will be used, unless it is found to be broken or unless "flags" to -# npx0 includes "0x08", which requests preference for the emulator. -device npx -hint.npx.0.flags="0x0" -hint.npx.0.irq="13" - -# -# `flags' for npx0: -# 0x01 don't use the npx registers to optimize bcopy. -# 0x02 don't use the npx registers to optimize bzero. 
-# 0x04 don't use the npx registers to optimize copyin or copyout. -# 0x08 use emulator even if hardware FPU is available. -# The npx registers are normally used to optimize copying and zeroing when -# all of the following conditions are satisfied: -# I586_CPU is an option -# the cpu is an i586 (perhaps not a Pentium) -# the probe for npx0 succeeds -# INT 16 exception handling works. -# Then copying and zeroing using the npx registers is normally 30-100% faster. -# The flags can be used to control cases where it doesn't work or is slower. -# Setting them at boot time using userconfig works right (the optimizations -# are not used until later in the bootstrap when npx0 is attached). -# Flag 0x08 automatically disables the i586 optimized routines. -# - -# -# Optional devices: -# - -# 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create -# the /dev/3dfx0 device to work with glide implementations. This should get -# linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as -# the tdfx DRI module from XFree86 and is completely unrelated. -# -# To enable Linuxulator support, one must also include COMPAT_LINUX in the -# config as well, or you will not have the dependencies. The other option -# is to load both as modules. - -device tdfx # Enable 3Dfx Voodoo support -options TDFX_LINUX # Enable Linuxulator support - -# -# ACPI support using the Intel ACPI Component Architecture reference -# implementation. -# -# ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer -# kernel environment variables to select initial debugging levels for the -# Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER -# defined when it is built). -# -# ACPI_MAX_THREADS sets the number of task threads started. -# -# ACPI_NO_SEMAPHORES makes the AcpiOs*Semaphore routines a no-op. -# -# ACPICA_PEDANTIC enables strict checking of AML. 
Our default is to -# relax these checks to allow code generated by the Microsoft compiler -# to still execute. -# -# Note that building ACPI into the kernel is deprecated; the module is -# normally loaded automatically by the loader. -# -device acpi -options ACPI_DEBUG -options ACPI_MAX_THREADS=1 -#!options ACPI_NO_SEMAPHORES -#!options ACPICA_PEDANTIC - -# DRM options: -# mgadrm: AGP Matrox G200, G400, G450, G550 -# r128drm: ATI Rage 128 -# radeondrm: ATI Radeon up to 9000/9100 -# sisdrm: SiS 300/305,540,630 -# tdfxdrm: 3dfx Voodoo 3/4/5 and Banshee -# DRM_DEBUG: include debug printfs, very slow -# -# mga requires AGP in the kernel, and it is recommended -# for AGP r128 and radeon cards. - -device mgadrm -device "r128drm" -device radeondrm -device sisdrm -device tdfxdrm - -options DRM_DEBUG - -# M-systems DiskOnchip products see src/sys/contrib/dev/fla/README -device fla -hint.fla.0.at="isa" - -# -# mse: Logitech and ATI InPort bus mouse ports - -device mse -hint.mse.0.at="isa" -hint.mse.0.port="0x23c" -hint.mse.0.irq="5" - -# -# Network interfaces: -# - -# ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver -# (requires sppp) -# ath: Atheros a/b/g WiFi adapters (requires ath_hal and wlan) -# cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) -# ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 -# HP PC Lan+, various PC Card devices (refer to etc/defauls/pccard.conf) -# (requires miibus) -# el: 3Com 3C501 (slow!) -# ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; -# Intel EtherExpress -# le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, -# DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) -# lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and -# Am79C960) -# oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133 -# (no hints needed). 
-# Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140, -# OC-3141, OC-3540, OC-3250 -# rdp: RealTek RTL 8002-based pocket ethernet adapters -# sbni: Granch SBNI12-xx ISA and PCI adapters -# sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) -# wl: Lucent Wavelan (ISA card only). - -# Order for ISA/EISA devices is important here - -device ar -hint.ar.0.at="isa" -hint.ar.0.port="0x300" -hint.ar.0.irq="10" -hint.ar.0.maddr="0xd0000" -device cx -hint.cx.0.at="isa" -hint.cx.0.port="0x240" -hint.cx.0.irq="15" -hint.cx.0.drq="7" -device ed -#options ED_NO_MIIBUS # Disable ed miibus support -hint.ed.0.at="isa" -hint.ed.0.port="0x280" -hint.ed.0.irq="5" -hint.ed.0.maddr="0xd8000" -device el 1 -hint.el.0.at="isa" -hint.el.0.port="0x300" -hint.el.0.irq="9" -device ie # Hints only required for Starlan -hint.ie.2.at="isa" -hint.ie.2.port="0x300" -hint.ie.2.irq="5" -hint.ie.2.maddr="0xd0000" -device le 1 -hint.le.0.at="isa" -hint.le.0.port="0x300" -hint.le.0.irq="5" -hint.le.0.maddr="0xd0000" -device lnc -hint.lnc.0.at="isa" -hint.lnc.0.port="0x280" -hint.lnc.0.irq="10" -hint.lnc.0.drq="0" -device rdp 1 -hint.rdp.0.at="isa" -hint.rdp.0.port="0x378" -hint.rdp.0.irq="7" -hint.rdp.0.flags="2" -device sbni -hint.sbni.0.at="isa" -hint.sbni.0.port="0x210" -hint.sbni.0.irq="0xefdead" -hint.sbni.0.flags="0" -device sr -hint.sr.0.at="isa" -hint.sr.0.port="0x300" -hint.sr.0.irq="5" -hint.sr.0.maddr="0xd0000" -device oltr -hint.oltr.0.at="isa" -device wl -hint.wl.0.at="isa" -hint.wl.0.port="0x300" -options WLCACHE # enables the signal-strength cache -options WLDEBUG # enables verbose debugging output - -device ath -device ath_hal # Atheros HAL (includes binary component) -#device wlan # 802.11 layer - -# -# ATA raid adapters -# -device pst - -# -# SCSI host adapters: -# -# ncv: NCR 53C500 based SCSI host adapters. -# nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters. -# stg: TMC 18C30, 18C50 based SCSI host adapters. 
- -device ncv -device nsp -device stg -hint.stg.0.at="isa" -hint.stg.0.port="0x140" -hint.stg.0.port="11" - -# -# Adaptec FSA RAID controllers, including integrated DELL controllers, -# the Dell PERC 2/QC and the HP NetRAID-4M -device aac -device aacp # SCSI Passthrough interface (optional, CAM required) - -# -# IBM (now Adaptec) ServeRAID controllers -device ips - -# -# SafeNet crypto driver: can be moved to the MI NOTES as soon as -# it's tested on a big-endian machine -# -device safe # SafeNet 1141 -options SAFE_DEBUG # enable debugging support: hw.safe.debug -options SAFE_RNDTEST # enable rndtest support - -##################################################################### - -# -# Miscellaneous hardware: -# -# wt: Wangtek and Archive QIC-02/QIC-36 tape drives -# ctx: Cortex-I frame grabber -# apm: Laptop Advanced Power Management (experimental) -# pmtimer: Timer device driver for power management events (APM or ACPI) -# spigot: The Creative Labs Video Spigot video-acquisition board -# dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) -# digi: Digiboard driver -# gp: National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB -# asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey -# gsc: Genius GS-4500 hand scanner. -# spic: Sony Programmable I/O controller (VAIO notebooks) -# stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) -# stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) - -# Notes on APM -# The flags takes the following meaning for apm0: -# 0x0020 Statclock is broken. -# If apm is omitted, some systems require sysctl kern.timecounter.method=1 -# for correct timekeeping. - -# Notes on the spigot: -# The video spigot is at 0xad6. This port address can not be changed. -# The irq values may only be 10, 11, or 15 -# I/O memory is an 8kb region. Possible values are: -# 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff -# The start address must be on an even boundary. 
-# Add the following option if you want to allow non-root users to be able -# to access the spigot. This option is not secure because it allows users -# direct access to the I/O page. -# options SPIGOT_UNSECURE - -# Notes on the Specialix SI/XIO driver: -# The host card is memory, not IO mapped. -# The Rev 1 host cards use a 64K chunk, on a 32K boundary. -# The Rev 2 host cards use a 32K chunk, on a 32K boundary. -# The cards can use an IRQ of 11, 12 or 15. - -# Notes on the Sony Programmable I/O controller -# This is a temporary driver that should someday be replaced by something -# that hooks into the ACPI layer. The device is hooked to the PIIX4's -# General Device 10 decoder, which means you have to fiddle with PCI -# registers to map it in, even though it is otherwise treated here as -# an ISA device. At the moment, the driver polls, although the device -# is capable of generating interrupts. It largely undocumented. -# The port location in the hint is where you WANT the device to be -# mapped. 0x10a0 seems to be traditional. At the moment the jogdial -# is the only thing truly supported, but aparently a fair percentage -# of the Vaio extra features are controlled by this device. - -# Notes on the Stallion stl and stli drivers: -# See src/i386/isa/README.stl for complete instructions. -# This is version 0.0.5alpha, unsupported by Stallion. -# The stl driver has a secondary IO port hard coded at 0x280. You need -# to change src/i386/isa/stallion.c if you reconfigure this on the boards. 
-# The "flags" and "msize" settings on the stli driver depend on the board: -# EasyConnection 8/64 ISA: flags 23 msize 0x1000 -# EasyConnection 8/64 EISA: flags 24 msize 0x10000 -# EasyConnection 8/64 MCA: flags 25 msize 0x1000 -# ONboard ISA: flags 4 msize 0x10000 -# ONboard EISA: flags 7 msize 0x10000 -# ONboard MCA: flags 3 msize 0x10000 -# Brumby: flags 2 msize 0x4000 -# Stallion: flags 1 msize 0x10000 - -# Notes on the Digiboard PC/Xi and PC/Xe series driver -# -# The NDGBPORTS option specifies the number of ports controlled by the -# dgb(4) driver. The default value is 16 ports per device. -# -# The following flag values have special meanings in dgb: -# 0x01 - alternate layout of pins -# 0x02 - use the windowed PC/Xe in 64K mode - -device wt 1 -hint.wt.0.at="isa" -hint.wt.0.port="0x300" -hint.wt.0.irq="5" -hint.wt.0.drq="1" -device ctx -hint.ctx.0.at="isa" -hint.ctx.0.port="0x230" -hint.ctx.0.maddr="0xd0000" -device spigot 1 -hint.spigot.0.at="isa" -hint.spigot.0.port="0xad6" -hint.spigot.0.irq="15" -hint.spigot.0.maddr="0xee000" -device apm -hint.apm.0.flags="0x20" -device pmtimer # Adjust system timer at wakeup time -device gp -hint.gp.0.at="isa" -hint.gp.0.port="0x2c0" -device gsc 1 -hint.gsc.0.at="isa" -hint.gsc.0.port="0x270" -hint.gsc.0.drq="3" -device dgb 1 -options NDGBPORTS=17 -hint.dgb.0.at="isa" -hint.dgb.0.port="0x220" -hint.dgb.0.maddr="0xfc000" -device digi -hint.digi.0.at="isa" -hint.digi.0.port="0x104" -hint.digi.0.maddr="0xd0000" -# BIOS & FEP/OS components of device digi. 
-device digi_CX -device digi_CX_PCI -device digi_EPCX -device digi_EPCX_PCI -device digi_Xe -device digi_Xem -device digi_Xr -device asc 1 -hint.asc.0.at="isa" -hint.asc.0.port="0x3EB" -hint.asc.0.drq="3" -hint.asc.0.irq="10" -device spic -hint.spic.0.at="isa" -hint.spic.0.port="0x10a0" -device stl -hint.stl.0.at="isa" -hint.stl.0.port="0x2a0" -hint.stl.0.irq="10" -device stli -hint.stli.0.at="isa" -hint.stli.0.port="0x2a0" -hint.stli.0.maddr="0xcc000" -hint.stli.0.flags="23" -hint.stli.0.msize="0x1000" -# You are unlikely to have the hardware for loran <phk@xxxxxxxxxxx> -device loran -hint.loran.0.at="isa" -hint.loran.0.irq="5" -# HOT1 Xilinx 6200 card (http://www.vcc.com/) -device xrpu - -# -# Laptop/Notebook options: -# -# See also: -# apm under `Miscellaneous hardware' -# above. - -# For older notebooks that signal a powerfail condition (external -# power supply dropped, or battery state low) by issuing an NMI: - -options POWERFAIL_NMI # make it beep instead of panicing - -# -# I2C Bus -# -# Philips i2c bus support is provided by the `iicbus' device. -# -# Supported interfaces: -# pcf Philips PCF8584 ISA-bus controller -# -device pcf -hint.pcf.0.at="isa" -hint.pcf.0.port="0x320" -hint.pcf.0.irq="5" - -#--------------------------------------------------------------------------- -# ISDN4BSD -# -# See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd. 
-# -# i4b passive ISDN cards support contains the following hardware drivers: -# -# isic - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver -# iwic - Winbond W6692 PCI bus ISDN S/T interface controller -# ifpi - AVM Fritz!Card PCI driver -# ifpi2 - AVM Fritz!Card PCI version 2 driver -# ihfc - Cologne Chip HFC ISA/ISA-PnP chipset driver -# ifpnp - AVM Fritz!Card PnP driver -# itjc - Siemens ISAC / TJNet Tiger300/320 chipset -# -# i4b active ISDN cards support contains the following hardware drivers: -# -# iavc - AVM B1 PCI, AVM B1 ISA, AVM T1 -# -# Note that the ``options'' (if given) and ``device'' lines must BOTH -# be uncommented to enable support for a given card ! -# -# In addition to a hardware driver (and probably an option) the mandatory -# ISDN protocol stack devices and the mandatory support device must be -# enabled as well as one or more devices from the optional devices section. -# -#--------------------------------------------------------------------------- -# isic driver (Siemens/Infineon chipsets) -# -device isic -# -# ISA bus non-PnP Cards: -# ---------------------- -# -# Teles S0/8 or Niccy 1008 -options TEL_S0_8 -hint.isic.0.at="isa" -hint.isic.0.maddr="0xd0000" -hint.isic.0.irq="5" -hint.isic.0.flags="1" -# -# Teles S0/16 or Creatix ISDN-S0 or Niccy 1016 -options TEL_S0_16 -hint.isic.0.at="isa" -hint.isic.0.port="0xd80" -hint.isic.0.maddr="0xd0000" -hint.isic.0.irq="5" -hint.isic.0.flags="2" -# -# Teles S0/16.3 -options TEL_S0_16_3 -hint.isic.0.at="isa" -hint.isic.0.port="0xd80" -hint.isic.0.irq="5" -hint.isic.0.flags="3" -# -# AVM A1 or AVM Fritz!Card -options AVM_A1 -hint.isic.0.at="isa" -hint.isic.0.port="0x340" -hint.isic.0.irq="5" -hint.isic.0.flags="4" -# -# USRobotics Sportster ISDN TA intern -options USR_STI -hint.isic.0.at="isa" -hint.isic.0.port="0x268" -hint.isic.0.irq="5" -hint.isic.0.flags="7" -# -# ITK ix1 Micro ( < V.3, non-PnP version ) -options ITKIX1 -hint.isic.0.at="isa" -hint.isic.0.port="0x398" -hint.isic.0.irq="10" 
-hint.isic.0.flags="18" -# -# ELSA PCC-16 -options ELSA_PCC16 -hint.isic.0.at="isa" -hint.isic.0.port="0x360" -hint.isic.0.irq="10" -hint.isic.0.flags="20" -# -# ISA bus PnP Cards: -# ------------------ -# -# Teles S0/16.3 PnP -options TEL_S0_16_3_P -# -# Creatix ISDN-S0 P&P -options CRTX_S0_P -# -# Dr. Neuhaus Niccy Go@ -options DRN_NGO -# -# Sedlbauer Win Speed -options SEDLBAUER -# -# Dynalink IS64PH -options DYNALINK -# -# ELSA QuickStep 1000pro ISA -options ELSA_QS1ISA -# -# Siemens I-Surf 2.0 -options SIEMENS_ISURF2 -# -# Asuscom ISDNlink 128K ISA -options ASUSCOM_IPAC -# -# Eicon Diehl DIVA 2.0 and 2.02 -options EICON_DIVA -# -# Compaq Microcom 610 ISDN card (Compaq series PSB2222I) -options COMPAQ_M610 -# -# PCI bus Cards: -# -------------- -# -# Cyclades Cyclom-Y PCI serial driver -device cy 1 -options CY_PCI_FASTINTR # Use with cy_pci unless irq is shared -hint.cy.0.at="isa" -hint.cy.0.irq="10" -hint.cy.0.maddr="0xd4000" -hint.cy.0.msize="0x2000" -# -#--------------------------------------------------------------------------- -# ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI) -options ELSA_QS1PCI -# -# -#--------------------------------------------------------------------------- -# ifpnp driver for AVM Fritz!Card PnP -# -# AVM Fritz!Card PnP -device ifpnp -# -#--------------------------------------------------------------------------- -# ihfc driver for Cologne Chip ISA chipsets (experimental!) 
-# -# Teles 16.3c ISA PnP -# AcerISDN P10 ISA PnP -# TELEINT ISDN SPEED No.1 -device ihfc -# -#--------------------------------------------------------------------------- -# ifpi driver for AVM Fritz!Card PCI -# -# AVM Fritz!Card PCI -device ifpi -# -#--------------------------------------------------------------------------- -# ifpi2 driver for AVM Fritz!Card PCI version 2 -# -# AVM Fritz!Card PCI version 2 -device "ifpi2" -# -#--------------------------------------------------------------------------- -# iwic driver for Winbond W6692 chipset -# -# ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards) -device iwic -# -#--------------------------------------------------------------------------- -# itjc driver for Simens ISAC / TJNet Tiger300/320 chipset -# -# Traverse Technologies NETjet-S -# Teles PCI-TJ -device itjc -# -#--------------------------------------------------------------------------- -# iavc driver (AVM active cards, needs i4bcapi driver!) -# -device iavc -# -# AVM B1 ISA bus (PnP mode not supported!) 
-# ---------------------------------------- -hint.iavc.0.at="isa" -hint.iavc.0.port="0x150" -hint.iavc.0.irq="5" -# -#--------------------------------------------------------------------------- -# ISDN Protocol Stack - mandatory for all hardware drivers -# -# Q.921 / layer 2 - i4b passive cards D channel handling -device "i4bq921" -# -# Q.931 / layer 3 - i4b passive cards D channel handling -device "i4bq931" -# -# layer 4 - i4b common passive and active card handling -device "i4b" -# -#--------------------------------------------------------------------------- -# ISDN devices - mandatory for all hardware drivers -# -# userland driver to do ISDN tracing (for passive cards only) -device "i4btrc" 4 -# -# userland driver to control the whole thing -device "i4bctl" -# -#--------------------------------------------------------------------------- -# ISDN devices - optional -# -# userland driver for access to raw B channel -device "i4brbch" 4 -# -# userland driver for telephony -device "i4btel" 2 -# -# network driver for IP over raw HDLC ISDN -device "i4bipr" 4 -# enable VJ header compression detection for ipr i/f -options IPR_VJ -# enable logging of the first n IP packets to isdnd (n=32 here) -options IPR_LOG=32 -# -# network driver for sync PPP over ISDN; requires an equivalent -# number of sppp device to be configured -device "i4bisppp" 4 -# -# B-channel interface to the netgraph subsystem -device "i4bing" 2 -# -# CAPI driver needed for active ISDN cards (see iavc driver above) -device "i4bcapi" -# -#--------------------------------------------------------------------------- - -# -# Set the number of PV entries per process. Increasing this can -# stop panics related to heavy use of shared memory. However, that can -# (combined with large amounts of physical memory) cause panics at -# boot time due the kernel running out of VM space. 
-# -# If you're tweaking this, you might also want to increase the sysctls -# "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target". -# -# The value below is the one more than the default. -# -options PMAP_SHPGPERPROC=201 - -# -# Change the size of the kernel virtual address space. Due to -# constraints in loader(8) on i386, this must be a multiple of 4. -# 256 = 1 GB of kernel address space. Increasing this also causes -# a reduction of the address space in user processes. 512 splits -# the 4GB cpu address space in half (2GB user, 2GB kernel). -# -options KVA_PAGES=260 - - -##################################################################### -# ABI Emulation - -# Enable iBCS2 runtime support for SCO and ISC binaries -options IBCS2 - -# Emulate spx device for client side of SVR3 local X interface -options SPX_HACK - -# Enable Linux ABI emulation -options COMPAT_LINUX - -# Enable i386 a.out binary support -options COMPAT_AOUT - -# Enable the linux-like proc filesystem support (requires COMPAT_LINUX -# and PSEUDOFS) -options LINPROCFS - -# -# SysVR4 ABI emulation -# -# The svr4 ABI emulator can be statically compiled into the kernel or loaded as -# a KLD module. -# The STREAMS network emulation code can also be compiled statically or as a -# module. If loaded as a module, it must be loaded before the svr4 module -# (the /usr/sbin/svr4 script does this for you). If compiling statically, -# the `streams' device must be configured into any kernel which also -# specifies COMPAT_SVR4. It is possible to have a statically-configured -# STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4 -# script understands that it doesn't need to load the `streams' module under -# those circumstances. -# Caveat: At this time, `options KTRACE' is required for the svr4 emulator -# (whether static or dynamic). 
-# -options COMPAT_SVR4 # build emulator statically -options DEBUG_SVR4 # enable verbose debugging -device streams # STREAMS network driver (required for svr4). - - -##################################################################### -# VM OPTIONS - -# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the -# kernel to use a 4 MByte pages to map the kernel instead of 4k pages. -# This saves on the amount of memory needed for page tables needed to -# map the kernel. You should only disable this feature as a temporary -# workaround if you are having problems with it enabled. -# -#options DISABLE_PSE - -# Disable the global pages PGE CPU feature. The PGE feature allows pages -# to be marked with the PG_G bit. TLB entries for these pages are not -# flushed from the cache when %cr3 is reloaded. This can make context -# switches less expensive. You should only disable this feature as a -# temporary workaround if you are having problems with it enabled. -# -#options DISABLE_PG_G - -# KSTACK_PAGES is the number of memory pages to assign to the kernel -# stack of each thread. - -options KSTACK_PAGES=3 - -##################################################################### - -# More undocumented options for linting. -# Note that documenting these are not considered an affront. 
- -options FB_INSTALL_CDEV # install a CDEV entry in /dev - -# PECOFF module (Win32 Execution Format) -options PECOFF_SUPPORT -options PECOFF_DEBUG - -options ENABLE_ALART -options I4B_SMP_WORKAROUND -options I586_PMC_GUPROF=0x70000 -options KBDIO_DEBUG=2 -options KBD_MAXRETRY=4 -options KBD_MAXWAIT=6 -options KBD_RESETDELAY=201 - -options PSM_DEBUG=1 - -options TIMER_FREQ=((14318182+6)/12) - -options VM_KMEM_SIZE -options VM_KMEM_SIZE_MAX -options VM_KMEM_SIZE_SCALE diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD --- a/freebsd-5.3-xen-sparse/i386-xen/conf/OLDCARD Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,17 +0,0 @@ -# -# OLDCARD -- Generic kernel configuration file for FreeBSD/i386 -# using the OLDCARD pccard system. -# -# $FreeBSD: src/sys/i386/conf/OLDCARD,v 1.18 2003/02/15 02:39:13 ru Exp $ - -include GENERIC - -ident OLDCARD - -# PCCARD (PCMCIA) support -nodevice cbb # cardbus (yenta) bridge -#nodevice pcic # ExCA ISA and PCI bridges -nodevice pccard # PC Card (16-bit) bus -nodevice cardbus # CardBus (32-bit) bus -device card 1 # pccard bus -device pcic # PCMCIA bridge diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/PAE --- a/freebsd-5.3-xen-sparse/i386-xen/conf/PAE Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,99 +0,0 @@ -# -# PAE -- Generic kernel configuration file for FreeBSD/i386 PAE -# -# $FreeBSD: src/sys/i386/conf/PAE,v 1.8 2003/11/03 22:49:19 jhb Exp $ - -include GENERIC - -ident PAE-GENERIC - -# To make a PAE kernel, the next option is needed -options PAE # Physical Address Extensions Kernel - -# Compile acpi in statically since the module isn't built properly. Most -# machines which support large amounts of memory require acpi. -device acpi - -# Don't build modules with this kernel config, since they are not built with -# the correct options headers. 
-makeoptions NO_MODULES=yes - -# What follows is a list of drivers that are normally in GENERIC, but either -# don't work or are untested with PAE. Be very careful before enabling any -# of these drivers. Drivers which use DMA and don't handle 64 bit physical -# address properly may cause data corruption when used in a machine with more -# than 4 gigabytes of memory. - -nodevice ahb -nodevice amd -nodevice isp -nodevice sym -nodevice trm - -nodevice adv -nodevice adw -nodevice aha -nodevice aic -nodevice bt - -nodevice ncv -nodevice nsp -nodevice stg - -nodevice asr -nodevice dpt -nodevice iir -nodevice mly - -nodevice amr -nodevice ida -nodevice mlx -nodevice pst - -nodevice agp - -nodevice de -nodevice txp -nodevice vx - -nodevice dc -nodevice pcn -nodevice rl -nodevice sf -nodevice sis -nodevice ste -nodevice tl -nodevice tx -nodevice vr -nodevice wb - -nodevice cs -nodevice ed -nodevice ex -nodevice ep -nodevice fe -nodevice ie -nodevice lnc -nodevice sn -nodevice xe - -nodevice wlan -nodevice an -nodevice awi -nodevice wi - -nodevice uhci -nodevice ohci -nodevice usb -nodevice ugen -nodevice uhid -nodevice ukbd -nodevice ulpt -nodevice umass -nodevice ums -nodevice urio -nodevice uscanner -nodevice aue -nodevice axe -nodevice cue -nodevice kue diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF --- a/freebsd-5.3-xen-sparse/i386-xen/conf/XENCONF Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,137 +0,0 @@ -# -# GENERIC -- Generic kernel configuration file for FreeBSD/i386 -# -# For more information on this file, please read the handbook section on -# Kernel Configuration Files: -# -# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html -# -# The handbook is also available locally in /usr/share/doc/handbook -# if you've installed the doc distribution, otherwise always see the -# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the -# latest information. 
-# -# An exhaustive list of options and more detailed explanations of the -# device lines is also present in the ../../conf/NOTES and NOTES files. -# If you are in doubt as to the purpose or necessity of a line, check first -# in NOTES. -# -# $FreeBSD: src/sys/i386/conf/GENERIC,v 1.394.2.3 2004/01/26 19:42:11 nectar Exp $ - -machine i386-xen -cpu I686_CPU -ident XEN - -#To statically compile in device wiring instead of /boot/device.hints -#hints "GENERIC.hints" #Default places to look for devices. - -makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols - -options SCHED_4BSD #4BSD scheduler -options INET #InterNETworking -options INET6 #IPv6 communications protocols -options FFS #Berkeley Fast Filesystem -options SOFTUPDATES #Enable FFS soft updates support -options UFS_ACL #Support for access control lists -options UFS_DIRHASH #Improve performance on big directories -options MD_ROOT #MD is a potential root device -options NFSCLIENT #Network Filesystem Client -options NFSSERVER #Network Filesystem Server -# options NFS_ROOT #NFS usable as /, requires NFSCLIENT -#options MSDOSFS #MSDOS Filesystem -#options CD9660 #ISO 9660 Filesystem -options PROCFS #Process filesystem (requires PSEUDOFS) -options PSEUDOFS #Pseudo-filesystem framework -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] 
-options COMPAT_FREEBSD4 #Compatible with FreeBSD4 -options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI -options KTRACE #ktrace(1) support -options SYSVSHM #SYSV-style shared memory -options SYSVMSG #SYSV-style message queues -options SYSVSEM #SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions -options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options CPU_DISABLE_SSE # don't turn on SSE framework with Xen -#options PFIL_HOOKS # pfil(9) framework - -# Debugging for use in -current -options KDB #Enable the kernel debugger -options INVARIANTS #Enable calls of extra sanity checking -options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS -#options WITNESS #Enable checks to detect deadlocks and cycles -#options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed - -# To make an SMP kernel, the next two are needed -#options SMP # Symmetric MultiProcessor Kernel -#device apic # I/O APIC - -# SCSI peripherals -device scbus # SCSI bus (required for SCSI) -#device ch # SCSI media changers -device da # Direct Access (disks) -#device sa # Sequential Access (tape etc) -#device cd # CD -device pass # Passthrough device (direct SCSI access) -#device ses # SCSI Environmental Services (and SAF-TE) - -# atkbdc0 controls both the keyboard and the PS/2 mouse -#device atkbdc # AT keyboard controller -#device atkbd # AT keyboard -#device psm # PS/2 mouse - -# device vga # VGA video card driver - -#device splash # Splash screen and screen saver support - -# syscons is the default console driver, resembling an SCO console -#device sc - -# Enable this for the pcvt (VT220 compatible) console driver -#device vt -#options XSERVER # support for X server on a vt console -#options FAT_CURSOR # start with block cursor - -#device agp # support several AGP chipsets - -# Floating point support - do not disable. 
-device npx - -# Serial (COM) ports -#device sio # 8250, 16[45]50 based serial ports - -# Parallel port -#device ppc -#device ppbus # Parallel port bus (required) -#device lpt # Printer -#device plip # TCP/IP over parallel -#device ppi # Parallel port interface device -#device vpo # Requires scbus and da - -# If you've got a "dumb" serial or parallel PCI card that is -# supported by the puc(4) glue driver, uncomment the following -# line to enable it (connects to the sio and/or ppc drivers): -#device puc - - -# Pseudo devices - the number indicates how many units to allocate. -device random # Entropy device -device loop # Network loopback -device ether # Ethernet support -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -device bpf # Berkeley packet filter - -#options BOOTP -options XEN -options MCLSHIFT=12 # this has to be enabled for Xen as we can only have one cluster per page -options MSIZE=256 -options DIAGNOSTIC -options MAXMEM=(256*1024) -options NOXENDEBUG=1 # Turn off Debugging printfs - diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk --- a/freebsd-5.3-xen-sparse/i386-xen/conf/gethints.awk Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,116 +0,0 @@ -#! /usr/bin/awk -f -# -# This is a transition aid. It extracts old-style configuration information -# from a config file and writes an equivalent device.hints file to stdout. -# You can use that with loader(8) or statically compile it in with the -# 'hints' directive. See how GENERIC and GENERIC.hints fit together for -# a static example. You should use loader(8) if at all possible. 
-# -# $FreeBSD: src/sys/i386/conf/gethints.awk,v 1.2 2002/07/26 03:52:30 peter Exp $ - -# skip commented lines, empty lines and not "device" lines -/^[ \t]*#/ || /^[ \t]*$/ || !/[ \t]*device/ { next; } - -# input format : -# device <name><unit> at <controler>[?] [key [val]]... -# possible keys are : -# disable, port #, irq #, drq #, drive #, iomem #, iosiz #, -# flags #, bus #, target #, unit #. -# output format : -# hint.<name>.<unit>.<key>=<val> -# mapped keys are : -# iomem -> maddr, iosiz -> msize. -{ - gsub ("#.*", ""); # delete comments - gsub ("\"", ""); # and double-quotes - nameunit = $2; # <name><unit> - at = $3; # at - controler = $4; # <controler>[?] - rest = 5; # optional keys begin at indice 5 - if (at != "at" || controler == "") - next; # skip devices w/o controlers - name = nameunit; - sub ("[0-9]*$", "", name); # get the name - unit = nameunit; - sub ("^" name, "", unit); # and the unit - sub ("\?$", "", controler); - printf "hint.%s.%s.at=\"%s\"\n", name, unit, controler; - # for each keys, if any ? - for (key = $rest; rest <= NF; key = $(++rest)) { - # skip auto-detect keys (the one w/ a ?) - if (key == "port?" || key == "drq?" || key == "irq?" || \ - key == "iomem?" 
|| key == "iosiz?") - continue; - # disable has no value, so, give it one - if (key == "disable") { - printf "hint.%s.%s.disabled=\"1\"\n", name, unit; - continue; - } - # recognized keys - if (key == "port" || key == "irq" || key == "drq" || \ - key == "drive" || key == "iomem" || key == "iosiz" || \ - key == "flags" || key == "bus" || key == "target" || \ - key == "unit") { - val = $(++rest); - if (val == "?") # has above - continue; - if (key == "port") { - # map port macros to static values - sub ("IO_AHA0", "0x330", val); - sub ("IO_AHA1", "0x334", val); - sub ("IO_ASC1", "0x3EB", val); - sub ("IO_ASC2", "0x22B", val); - sub ("IO_ASC3", "0x26B", val); - sub ("IO_ASC4", "0x2AB", val); - sub ("IO_ASC5", "0x2EB", val); - sub ("IO_ASC6", "0x32B", val); - sub ("IO_ASC7", "0x36B", val); - sub ("IO_ASC8", "0x3AB", val); - sub ("IO_BT0", "0x330", val); - sub ("IO_BT1", "0x334", val); - sub ("IO_CGA", "0x3D0", val); - sub ("IO_COM1", "0x3F8", val); - sub ("IO_COM2", "0x2F8", val); - sub ("IO_COM3", "0x3E8", val); - sub ("IO_COM4", "0x2E8", val); - sub ("IO_DMA1", "0x000", val); - sub ("IO_DMA2", "0x0C0", val); - sub ("IO_DMAPG", "0x080", val); - sub ("IO_FD1", "0x3F0", val); - sub ("IO_FD2", "0x370", val); - sub ("IO_GAME", "0x201", val); - sub ("IO_GSC1", "0x270", val); - sub ("IO_GSC2", "0x2E0", val); - sub ("IO_GSC3", "0x370", val); - sub ("IO_GSC4", "0x3E0", val); - sub ("IO_ICU1", "0x020", val); - sub ("IO_ICU2", "0x0A0", val); - sub ("IO_KBD", "0x060", val); - sub ("IO_LPT1", "0x378", val); - sub ("IO_LPT2", "0x278", val); - sub ("IO_LPT3", "0x3BC", val); - sub ("IO_MDA", "0x3B0", val); - sub ("IO_NMI", "0x070", val); - sub ("IO_NPX", "0x0F0", val); - sub ("IO_PMP1", "0x026", val); - sub ("IO_PMP2", "0x178", val); - sub ("IO_PPI", "0x061", val); - sub ("IO_RTC", "0x070", val); - sub ("IO_TIMER1", "0x040", val); - sub ("IO_TIMER2", "0x048", val); - sub ("IO_UHA0", "0x330", val); - sub ("IO_VGA", "0x3C0", val); - sub ("IO_WD1", "0x1F0", val); - sub ("IO_WD2", 
"0x170", val); - } else { - # map key names - sub ("iomem", "maddr", key); - sub ("iosiz", "msize", key); - } - printf "hint.%s.%s.%s=\"%s\"\n", name, unit, key, val; - continue; - } - printf ("unrecognized config token '%s:%s' on line %s\n", - rest, key, NR); # > "/dev/stderr"; - } -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,559 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz and Don Ahn. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk Exp $"); - -/* #define DELAYDEBUG */ -/* - * Routines to handle clock hardware. - */ - -/* - * inittodr, settodr and support routines written - * by Christoph Robitschko <chmr@xxxxxxxxxxxxxxxxxx> - * - * reintroduced and updated by Chris Stenton <chris@xxxxxxxxxxx> 8/10/94 - */ - -#include "opt_clock.h" -#include "opt_isa.h" -#include "opt_mca.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/time.h> -#include <sys/timetc.h> -#include <sys/kernel.h> -#include <sys/limits.h> -#include <sys/sysctl.h> -#include <sys/cons.h> -#include <sys/power.h> - -#include <machine/clock.h> -#include <machine/cputypes.h> -#include <machine/frame.h> -#include <machine/intr_machdep.h> -#include <machine/md_var.h> -#include <machine/psl.h> -#if defined(SMP) -#include <machine/smp.h> -#endif -#include <machine/specialreg.h> - -#include <i386/isa/icu.h> -#include <i386/isa/isa.h> -#include <isa/rtc.h> -#include <i386/isa/timerreg.h> - -/* XEN specific defines */ -#include <machine/xen_intr.h> -#include <vm/vm.h> /* needed by machine/pmap.h */ -#include <vm/pmap.h> /* needed by machine/pmap.h */ -#include <machine/pmap.h> /* needed by xen-os.h */ -#include <machine/hypervisor-ifs.h> -#include 
<machine/xen-os.h> /* needed by xenfunc.h */ -#include <machine/xenfunc.h> - -/* - * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we - * can use a simple formula for leap years. - */ -#define LEAPYEAR(y) (((u_int)(y) % 4 == 0) ? 1 : 0) -#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) - -int adjkerntz; /* local offset from GMT in seconds */ -int clkintr_pending; -int disable_rtc_set = 1; /* disable resettodr() if != 0 */ -int pscnt = 1; -int psdiv = 1; -int statclock_disable; -#ifndef TIMER_FREQ -#define TIMER_FREQ 1193182 -#endif -u_int timer_freq = TIMER_FREQ; -struct mtx clock_lock; - - -static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; - -/* Values for timerX_state: */ -#define RELEASED 0 -#define RELEASE_PENDING 1 -#define ACQUIRED 2 -#define ACQUIRE_PENDING 3 - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -static unsigned long fast_gettimeoffset_quotient; - -/* These are peridically updated in shared_info, and then copied here. */ -static uint32_t shadow_tsc_stamp; -static uint64_t shadow_system_time; -static uint32_t shadow_time_version; -static struct timeval shadow_tv; - -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name - -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) - - -static uint64_t processed_system_time;/* System time (ns) at last processing. 
*/ -static DEFINE_PER_CPU(uint64_t, processed_system_time); - - -#define NS_PER_TICK (1000000000ULL/hz) - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@xxxxxxxxxx) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@xxxxxxxxxx "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called with the xtime_lock held for writing. 
- */ -static void __get_time_values_from_xen(void) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - do { - shadow_time_version = s->time_version2; - rmb(); - shadow_tv.tv_sec = s->wc_sec; - shadow_tv.tv_usec = s->wc_usec; - shadow_tsc_stamp = (uint32_t)s->tsc_timestamp; - shadow_system_time = s->system_time; - rmb(); - } - while (shadow_time_version != s->time_version1); -} - -#define TIME_VALUES_UP_TO_DATE \ - (shadow_time_version == HYPERVISOR_shared_info->time_version2) - -static void (*timer_func)(struct clockframe *frame) = hardclock; - -static unsigned xen_get_offset(void); -static unsigned xen_get_timecount(struct timecounter *tc); - -static struct timecounter xen_timecounter = { - xen_get_timecount, /* get_timecount */ - 0, /* no poll_pps */ - ~0u, /* counter_mask */ - 0, /* frequency */ - "ixen", /* name */ - 0 /* quality */ -}; - - -static void -clkintr(struct clockframe *frame) -{ - int64_t cpu_delta, delta; - int cpu = smp_processor_id(); - long ticks = 0; - - do { - __get_time_values_from_xen(); - delta = cpu_delta = (int64_t)shadow_system_time + - (int64_t)xen_get_offset() * 1000; - delta -= processed_system_time; - cpu_delta -= per_cpu(processed_system_time, cpu); - } while (!TIME_VALUES_UP_TO_DATE); - - if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) { - printk("Timer ISR: Time went backwards: %lld\n", delta); - return; - } - - /* Process elapsed ticks since last call. */ - while ( delta >= NS_PER_TICK ) - { - ticks++; - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - } - /* Local CPU jiffy work. 
*/ - while (cpu_delta >= NS_PER_TICK) { - cpu_delta -= NS_PER_TICK; - per_cpu(processed_system_time, cpu) += NS_PER_TICK; -#if 0 - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); -#endif - } - if (ticks > 0) { - if (frame) timer_func(frame); - } - - if (cpu != 0) - return; - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - - /* XXX TODO */ -} - -#include "opt_ddb.h" -static uint32_t -getit(void) -{ - __get_time_values_from_xen(); - return shadow_tsc_stamp; -} - -/* - * Wait "n" microseconds. - * Relies on timer 1 counting down from (timer_freq / hz) - * Note: timer had better have been programmed before this is first used! - */ -void -DELAY(int n) -{ - int delta, ticks_left; - uint32_t tick, prev_tick; -#ifdef DELAYDEBUG - int getit_calls = 1; - int n1; - static int state = 0; - - if (state == 0) { - state = 1; - for (n1 = 1; n1 <= 10000000; n1 *= 10) - DELAY(n1); - state = 2; - } - if (state == 1) - printf("DELAY(%d)...", n); -#endif - /* - * Read the counter first, so that the rest of the setup overhead is - * counted. Guess the initial overhead is 20 usec (on most systems it - * takes about 1.5 usec for each of the i/o's in getit(). The loop - * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The - * multiplications and divisions to scale the count take a while). - * - * However, if ddb is active then use a fake counter since reading - * the i8254 counter involves acquiring a lock. ddb must not go - * locking for many reasons, but it calls here for at least atkbd - * input. - */ - prev_tick = getit(); - - n -= 0; /* XXX actually guess no initial overhead */ - /* - * Calculate (n * (timer_freq / 1e6)) without using floating point - * and without any avoidable overflows. - */ - if (n <= 0) - ticks_left = 0; - else if (n < 256) - /* - * Use fixed point to avoid a slow division by 1000000. 
- * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. - * 2^15 is the first power of 2 that gives exact results - * for n between 0 and 256. - */ - ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; - else - /* - * Don't bother using fixed point, although gcc-2.7.2 - * generates particularly poor code for the long long - * division, since even the slow way will complete long - * before the delay is up (unless we're interrupted). - */ - ticks_left = ((u_int)n * (long long)timer_freq + 999999) - / 1000000; - - while (ticks_left > 0) { - tick = getit(); -#ifdef DELAYDEBUG - ++getit_calls; -#endif - delta = tick - prev_tick; - prev_tick = tick; - if (delta < 0) { - /* - * Guard against timer0_max_count being wrong. - * This shouldn't happen in normal operation, - * but it may happen if set_timer_freq() is - * traced. - */ - /* delta += timer0_max_count; ??? */ - if (delta < 0) - delta = 0; - } - ticks_left -= delta; - } -#ifdef DELAYDEBUG - if (state == 1) - printf(" %d calls to getit() at %d usec each\n", - getit_calls, (n + 5) / getit_calls); -#endif -} - - -int -sysbeep(int pitch, int period) -{ - return (0); -} - -/* - * Restore all the timers non-atomically (XXX: should be atomically). - * - * This function is called from pmtimer_resume() to restore all the timers. - * This should not be necessary, but there are broken laptops that do not - * restore all the timers on resume. - */ -void -timer_restore(void) -{ - /* Get timebases for new environment. */ - __get_time_values_from_xen(); - - /* Reset our own concept of passage of system time. 
*/ - processed_system_time = shadow_system_time; -} - -void -startrtclock() -{ - unsigned long long alarm; - uint64_t __cpu_khz; - uint32_t cpu_khz; - - __cpu_khz = HYPERVISOR_shared_info->cpu_freq; - __cpu_khz /= 1000; - cpu_khz = (uint32_t)__cpu_khz; - printk("Xen reported: %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = - (2^32 * 1 / (clocks/us)) */ - { - unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (fast_gettimeoffset_quotient), "=d" (edx) - :"r" (cpu_khz), - "0" (eax), "1" (edx)); - } - - set_cyc2ns_scale(cpu_khz/1000); - timer_freq = tsc_freq = xen_timecounter.tc_frequency = cpu_khz * 1000; - tc_init(&xen_timecounter); - - - rdtscll(alarm); -} - -/* - * Initialize the time of day register, based on the time base which is, e.g. - * from a filesystem. - */ -void -inittodr(time_t base) -{ - int s, y; - struct timespec ts; - - s = splclock(); - if (base) { - ts.tv_sec = base; - ts.tv_nsec = 0; - tc_setclock(&ts); - } - - y = time_second - shadow_tv.tv_sec; - if (y <= -2 || y >= 2) { - /* badly off, adjust it */ - ts.tv_sec = shadow_tv.tv_sec; - ts.tv_nsec = shadow_tv.tv_usec * 1000; - tc_setclock(&ts); - } - splx(s); -} - -/* - * Write system time back to RTC. Not supported for guest domains. - */ -void -resettodr() -{ -} - - -/* - * Start clocks running. - */ -void -cpu_initclocks(void) -{ - int diag; - int time_irq = bind_virq_to_irq(VIRQ_TIMER); - - if ((diag = intr_add_handler("clk", time_irq, - (driver_intr_t *)clkintr, NULL, - INTR_TYPE_CLK | INTR_FAST, NULL))) { - panic("failed to register clock interrupt: %d\n", diag); - } - - /* should fast clock be enabled ? 
*/ - - /* initialize xen values */ - __get_time_values_from_xen(); - processed_system_time = shadow_system_time; - per_cpu(processed_system_time, 0) = processed_system_time; - -} - -#ifdef SMP -void -ap_cpu_initclocks(void) -{ - int irq; - int cpu = smp_processor_id(); - - per_cpu(processed_system_time, cpu) = shadow_system_time; - - irq = bind_virq_to_irq(VIRQ_TIMER); - PCPU_SET(time_irq, irq); - PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr, - NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); -} -#endif - -void -cpu_startprofclock(void) -{ - - printf("cpu_startprofclock: profiling clock is not supported\n"); -} - -void -cpu_stopprofclock(void) -{ - - printf("cpu_stopprofclock: profiling clock is not supported\n"); -} - -static uint32_t -xen_get_timecount(struct timecounter *tc) -{ - __get_time_values_from_xen(); - return shadow_tsc_stamp; -} - -/* - * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c - */ -#undef rdtsc -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -static uint32_t -xen_get_offset(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= shadow_tsc_stamp; - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. 
- */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return edx; -} - -void -idle_block(void) -{ - if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0) - HYPERVISOR_block(); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/critical.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,46 +0,0 @@ -/*- - * Copyright (c) 2002 Matthew Dillon. All Rights Reserved. - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/critical.c,v 1.12 2003/11/03 21:06:54 jhb Exp $"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <machine/critical.h> -#include <machine/psl.h> - -/* - * cpu_critical_fork_exit() - cleanup after fork - * - * Enable interrupts in the saved copy of eflags. - */ -void -cpu_critical_fork_exit(void) -{ - curthread->td_md.md_savecrit = 0; -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,533 +0,0 @@ -/****************************************************************************** - * ctrl_if.c - * - * Management functions for special interface to the domain controller. 
- * - * Copyright (c) 2004, K A Fraser - * Copyright (c) 2004, K M Macy - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/uio.h> -#include <sys/bus.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/selinfo.h> -#include <sys/poll.h> -#include <sys/conf.h> -#include <sys/fcntl.h> -#include <sys/ioccom.h> -#include <sys/taskqueue.h> - - -#include <machine/cpufunc.h> -#include <machine/intr_machdep.h> -#include <machine/xen-os.h> -#include <machine/xen_intr.h> -#include <machine/bus.h> -#include <sys/rman.h> -#include <machine/resource.h> -#include <machine/synch_bitops.h> - - -#include <machine/hypervisor-ifs.h> - -#include <machine/ctrl_if.h> -#include <machine/evtchn.h> - -/* - * Extra ring macros to sync a consumer index up to the public producer index. - * Generally UNSAFE, but we use it for recovery and shutdown in some cases. - */ -#define RING_DROP_PENDING_REQUESTS(_r) \ - do { \ - (_r)->req_cons = (_r)->sring->req_prod; \ - } while (0) -#define RING_DROP_PENDING_RESPONSES(_r) \ - do { \ - (_r)->rsp_cons = (_r)->sring->rsp_prod; \ - } while (0) -/* - * Only used by initial domain which must create its own control-interface - * event channel. This value is picked up by the user-space domain controller - * via an ioctl. - */ -int initdom_ctrlif_domcontroller_port = -1; - -static int ctrl_if_evtchn; -static int ctrl_if_irq; -static struct mtx ctrl_if_lock; -static int * ctrl_if_wchan = &ctrl_if_evtchn; - - -static ctrl_front_ring_t ctrl_if_tx_ring; -static ctrl_back_ring_t ctrl_if_rx_ring; - -/* Incoming message requests. */ - /* Primary message type -> message handler. */ -static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256]; - /* Primary message type -> callback in process context? */ -static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)]; - /* Queue up messages to be handled in process context. 
*/ -static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE]; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons; - -/* Incoming message responses: message identifier -> message handler/id. */ -static struct { - ctrl_msg_handler_t fn; - unsigned long id; -} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE]; - -/* - * FreeBSD task queues don't allow you to requeue an already executing task. - * Since ctrl_if_interrupt clears the TX_FULL condition and schedules any - * waiting tasks, which themselves may need to schedule a new task - * (due to new a TX_FULL condition), we ping-pong between these A/B task queues. - * The interrupt runs anything on the current queue and moves the index so that - * future schedulings occur on the next queue. We should never get into a - * situation where there is a task scheduleded on both the A & B queues. - */ -TASKQUEUE_DECLARE(ctrl_if_txA); -TASKQUEUE_DEFINE(ctrl_if_txA, NULL, NULL, {}); -TASKQUEUE_DECLARE(ctrl_if_txB); -TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {}); -struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA, - &taskqueue_ctrl_if_txB }; -static int ctrl_if_idx = 0; - -static struct task ctrl_if_rx_tasklet; -static struct task ctrl_if_tx_tasklet; - /* Passed to schedule_task(). */ -static struct task ctrl_if_rxmsg_deferred_task; - - - -#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048)) - -static void -ctrl_if_notify_controller(void) -{ - notify_via_evtchn(ctrl_if_evtchn); -} - -static void -ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id) -{ - msg->length = 0; - ctrl_if_send_response(msg); -} - -static void -__ctrl_if_tx_tasklet(void *context __unused, int pending __unused) -{ - ctrl_msg_t *msg; - int was_full = RING_FULL(&ctrl_if_tx_ring); - RING_IDX i, rp; - - i = ctrl_if_tx_ring.rsp_cons; - rp = ctrl_if_tx_ring.sring->rsp_prod; - rmb(); /* Ensure we see all requests up to 'rp'. 
*/ - - for ( ; i != rp; i++ ) - { - msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i); - - /* Execute the callback handler, if one was specified. */ - if ( msg->id != 0xFF ) - { - (*ctrl_if_txmsg_id_mapping[msg->id].fn)( - msg, ctrl_if_txmsg_id_mapping[msg->id].id); - smp_mb(); /* Execute, /then/ free. */ - ctrl_if_txmsg_id_mapping[msg->id].fn = NULL; - } - - } - - /* - * Step over the message in the ring /after/ finishing reading it. As - * soon as the index is updated then the message may get blown away. - */ - smp_mb(); - ctrl_if_tx_ring.rsp_cons = i; - - if ( was_full && !RING_FULL(&ctrl_if_tx_ring) ) - { - wakeup(ctrl_if_wchan); - - /* bump idx so future enqueues will occur on the next taskq - * process any currently pending tasks - */ - ctrl_if_idx++; - taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]); - } - -} - -static void -__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused) -{ - ctrl_msg_t *msg; - CONTROL_RING_IDX dp; - - dp = ctrl_if_rxmsg_deferred_prod; - rmb(); /* Ensure we see all deferred requests up to 'dp'. */ - - while ( ctrl_if_rxmsg_deferred_cons != dp ) - { - msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX( - ctrl_if_rxmsg_deferred_cons++)]; - (*ctrl_if_rxmsg_handler[msg->type])(msg, 0); - } - -} - -static void -__ctrl_if_rx_tasklet(void *context __unused, int pending __unused) -{ - ctrl_msg_t msg, *pmsg; - CONTROL_RING_IDX dp; - RING_IDX rp, i; - - i = ctrl_if_rx_ring.req_cons; - rp = ctrl_if_rx_ring.sring->req_prod; - dp = ctrl_if_rxmsg_deferred_prod; - - rmb(); /* Ensure we see all requests up to 'rp'. 
*/ - - for ( ; i != rp; i++) - { - pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i); - memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg)); - - if ( msg.length > sizeof(msg.msg)) - msg.length = sizeof(msg.msg); - if ( msg.length != 0 ) - memcpy(msg.msg, pmsg->msg, msg.length); - if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) ) - { - memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)], - &msg, offsetof(ctrl_msg_t, msg) + msg.length); - } - else - { - (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0); - } - } - ctrl_if_rx_ring.req_cons = i; - - if ( dp != ctrl_if_rxmsg_deferred_prod ) - { - wmb(); - ctrl_if_rxmsg_deferred_prod = dp; - taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task); - } - -} - -static void -ctrl_if_interrupt(void *ctrl_sc) -/* (int irq, void *dev_id, struct pt_regs *regs) */ -{ - - - if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) ) - taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet); - - - if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) ) - taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet); - -} - -int -ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id) -{ - unsigned long flags; - ctrl_msg_t *dmsg; - int i; - - mtx_lock_irqsave(&ctrl_if_lock, flags); - - if ( RING_FULL(&ctrl_if_tx_ring) ) - { - mtx_unlock_irqrestore(&ctrl_if_lock, flags); - return EAGAIN; - } - - msg->id = 0xFF; - if ( hnd != NULL ) - { - for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ ) - continue; - ctrl_if_txmsg_id_mapping[i].fn = hnd; - ctrl_if_txmsg_id_mapping[i].id = id; - msg->id = i; - } - - dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring, - ctrl_if_tx_ring.req_prod_pvt); - memcpy(dmsg, msg, sizeof(*msg)); - ctrl_if_tx_ring.req_prod_pvt++; - RING_PUSH_REQUESTS(&ctrl_if_tx_ring); - - mtx_unlock_irqrestore(&ctrl_if_lock, flags); - - ctrl_if_notify_controller(); - - return 0; -} - -int -ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state) -{ - 
int rc, sst = 0; - - /* Fast path. */ - if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN ) - goto done; - - for ( ; ; ) - { - - if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN ) - break; - - if ( sst != 0) { - rc = EINTR; - goto done; - } - - sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10); - } - done: - - return rc; -} - -int -ctrl_if_enqueue_space_callback(struct task *task) -{ - - /* Fast path. */ - if ( !RING_FULL(&ctrl_if_tx_ring) ) - return 0; - - (void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task); - - /* - * We may race execution of the task queue, so return re-checked status. If - * the task is not executed despite the ring being non-full then we will - * certainly return 'not full'. - */ - smp_mb(); - return RING_FULL(&ctrl_if_tx_ring); -} - -void -ctrl_if_send_response(ctrl_msg_t *msg) -{ - unsigned long flags; - ctrl_msg_t *dmsg; - - /* - * NB. The response may the original request message, modified in-place. - * In this situation we may have src==dst, so no copying is required. 
- */ - mtx_lock_irqsave(&ctrl_if_lock, flags); - dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring, - ctrl_if_rx_ring.rsp_prod_pvt); - if ( dmsg != msg ) - memcpy(dmsg, msg, sizeof(*msg)); - - ctrl_if_rx_ring.rsp_prod_pvt++; - RING_PUSH_RESPONSES(&ctrl_if_rx_ring); - - mtx_unlock_irqrestore(&ctrl_if_lock, flags); - - ctrl_if_notify_controller(); -} - -int -ctrl_if_register_receiver( - uint8_t type, - ctrl_msg_handler_t hnd, - unsigned int flags) -{ - unsigned long _flags; - int inuse; - - mtx_lock_irqsave(&ctrl_if_lock, _flags); - - inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler); - - if ( inuse ) - { - printk("Receiver %p already established for control " - "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type); - } - else - { - ctrl_if_rxmsg_handler[type] = hnd; - clear_bit(type, &ctrl_if_rxmsg_blocking_context); - if ( flags == CALLBACK_IN_BLOCKING_CONTEXT ) - { - set_bit(type, &ctrl_if_rxmsg_blocking_context); - } - } - - mtx_unlock_irqrestore(&ctrl_if_lock, _flags); - - return !inuse; -} - -void -ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd) -{ - unsigned long flags; - - mtx_lock_irqsave(&ctrl_if_lock, flags); - - if ( ctrl_if_rxmsg_handler[type] != hnd ) - printk("Receiver %p is not registered for control " - "messages of type %d.\n", hnd, type); - else - ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler; - - mtx_unlock_irqrestore(&ctrl_if_lock, flags); - - /* Ensure that @hnd will not be executed after this function returns. */ - /* XXX need rx_tasklet_lock -- can cheat for now?*/ -#ifdef notyet - tasklet_unlock_wait(&ctrl_if_rx_tasklet); -#endif -} - -void -ctrl_if_suspend(void) -{ - /* I'm not sure what the equivalent is - we aren't going to support suspend - * yet anyway - */ -#ifdef notyet - free_irq(ctrl_if_irq, NULL); -#endif - unbind_evtchn_from_irq(ctrl_if_evtchn); -} - -#if 0 -/** Reset the control interface progress pointers. - * Marks the queues empty if 'clear' non-zero. 
- */ -static void -ctrl_if_reset(int clear) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - if (clear) { - *ctrl_if = (control_if_t){}; - } - - ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod; - ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod; -} - -#endif -void -ctrl_if_resume(void) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - TRACE_ENTER; - if ( xen_start_info->flags & SIF_INITDOMAIN ) - { - /* - * The initial domain must create its own domain-controller link. - * The controller is probably not running at this point, but will - * pick up its end of the event channel from - */ - evtchn_op_t op; - op.cmd = EVTCHNOP_bind_interdomain; - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = DOMID_SELF; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = 0; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("event_channel_op failed\n"); - xen_start_info->domain_controller_evtchn = op.u.bind_interdomain.port1; - initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; - } - - - /* Sync up with shared indexes. 
*/ - FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM); - BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM); - - ctrl_if_evtchn = xen_start_info->domain_controller_evtchn; - ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn); - - /* - * I have not taken the time to determine what the interrupt thread priorities - * correspond to - this interface is used for network and disk, network would - * seem higher priority, hence I'm using it - */ - - intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt, - NULL, INTR_TYPE_NET, NULL); - TRACE_EXIT; - /* XXX currently assuming not MPSAFE */ -} - -static void -ctrl_if_init(void *dummy __unused) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - int i; - - for ( i = 0; i < 256; i++ ) - ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler; - - FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM); - BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM); - - mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS); - - TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL); - - TASK_INIT(&ctrl_if_rx_tasklet, 0, __ctrl_if_rx_tasklet, NULL); - - TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task, NULL); - - - ctrl_if_resume(); -} - -/* - * !! The following are DANGEROUS FUNCTIONS !! - * Use with care [for example, see xencons_force_flush()]. 
- */ - -int -ctrl_if_transmitter_empty(void) -{ - return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons); -} - -void -ctrl_if_discard_responses(void) -{ - RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring); -} - -SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/db_interface.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,209 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@xxxxxxxxxx - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/db_interface.c,v 1.77 2003/11/08 03:01:26 alc Exp $"); - -/* - * Interface to new debugger. 
- */ -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/reboot.h> -#include <sys/cons.h> -#include <sys/pcpu.h> -#include <sys/proc.h> -#include <sys/smp.h> - -#include <machine/cpu.h> -#ifdef SMP -#include <machine/smptests.h> /** CPUSTOP_ON_DDBBREAK */ -#endif - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <ddb/ddb.h> - -#include <machine/setjmp.h> -#include <machine/xenfunc.h> - - -static jmp_buf *db_nofault = 0; -extern jmp_buf db_jmpbuf; - -extern void gdb_handle_exception(db_regs_t *, int, int); - -int db_active; -db_regs_t ddb_regs; - -static __inline u_short -rss(void) -{ - u_short ss; -#ifdef __GNUC__ - __asm __volatile("mov %%ss,%0" : "=r" (ss)); -#else - ss = 0; /* XXXX Fix for other compilers. */ -#endif - return ss; -} - -/* - * kdb_trap - field a TRACE or BPT trap - */ -int -kdb_trap(int type, int code, struct i386_saved_state *regs) -{ - volatile int ddb_mode = !(boothowto & RB_GDB); - - disable_intr(); - - if (ddb_mode) { - /* we can't do much as a guest domain except print a - * backtrace and die gracefuly. The reason is that we - * can't get character input to make this work. - */ - db_active = 1; - db_print_backtrace(); - db_printf("************ Domain shutting down ************\n"); - HYPERVISOR_shutdown(); - } else { - Debugger("kdb_trap"); - } - return (1); -} - -/* - * Read bytes from kernel address space for debugger. - */ -void -db_read_bytes(vm_offset_t addr, size_t size, char *data) -{ - char *src; - - db_nofault = &db_jmpbuf; - - src = (char *)addr; - while (size-- > 0) - *data++ = *src++; - - db_nofault = 0; -} - -/* - * Write bytes to kernel address space for debugger. 
- */ -void -db_write_bytes(vm_offset_t addr, size_t size, char *data) -{ - char *dst; - - pt_entry_t *ptep0 = NULL; - pt_entry_t oldmap0 = 0; - vm_offset_t addr1; - pt_entry_t *ptep1 = NULL; - pt_entry_t oldmap1 = 0; - - db_nofault = &db_jmpbuf; - - if (addr > trunc_page((vm_offset_t)btext) - size && - addr < round_page((vm_offset_t)etext)) { - - ptep0 = pmap_pte(kernel_pmap, addr); - oldmap0 = *ptep0; - *ptep0 |= PG_RW; - - /* Map another page if the data crosses a page boundary. */ - if ((*ptep0 & PG_PS) == 0) { - addr1 = trunc_page(addr + size - 1); - if (trunc_page(addr) != addr1) { - ptep1 = pmap_pte(kernel_pmap, addr1); - oldmap1 = *ptep1; - *ptep1 |= PG_RW; - } - } else { - addr1 = trunc_4mpage(addr + size - 1); - if (trunc_4mpage(addr) != addr1) { - ptep1 = pmap_pte(kernel_pmap, addr1); - oldmap1 = *ptep1; - *ptep1 |= PG_RW; - } - } - - invltlb(); - } - - dst = (char *)addr; - - while (size-- > 0) - *dst++ = *data++; - - db_nofault = 0; - - if (ptep0) { - *ptep0 = oldmap0; - - if (ptep1) - *ptep1 = oldmap1; - - invltlb(); - } -} - -/* - * XXX - * Move this to machdep.c and allow it to be called if any debugger is - * installed. - */ -void -Debugger(const char *msg) -{ - static volatile u_int in_Debugger; - - /* - * XXX - * Do nothing if the console is in graphics mode. This is - * OK if the call is for the debugger hotkey but not if the call - * is a weak form of panicing. 
- */ - if (cons_unavail && !(boothowto & RB_GDB)) - return; - - if (atomic_cmpset_acq_int(&in_Debugger, 0, 1)) { - db_printf("Debugger(\"%s\")\n", msg); - breakpoint(); - atomic_store_rel_int(&in_Debugger, 0); - } -} - -void -db_show_mdpcpu(struct pcpu *pc) -{ - - db_printf("APIC ID = %d\n", pc->pc_apic_id); - db_printf("currentldt = 0x%x\n", pc->pc_currentldt); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,666 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * Communication via Xen event channels. - * - * Copyright (c) 2002-2004, K A Fraser - */ -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/mutex.h> - -#include <machine/cpufunc.h> -#include <machine/intr_machdep.h> -#include <machine/xen-os.h> -#include <machine/xen_intr.h> -#include <machine/synch_bitops.h> -#include <machine/evtchn.h> -#include <machine/hypervisor.h> -#include <machine/hypervisor-ifs.h> - - -static struct mtx irq_mapping_update_lock; - -#define TODO printf("%s: not implemented!\n", __func__) - -/* IRQ <-> event-channel mappings. */ -static int evtchn_to_irq[NR_EVENT_CHANNELS]; -static int irq_to_evtchn[NR_IRQS]; - -static int virq_to_irq[MAX_VIRT_CPUS][NR_VIRQS]; -static int ipi_to_evtchn[MAX_VIRT_CPUS][NR_VIRQS]; - - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -#define VALID_EVTCHN(_chn) ((_chn) != -1) - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. 
- */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0); -} - -void -evtchn_do_upcall(struct intrframe *frame) -{ - unsigned long l1, l2; - unsigned int l1i, l2i, port; - int irq, owned; - unsigned long flags; - shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()]; - - local_irq_save(flags); - - while ( s->vcpu_data[0].evtchn_upcall_pending ) - { - s->vcpu_data[0].evtchn_upcall_pending = 0; - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); - while ( (l1i = ffs(l1)) != 0 ) - { - l1i--; - l1 &= ~(1 << l1i); - - l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i]; - while ( (l2i = ffs(l2)) != 0 ) - { - l2i--; - l2 &= ~(1 << l2i); - - port = (l1i << 5) + l2i; - irq = evtchn_to_irq[port]; -#ifdef SMP - if (irq == PCPU_GET(cpuast)) - continue; -#endif - if ( (owned = mtx_owned(&sched_lock)) != 0 ) - mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); - if ( irq != -1 ) { - struct intsrc *isrc = intr_lookup_source(irq); - intr_execute_handlers(isrc, frame); - } else { - evtchn_device_upcall(port); - } - if ( owned ) - mtx_lock_spin_flags(&sched_lock, MTX_QUIET); - } - } - } - - local_irq_restore(flags); - -} - - -static int -find_unbound_irq(void) -{ - int irq; - - for ( irq = 0; irq < NR_IRQS; irq++ ) - if ( irq_bindcount[irq] == 0 ) - break; - - if ( irq == NR_IRQS ) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return irq; -} - -int -bind_virq_to_irq(int virq) -{ - evtchn_op_t op; - int evtchn, irq; - - mtx_lock(&irq_mapping_update_lock); - - if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 ) - { - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IRQ %d\n", virq); - evtchn = op.u.bind_virq.port; - - irq = find_unbound_irq(); - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - PCPU_GET(virq_to_irq)[virq] = irq; - } - - 
irq_bindcount[irq]++; - - mtx_unlock(&irq_mapping_update_lock); - - return irq; -} - -void -unbind_virq_from_irq(int virq) -{ - evtchn_op_t op; - int irq = PCPU_GET(virq_to_irq)[virq]; - int evtchn = irq_to_evtchn[irq]; - - mtx_lock(&irq_mapping_update_lock); - - if ( --irq_bindcount[irq] == 0 ) - { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; - op.u.close.port = evtchn; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to unbind virtual IRQ %d\n", virq); - - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; - PCPU_GET(virq_to_irq)[virq] = -1; - } - - mtx_unlock(&irq_mapping_update_lock); -} - - -int -bind_ipi_on_cpu_to_irq(int cpu, int ipi) -{ - evtchn_op_t op; - int evtchn, irq; - - mtx_lock(&irq_mapping_update_lock); - - if ( (evtchn = PCPU_GET(ipi_to_evtchn)[ipi]) == 0 ) - { - op.cmd = EVTCHNOP_bind_ipi; - op.u.bind_ipi.ipi_edom = cpu; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, cpu); - evtchn = op.u.bind_ipi.port; - - irq = find_unbound_irq(); - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - PCPU_GET(ipi_to_evtchn)[ipi] = evtchn; - } else - irq = evtchn_to_irq[evtchn]; - - irq_bindcount[irq]++; - - mtx_unlock(&irq_mapping_update_lock); - - return irq; -} - -void -unbind_ipi_on_cpu_from_irq(int cpu, int ipi) -{ - evtchn_op_t op; - int evtchn = PCPU_GET(ipi_to_evtchn)[ipi]; - int irq = irq_to_evtchn[evtchn]; - - mtx_lock(&irq_mapping_update_lock); - - if ( --irq_bindcount[irq] == 0 ) - { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; - op.u.close.port = evtchn; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu); - - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; - PCPU_GET(ipi_to_evtchn)[ipi] = 0; - } - - mtx_unlock(&irq_mapping_update_lock); -} - -int -bind_evtchn_to_irq(int evtchn) -{ - int irq; - - mtx_lock(&irq_mapping_update_lock); - - if ( (irq = evtchn_to_irq[evtchn]) 
== -1 ) - { - irq = find_unbound_irq(); - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - } - - irq_bindcount[irq]++; - - mtx_unlock(&irq_mapping_update_lock); - - return irq; -} - -void -unbind_evtchn_from_irq(int evtchn) -{ - int irq = evtchn_to_irq[evtchn]; - - mtx_lock(&irq_mapping_update_lock); - - if ( --irq_bindcount[irq] == 0 ) - { - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; - } - - mtx_unlock(&irq_mapping_update_lock); -} - - -/* - * Interface to generic handling in intr_machdep.c - */ - - -/*------------ interrupt handling --------------------------------------*/ -#define TODO printf("%s: not implemented!\n", __func__) - - struct mtx xenpic_lock; - -struct xenpic_intsrc { - struct intsrc xp_intsrc; - uint8_t xp_vector; - boolean_t xp_masked; -}; - -struct xenpic { - struct pic xp_pic; /* this MUST be first */ - uint16_t xp_numintr; - struct xenpic_intsrc xp_pins[0]; -}; - -static void xenpic_enable_dynirq_source(struct intsrc *isrc); -static void xenpic_disable_dynirq_source(struct intsrc *isrc, int); -static void xenpic_eoi_source(struct intsrc *isrc); -static void xenpic_enable_dynirq_intr(struct intsrc *isrc); -static int xenpic_vector(struct intsrc *isrc); -static int xenpic_source_pending(struct intsrc *isrc); -static void xenpic_suspend(struct intsrc *isrc); -static void xenpic_resume(struct intsrc *isrc); - - -struct pic xenpic_template = { - xenpic_enable_dynirq_source, - xenpic_disable_dynirq_source, - xenpic_eoi_source, - xenpic_enable_dynirq_intr, - xenpic_vector, - xenpic_source_pending, - xenpic_suspend, - xenpic_resume -}; - - -void -xenpic_enable_dynirq_source(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - - if (xp->xp_masked) { - irq = xenpic_vector(isrc); - unmask_evtchn(irq_to_evtchn[irq]); - xp->xp_masked = FALSE; - } -} - -static void -xenpic_disable_dynirq_source(struct intsrc *isrc, int foo) -{ - unsigned int irq; - struct xenpic_intsrc *xp; 
- - xp = (struct xenpic_intsrc *)isrc; - - if (!xp->xp_masked) { - irq = xenpic_vector(isrc); - mask_evtchn(irq_to_evtchn[irq]); - xp->xp_masked = TRUE; - } - -} - -static void -xenpic_enable_dynirq_intr(struct intsrc *isrc) -{ - unsigned int irq; - - irq = xenpic_vector(isrc); - unmask_evtchn(irq_to_evtchn[irq]); -} - -static void -xenpic_eoi_source(struct intsrc *isrc) -{ - unsigned int irq = xenpic_vector(isrc); - clear_evtchn(irq_to_evtchn[irq]); -} - -static int -xenpic_vector(struct intsrc *isrc) -{ - struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; - return (pin->xp_vector); -} - -static int -xenpic_source_pending(struct intsrc *isrc) -{ - TODO; - return 0; -} - -static void -xenpic_suspend(struct intsrc *isrc) -{ - TODO; -} - -static void -xenpic_resume(struct intsrc *isrc) -{ - TODO; -} - -#ifdef CONFIG_PHYSDEV -/* required for support of physical devices */ -static inline void -pirq_unmask_notify(int pirq) -{ - physdev_op_t op; - if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) ) - { - op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY; - (void)HYPERVISOR_physdev_op(&op); - } -} - -static inline void -pirq_query_unmask(int pirq) -{ - physdev_op_t op; - op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY; - op.u.irq_status_query.irq = pirq; - (void)HYPERVISOR_physdev_op(&op); - clear_bit(pirq, &pirq_needs_unmask_notify[0]); - if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) - set_bit(pirq, &pirq_needs_unmask_notify[0]); -} - -/* - * On startup, if there is no action associated with the IRQ then we are - * probing. In this case we should not share with others as it will confuse us. - */ -#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL) - -static unsigned int startup_pirq(unsigned int irq) -{ - evtchn_op_t op; - int evtchn; - - op.cmd = EVTCHNOP_bind_pirq; - op.u.bind_pirq.pirq = irq; - /* NB. We are happy to share unless we are probing. */ - op.u.bind_pirq.flags = probing_irq(irq) ? 
0 : BIND_PIRQ__WILL_SHARE; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - { - if ( !probing_irq(irq) ) /* Some failures are expected when probing. */ - printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq); - return 0; - } - evtchn = op.u.bind_pirq.port; - - pirq_query_unmask(irq_to_pirq(irq)); - - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - - return 0; -} - -static void shutdown_pirq(unsigned int irq) -{ - evtchn_op_t op; - int evtchn = irq_to_evtchn[irq]; - - if ( !VALID_EVTCHN(evtchn) ) - return; - - mask_evtchn(evtchn); - - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; - op.u.close.port = evtchn; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to unbind physical IRQ %d\n", irq); - - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; -} - -static void enable_pirq(unsigned int irq) -{ - int evtchn = irq_to_evtchn[irq]; - if ( !VALID_EVTCHN(evtchn) ) - return; - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); -} - -static void disable_pirq(unsigned int irq) -{ - int evtchn = irq_to_evtchn[irq]; - if ( !VALID_EVTCHN(evtchn) ) - return; - mask_evtchn(evtchn); -} - -static void ack_pirq(unsigned int irq) -{ - int evtchn = irq_to_evtchn[irq]; - if ( !VALID_EVTCHN(evtchn) ) - return; - mask_evtchn(evtchn); - clear_evtchn(evtchn); -} - -static void end_pirq(unsigned int irq) -{ - int evtchn = irq_to_evtchn[irq]; - if ( !VALID_EVTCHN(evtchn) ) - return; - if ( !(irq_desc[irq].status & IRQ_DISABLED) ) - { - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - } -} - -static struct hw_interrupt_type pirq_type = { - "Phys-irq", - startup_pirq, - shutdown_pirq, - enable_pirq, - disable_pirq, - ack_pirq, - end_pirq, - NULL -}; -#endif - -#if 0 -static void -misdirect_interrupt(void *sc) -{ -} -#endif -void irq_suspend(void) -{ - int virq, irq, evtchn; - - /* Unbind VIRQs from event channels. 
*/ - for ( virq = 0; virq < NR_VIRQS; virq++ ) - { - if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 ) - continue; - evtchn = irq_to_evtchn[irq]; - - /* Mark the event channel as unused in our table. */ - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; - } - - /* - * We should now be unbound from all event channels. Stale bindings to - * PIRQs and/or inter-domain event channels will cause us to barf here. - */ - for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ ) - if ( evtchn_to_irq[evtchn] != -1 ) - panic("Suspend attempted while bound to evtchn %d.\n", evtchn); -} - - -void irq_resume(void) -{ - evtchn_op_t op; - int virq, irq, evtchn; - - for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ ) - mask_evtchn(evtchn); /* New event-channel space is not 'live' yet. */ - - for ( virq = 0; virq < NR_VIRQS; virq++ ) - { - if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 ) - continue; - - /* Get a new binding from Xen. */ - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IRQ %d\n", virq); - evtchn = op.u.bind_virq.port; - - /* Record the new mapping. */ - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - /* Ready for use. */ - unmask_evtchn(evtchn); - } -} - -void -ap_evtchn_init(int cpu) -{ - int i; - - /* XXX -- expedience hack */ - PCPU_SET(virq_to_irq, (int *)&virq_to_irq[cpu]); - PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[cpu]); - - /* No VIRQ -> IRQ mappings. */ - for ( i = 0; i < NR_VIRQS; i++ ) - PCPU_GET(virq_to_irq)[i] = -1; -} - - -static void -evtchn_init(void *dummy __unused) -{ - int i; - struct xenpic *xp; - struct xenpic_intsrc *pin; - - - /* XXX -- expedience hack */ - PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]); - PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[0]); - - /* No VIRQ -> IRQ mappings. */ - for ( i = 0; i < NR_VIRQS; i++ ) - PCPU_GET(virq_to_irq)[i] = -1; - - /* No event-channel -> IRQ mappings. 
*/ - for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) - { - evtchn_to_irq[i] = -1; - mask_evtchn(i); /* No event channels are 'live' right now. */ - } - - /* No IRQ -> event-channel mappings. */ - for ( i = 0; i < NR_IRQS; i++ ) - irq_to_evtchn[i] = -1; - - xp = malloc(sizeof(struct xenpic) + NR_DYNIRQS*sizeof(struct xenpic_intsrc), M_DEVBUF, M_WAITOK); - xp->xp_pic = xenpic_template; - xp->xp_numintr = NR_DYNIRQS; - bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_DYNIRQS); - - for ( i = 0, pin = xp->xp_pins; i < NR_DYNIRQS; i++, pin++ ) - { - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - irq_bindcount[dynirq_to_irq(i)] = 0; - - pin->xp_intsrc.is_pic = (struct pic *)xp; - pin->xp_vector = i; - intr_register_source(&pin->xp_intsrc); - } - /* We don't currently have any support for physical devices in XenoFreeBSD - * so leaving this out for the moment for the sake of expediency. - */ -#ifdef notyet - for ( i = 0; i < NR_PIRQS; i++ ) - { - /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */ - irq_bindcount[pirq_to_irq(i)] = 1; - - irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED; - irq_desc[pirq_to_irq(i)].action = 0; - irq_desc[pirq_to_irq(i)].depth = 1; - irq_desc[pirq_to_irq(i)].handler = &pirq_type; - } - -#endif -#if 0 - (void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT), - (driver_intr_t *)misdirect_interrupt, - NULL, INTR_TYPE_MISC, NULL); - -#endif -} - -SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL); - /* - * xenpic_lock: in order to allow an interrupt to occur in a critical - * section, to set pcpu->ipending (etc...) properly, we - * must be able to get the icu lock, so it can't be - * under witness. 
- */ - -MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,438 +0,0 @@ -/*- - * Copyright (c) 1989, 1990 William F. Jolitz. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD: src/sys/i386/i386/exception.s,v 1.106 2003/11/03 22:08:52 jhb Exp $ - */ - -#include "opt_npx.h" - -#include <machine/asmacros.h> -#include <machine/psl.h> -#include <machine/trap.h> - -#include "assym.s" - -#define SEL_RPL_MASK 0x0002 -/* Offsets into shared_info_t. */ -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 -#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) -#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) -#define XEN_TEST_PENDING(reg) testb $0x1,evtchn_upcall_pending(reg) - - -#define POPA \ - popl %edi; \ - popl %esi; \ - popl %ebp; \ - popl %ebx; \ - popl %ebx; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - - .text - -/*****************************************************************************/ -/* Trap handling */ -/*****************************************************************************/ -/* - * Trap and fault vector routines. - * - * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on - * the stack that mostly looks like an interrupt, but does not disable - * interrupts. A few of the traps we are use are interrupt gates, - * SDT_SYS386IGT, which are nearly the same thing except interrupts are - * disabled on entry. - * - * The cpu will push a certain amount of state onto the kernel stack for - * the current process. The amount of state depends on the type of trap - * and whether the trap crossed rings or not. See i386/include/frame.h. - * At the very least the current EFLAGS (status register, which includes - * the interrupt disable state prior to the trap), the code segment register, - * and the return instruction pointer are pushed by the cpu. The cpu - * will also push an 'error' code for certain traps. We push a dummy - * error code for those traps where the cpu doesn't in order to maintain - * a consistent frame. We also push a contrived 'trap number'. 
- * - * The cpu does not push the general registers, we must do that, and we - * must restore them prior to calling 'iret'. The cpu adjusts the %cs and - * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we - * must load them with appropriate values for supervisor mode operation. - */ - -MCOUNT_LABEL(user) -MCOUNT_LABEL(btrap) - -IDTVEC(div) - pushl $0; TRAP(T_DIVIDE) -IDTVEC(dbg) - pushl $0; TRAP(T_TRCTRAP) -IDTVEC(nmi) - pushl $0; TRAP(T_NMI) -IDTVEC(bpt) - pushl $0; TRAP(T_BPTFLT) -IDTVEC(ofl) - pushl $0; TRAP(T_OFLOW) -IDTVEC(bnd) - pushl $0; TRAP(T_BOUND) -IDTVEC(ill) - pushl $0; TRAP(T_PRIVINFLT) -IDTVEC(dna) - pushl $0; TRAP(T_DNA) -IDTVEC(fpusegm) - pushl $0; TRAP(T_FPOPFLT) -IDTVEC(tss) - TRAP(T_TSSFLT) -IDTVEC(missing) - TRAP(T_SEGNPFLT) -IDTVEC(stk) - TRAP(T_STKFLT) -IDTVEC(prot) - TRAP(T_PROTFLT) -IDTVEC(page) - pushl %eax - movl 4(%esp),%eax - movl %eax,-44(%esp) # move cr2 after trap frame - popl %eax - addl $4,%esp - TRAP(T_PAGEFLT) -IDTVEC(mchk) - pushl $0; TRAP(T_MCHK) -IDTVEC(rsvd) - pushl $0; TRAP(T_RESERVED) -IDTVEC(fpu) - pushl $0; TRAP(T_ARITHTRAP) -IDTVEC(align) - TRAP(T_ALIGNFLT) - -IDTVEC(xmm) - pushl $0; TRAP(T_XMMFLT) - -IDTVEC(hypervisor_callback) - pushl %eax; TRAP(T_HYPCALLBACK) - -hypervisor_callback_pending: - movl $T_HYPCALLBACK,TF_TRAPNO(%esp) - movl $T_HYPCALLBACK,TF_ERR(%esp) - jmp 11f - - /* - * alltraps entry point. Interrupts are enabled if this was a trap - * gate (TGT), else disabled if this was an interrupt gate (IGT). - * Note that int0x80_syscall is a trap gate. Only page faults - * use an interrupt gate. 
- */ - - SUPERALIGN_TEXT - .globl alltraps - .type alltraps,@function -alltraps: - cld - pushal - pushl %ds - pushl %es - pushl %fs -alltraps_with_regs_pushed: - movl $KDSEL,%eax - movl %eax,%ds - movl %eax,%es - movl $KPSEL,%eax - movl %eax,%fs - FAKE_MCOUNT(TF_EIP(%esp)) -save_cr2: - movl TF_TRAPNO(%esp),%eax - cmpl $T_PAGEFLT,%eax - jne calltrap - movl -4(%esp),%eax - movl %eax,PCPU(CR2) -calltrap: - movl TF_EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: call trap - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - jmp doreti - -/* - * SYSCALL CALL GATE (old entry point for a.out binaries) - * - * The intersegment call has been set up to specify one dummy parameter. - * - * This leaves a place to put eflags so that the call frame can be - * converted to a trap frame. Note that the eflags is (semi-)bogusly - * pushed into (what will be) tf_err and then copied later into the - * final spot. It has to be done this way because esp can't be just - * temporarily altered for the pushfl - an interrupt might come in - * and clobber the saved cs/eip. - */ - SUPERALIGN_TEXT -IDTVEC(lcall_syscall) - pushfl /* save eflags */ - popl 8(%esp) /* shuffle into tf_eflags */ - pushl $7 /* sizeof "lcall 7,0" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - movl $KDSEL,%eax /* switch to kernel segments */ - movl %eax,%ds - movl %eax,%es - movl $KPSEL,%eax - movl %eax,%fs - FAKE_MCOUNT(TF_EIP(%esp)) - call syscall - MEXITCOUNT - jmp doreti - -/* - * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) - * - * Even though the name says 'int0x80', this is actually a TGT (trap gate) - * rather then an IGT (interrupt gate). Thus interrupts are enabled on - * entry just as they are for a normal syscall. 
- */ - SUPERALIGN_TEXT -IDTVEC(int0x80_syscall) - pushl $2 /* sizeof "int 0x80" */ - pushl $0xBEEF - pushal - pushl %ds - pushl %es - pushl %fs - movl $KDSEL,%eax /* switch to kernel segments */ - movl %eax,%ds - movl %eax,%es - movl $KPSEL,%eax - movl %eax,%fs - FAKE_MCOUNT(TF_EIP(%esp)) - call syscall - MEXITCOUNT - jmp doreti - -ENTRY(fork_trampoline) - pushl %esp /* trapframe pointer */ - pushl %ebx /* arg1 */ - pushl %esi /* function */ - call fork_exit - addl $12,%esp - /* cut from syscall */ - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - jmp doreti - - -/* -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until weve done all processing. HOWEVER, we must enable events before -# popping the stack frame (cant be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so wed -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -*/ - - -/* - * void doreti(struct trapframe) - * - * Handle return from interrupts, traps and syscalls. - */ - .text - SUPERALIGN_TEXT - .globl doreti - .type doreti,@function -doreti: - FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */ -doreti_next: - testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ - jz doreti_exit /* #can't handle ASTs now if not */ - -doreti_ast: - /* - * Check for ASTs atomically with returning. Disabling CPU - * interrupts provides sufficient locking even in the SMP case, - * since we will be informed of any new ASTs by an IPI. 
- */ - - movl HYPERVISOR_shared_info,%esi - XEN_BLOCK_EVENTS(%esi) - movl PCPU(CURTHREAD),%eax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) - je doreti_exit - XEN_UNBLOCK_EVENTS(%esi) - pushl %esp /* pass a pointer to the trapframe */ - call ast - add $4,%esp - jmp doreti_ast - -doreti_exit: - /* - * doreti_exit: pop registers, iret. - * - * The segment register pop is a special case, since it may - * fault if (for example) a sigreturn specifies bad segment - * registers. The fault is handled in trap.c. - */ - - movl HYPERVISOR_shared_info,%esi - XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks (sti) - - .globl scrit -scrit: - XEN_TEST_PENDING(%esi) - jnz hypervisor_callback_pending /* More to go */ - MEXITCOUNT - - .globl doreti_popl_fs -doreti_popl_fs: - popl %fs - .globl doreti_popl_es -doreti_popl_es: - popl %es - .globl doreti_popl_ds -doreti_popl_ds: - popl %ds - POPA - addl $8,%esp - .globl doreti_iret -doreti_iret: - iret - .globl ecrit -ecrit: - - /* - * doreti_iret_fault and friends. Alternative return code for - * the case where we get a fault in the doreti_exit code - * above. trap() (i386/i386/trap.c) catches this specific - * case, sends the process a signal and continues in the - * corresponding place in the code below. - */ - ALIGN_TEXT - .globl doreti_iret_fault -doreti_iret_fault: - subl $8,%esp - pushal - pushl %ds - .globl doreti_popl_ds_fault -doreti_popl_ds_fault: - pushl %es - .globl doreti_popl_es_fault -doreti_popl_es_fault: - pushl %fs - .globl doreti_popl_fs_fault -doreti_popl_fs_fault: - movl $0,TF_ERR(%esp) /* XXX should be the error code */ - movl $T_PROTFLT,TF_TRAPNO(%esp) - jmp alltraps_with_regs_pushed - - - - -/* -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. 
We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -*/ - -.globl critical_region_fixup -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - movl %esp,%esi - add %eax,%esi # %esi points at end of src region - movl %esp,%edi - add $0x40,%edi # %edi points at end of dst region - movl %eax,%ecx - shr $2,%ecx # convert bytes to words - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp hypervisor_callback_pending - - -critical_fixup_table: -.byte 0x0,0x0,0x0 #testb $0x1,(%esi) -.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea -.byte 0x0,0x0 #pop %fs -.byte 0x04 #pop %es -.byte 0x08 #pop %ds -.byte 0x0c #pop %edi -.byte 0x10 #pop %esi -.byte 0x14 #pop %ebp -.byte 0x18 #pop %ebx -.byte 0x1c #pop %ebx -.byte 0x20 #pop %edx -.byte 0x24 #pop %ecx -.byte 0x28 #pop %eax -.byte 0x2c,0x2c,0x2c #add $0x8,%esp -.byte 0x34 #iret - - -/* # Hypervisor uses this for application faults while it executes.*/ -ENTRY(failsafe_callback) - pushal - call xen_failsafe_handler -/*# call install_safe_pf_handler */ - movl 28(%esp),%ebx -1: movl %ebx,%ds - movl 32(%esp),%ebx -2: movl %ebx,%es - movl 36(%esp),%ebx -3: movl %ebx,%fs - movl 40(%esp),%ebx -4: movl %ebx,%gs -/*# call install_normal_pf_handler */ - popal - addl $12,%esp - iret - - diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,233 +0,0 @@ -/*- - * Copyright (c) 1982, 1990 The Regents of the University of California. - * All rights reserved. 
- * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/genassym.c,v 1.146 2003/11/12 18:14:34 jhb Exp $"); - -#include "opt_apic.h" -#include "opt_compat.h" -#include "opt_kstack_pages.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/assym.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/proc.h> -#include <sys/errno.h> -#include <sys/mount.h> -#include <sys/mutex.h> -#include <sys/socket.h> -#include <sys/resourcevar.h> -#include <sys/ucontext.h> -#include <sys/user.h> -#include <machine/bootinfo.h> -#include <machine/tss.h> -#include <sys/vmmeter.h> -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <sys/user.h> -#include <sys/proc.h> -#include <net/if.h> -#include <netinet/in.h> -#include <nfs/nfsproto.h> -#include <nfs/rpcv2.h> -#include <nfsclient/nfs.h> -#include <nfsclient/nfsdiskless.h> -#ifdef DEV_APIC -#include <machine/apicreg.h> -#endif -#include <machine/cpu.h> -#include <machine/sigframe.h> -#include <machine/proc.h> - -ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); -ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); -ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); -ASSYM(P_SFLAG, offsetof(struct proc, p_sflag)); -ASSYM(P_UAREA, offsetof(struct proc, p_uarea)); - -ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); -ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); -ASSYM(TD_PROC, offsetof(struct thread, td_proc)); -ASSYM(TD_MD, offsetof(struct thread, td_md)); - -ASSYM(P_MD, offsetof(struct proc, p_md)); -ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); - -ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); -ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); - -ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); -ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); -ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); -/* ASSYM(UPAGES, UPAGES);*/ -ASSYM(UAREA_PAGES, UAREA_PAGES); -ASSYM(KSTACK_PAGES, KSTACK_PAGES); 
-ASSYM(PAGE_SIZE, PAGE_SIZE); -ASSYM(NPTEPG, NPTEPG); -ASSYM(NPDEPG, NPDEPG); -ASSYM(NPDEPTD, NPDEPTD); -ASSYM(NPGPTD, NPGPTD); -ASSYM(PDESIZE, sizeof(pd_entry_t)); -ASSYM(PTESIZE, sizeof(pt_entry_t)); -ASSYM(PDESHIFT, PDESHIFT); -ASSYM(PTESHIFT, PTESHIFT); -ASSYM(PAGE_SHIFT, PAGE_SHIFT); -ASSYM(PAGE_MASK, PAGE_MASK); -ASSYM(PDRSHIFT, PDRSHIFT); -ASSYM(PDRMASK, PDRMASK); -ASSYM(USRSTACK, USRSTACK); -ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); -ASSYM(KERNBASE, KERNBASE); -ASSYM(KERNLOAD, KERNLOAD); -ASSYM(MCLBYTES, MCLBYTES); -ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); -ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); -ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); -ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); -ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); -ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); -ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); -ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); - -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); -ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); -ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); -ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); -ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); -ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); -ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); -ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); -ASSYM(PCB_DBREGS, PCB_DBREGS); -ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); - -ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); -ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); -ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); -ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); -ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); -ASSYM(PCB_SWITCHOUT, offsetof(struct pcb, pcb_switchout)); - -ASSYM(PCB_SIZE, sizeof(struct pcb)); - -ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); -ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); -ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); -ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); -ASSYM(TF_EIP, 
offsetof(struct trapframe, tf_eip)); -ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); -#ifdef COMPAT_43 -ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc)); -#endif -ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); -#ifdef COMPAT_FREEBSD4 -ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc)); -#endif -#ifdef COMPAT_43 -ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps)); -ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs)); -ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs)); -ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno)); -#endif -#ifdef COMPAT_FREEBSD4 -ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags)); -ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs)); -#endif -ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); -ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); -ASSYM(ENOENT, ENOENT); -ASSYM(EFAULT, EFAULT); -ASSYM(ENAMETOOLONG, ENAMETOOLONG); -ASSYM(MAXCOMLEN, MAXCOMLEN); -ASSYM(MAXPATHLEN, MAXPATHLEN); -ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); -ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); -ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); -ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); -ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); -ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); -ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); -ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); -ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); -ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); -ASSYM(PC_SIZEOF, sizeof(struct pcpu)); -ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); -ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); -ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); -ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); -ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); -ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss)); 
-ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd)); -ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt)); -ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt)); -ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); -ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); -ASSYM(PC_CR2, offsetof(struct pcpu, pc_cr2)); -ASSYM(PC_CR3, offsetof(struct pcpu, pc_pdir)); - -#ifdef DEV_APIC -ASSYM(LA_VER, offsetof(struct LAPIC, version)); -ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); -ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); -ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); -ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); -ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); -ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); -#endif - -ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); -ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); -ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); - -ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); -ASSYM(GPROC0_SEL, GPROC0_SEL); - -ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); -ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); - -#ifdef PC98 -#include <machine/bus.h> - -ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); -ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); -#endif - -ASSYM(HYPERVISOR_STACK_SWITCH, __HYPERVISOR_stack_switch); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,367 +0,0 @@ -/****************************************************************************** - * gnttab.c - * - * Two sets of functionality: - * 1. Granting foreign access to our memory reservation. - * 2. Accessing others' memory reservations via grant references. 
- * (i.e., mechanisms for both sender and recipient of grant references) - * - * Copyright (c) 2005, Christopher Clark - * Copyright (c) 2004, K A Fraser - */ - -#include "opt_pmap.h" -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/conf.h> -#include <sys/module.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mman.h> -#include <vm/vm.h> -#include <vm/vm_extern.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> - -#include <machine/gnttab.h> -#include <machine/pmap.h> - -#include <machine/hypervisor-ifs.h> - -#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) - - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__ ( "rep;nop" : : : "memory" ); -} -#define cpu_relax() rep_nop() - -#if 1 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s': line %d, file %s\n", \ - #_p , __LINE__, __FILE__); *(int*)0=0; } -#else -#define ASSERT(_p) ((void)0) -#endif - -#define WPRINTK(fmt, args...) 
\ - printk("xen_grant: " fmt, ##args) - -static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES]; -static grant_ref_t gnttab_free_head; - -static grant_entry_t *shared; -#if 0 -/* /proc/xen/grant */ -static struct proc_dir_entry *grant_pde; -#endif - -/* - * Lock-free grant-entry allocator - */ - -static inline int -get_free_entry(void) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, - gnttab_free_list[fh])) != fh) ); - return fh; -} - -static inline void -put_free_entry(grant_ref_t ref) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { gnttab_free_list[ref] = fh = nfh; wmb(); } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) ); -} - -/* - * Public grant-issuing interface functions - */ - -int -gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly) -{ - int ref; - - if ( unlikely((ref = get_free_entry()) == -1) ) - return -ENOSPC; - - shared[ref].frame = frame; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); - - return ref; -} - -void -gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, - unsigned long frame, int readonly) -{ - shared[ref].frame = frame; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); -} - - -int -gnttab_query_foreign_access(grant_ref_t ref) -{ - uint16_t nflags; - - nflags = shared[ref].flags; - - return (nflags & (GTF_reading|GTF_writing)); -} - -void -gnttab_end_foreign_access(grant_ref_t ref, int readonly) -{ - uint16_t flags, nflags; - - nflags = shared[ref].flags; - do { - if ( (flags = nflags) & (GTF_reading|GTF_writing) ) - printk("WARNING: g.e. 
still in use!\n"); - } - while ( (nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags ); - - put_free_entry(ref); -} - -int -gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) -{ - int ref; - - if ( unlikely((ref = get_free_entry()) == -1) ) - return -ENOSPC; - - shared[ref].frame = pfn; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_accept_transfer; - - return ref; -} - -void -gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, - unsigned long pfn) -{ - shared[ref].frame = pfn; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_accept_transfer; -} - -unsigned long -gnttab_end_foreign_transfer(grant_ref_t ref) -{ - unsigned long frame = 0; - uint16_t flags; - - flags = shared[ref].flags; - ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed)); - - /* - * If a transfer is committed then wait for the frame address to appear. - * Otherwise invalidate the grant entry against future use. - */ - if ( likely(flags != GTF_accept_transfer) || - (cmpxchg(&shared[ref].flags, flags, 0) != GTF_accept_transfer) ) - while ( unlikely((frame = shared[ref].frame) == 0) ) - cpu_relax(); - - put_free_entry(ref); - - return frame; -} - -void -gnttab_free_grant_references(uint16_t count, grant_ref_t head) -{ - /* TODO: O(N)...? 
*/ - grant_ref_t to_die = 0, next = head; - int i; - - for ( i = 0; i < count; i++ ) - to_die = next; - next = gnttab_free_list[next]; - put_free_entry( to_die ); -} - -int -gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head, - grant_ref_t *terminal) -{ - int i; - grant_ref_t h = gnttab_free_head; - - for ( i = 0; i < count; i++ ) - if ( unlikely(get_free_entry() == -1) ) - goto not_enough_refs; - - *head = h; - *terminal = gnttab_free_head; - - return 0; - -not_enough_refs: - gnttab_free_head = h; - return -ENOSPC; -} - -int -gnttab_claim_grant_reference(grant_ref_t *private_head, grant_ref_t terminal ) -{ - grant_ref_t g; - if ( unlikely((g = *private_head) == terminal) ) - return -ENOSPC; - *private_head = gnttab_free_list[g]; - return g; -} - -void -gnttab_release_grant_reference( grant_ref_t *private_head, - grant_ref_t release ) -{ - gnttab_free_list[release] = *private_head; - *private_head = release; -} -#ifdef notyet -static int -grant_ioctl(struct cdev *dev, u_long cmd, caddr_t data, - int flag, struct thread *td) -{ - - int ret; - privcmd_hypercall_t hypercall; - - /* XXX Need safety checks here if using for anything other - * than debugging */ - return -ENOSYS; - - if ( cmd != IOCTL_PRIVCMD_HYPERCALL ) - return -ENOSYS; - - if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) - return -EFAULT; - - if ( hypercall.op != __HYPERVISOR_grant_table_op ) - return -ENOSYS; - - /* hypercall-invoking asm taken from privcmd.c */ - __asm__ __volatile__ ( - "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " - "movl 4(%%eax),%%ebx ;" - "movl 8(%%eax),%%ecx ;" - "movl 12(%%eax),%%edx ;" - "movl 16(%%eax),%%esi ;" - "movl 20(%%eax),%%edi ;" - "movl (%%eax),%%eax ;" - TRAP_INSTR "; " - "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" - : "=a" (ret) : "0" (&hypercall) : "memory" ); - - return ret; - -} - -static struct cdevsw gnttab_cdevsw = { - d_ioctl: grant_ioctl, -}; - -static int -grant_read(char *page, char 
**start, off_t off, - int count, int *eof, void *data) -{ - int len; - unsigned int i; - grant_entry_t *gt; - - gt = (grant_entry_t *)shared; - len = 0; - - for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - /* TODO: safety catch here until this can handle >PAGE_SIZE output */ - if (len > (PAGE_SIZE - 200)) - { - len += sprintf( page + len, "Truncated.\n"); - break; - } - - if ( gt[i].flags ) - len += sprintf( page + len, - "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n", - i, - gt[i].flags, - gt[i].domid, - gt[i].frame ); - - *eof = 1; - return len; -} - -static int -grant_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - /* TODO: implement this */ - return -ENOSYS; -} -#endif -static int -gnttab_init(void *unused) -{ - gnttab_setup_table_t setup; - unsigned long frames[NR_GRANT_FRAMES]; - int i; - - setup.dom = DOMID_SELF; - setup.nr_frames = NR_GRANT_FRAMES; - setup.frame_list = frames; - - if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0) - panic("grant table setup failed\n"); - if (setup.status != 0) - panic("non-zero status in grant table setup\n"); - shared = (grant_entry_t *)kmem_alloc_nofault(kernel_map, NR_GRANT_FRAMES); - - for (i = 0; i < NR_GRANT_FRAMES; i++) - pmap_kenter_ma((vm_offset_t)(shared + (i*PAGE_SIZE)), frames[i] << PAGE_SHIFT); - - for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - gnttab_free_list[i] = i + 1; -#if 0 - /* - * /proc/xen/grant : used by libxc to access grant tables - */ - if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL ) - { - WPRINTK("Unable to create grant xen proc entry\n"); - return -1; - } - - grant_file_ops.read = grant_pde->proc_fops->read; - grant_file_ops.write = grant_pde->proc_fops->write; - - grant_pde->proc_fops = &grant_file_ops; - - grant_pde->read_proc = &grant_read; - grant_pde->write_proc = &grant_write; -#endif - printk("Grant table initialized\n"); - return 0; -} - -SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL); 
diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,107 +0,0 @@ -/****************************************************************************** - * hypervisor.c - * - * Communication to/from hypervisor. - * - * Copyright (c) 2002-2003, K A Fraser - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIEAS OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/xenvar.h> -#include <machine/multicall.h> - -/* XXX need to verify what the caller save registers are on x86 KMM */ -#define CALLER_SAVE __asm__("pushal; ") -#define CALLER_RESTORE __asm__("popal;") - - -/* ni == non-inline - these are only intended for use from assembler - * no reason to have them in a header - - * - */ -void ni_queue_multicall0(unsigned long op); -void ni_queue_multicall1(unsigned long op, unsigned long arg1); -void ni_queue_multicall2(unsigned long op, unsigned long arg1, - unsigned long arg2); -void ni_queue_multicall3(unsigned long op, unsigned long arg1, - unsigned long arg2, unsigned long arg3); -void ni_queue_multicall4(unsigned long op, unsigned long arg1, - unsigned long arg2, unsigned long arg4, - unsigned long arg5); - -void ni_execute_multicall_list(void); - -multicall_entry_t multicall_list[MAX_MULTICALL_ENTS]; -int nr_multicall_ents = 0; - - -void -ni_queue_multicall0(unsigned long op) -{ - CALLER_SAVE; - queue_multicall0(op); - CALLER_RESTORE; -} - -void -ni_queue_multicall1(unsigned long op, unsigned long arg1) -{ - CALLER_SAVE; - queue_multicall1(op, arg1); - CALLER_RESTORE; -} - -void -ni_queue_multicall2(unsigned long op, unsigned long arg1, - unsigned long arg2) -{ - CALLER_SAVE; - queue_multicall2(op, arg1, arg2); - CALLER_RESTORE; -} - -void -ni_queue_multicall3(unsigned long op, unsigned long arg1, - unsigned long arg2, unsigned long arg3) -{ - CALLER_SAVE; - queue_multicall3(op, arg1, arg2, arg3); - CALLER_RESTORE; -} - -void -ni_queue_multicall4(unsigned long op, unsigned long arg1, - unsigned long arg2, unsigned long arg3, - unsigned long arg4) -{ - CALLER_SAVE; - queue_multicall4(op, arg1, arg2, arg3, arg4); - CALLER_RESTORE; -} - -void -ni_execute_multicall_list(void) -{ - CALLER_SAVE; - execute_multicall_list(); - CALLER_RESTORE; -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c --- 
a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,626 +0,0 @@ -/*- - * Copyright (c) 1999 Michael Smith <msmith@xxxxxxxxxxx> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/i686_mem.c,v 1.23 2003/10/21 18:28:34 silby Exp $"); - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/memrange.h> -#include <sys/smp.h> -#include <sys/sysctl.h> - -#include <machine/md_var.h> -#include <machine/specialreg.h> - -/* - * i686 memory range operations - * - * This code will probably be impenetrable without reference to the - * Intel Pentium Pro documentation. - */ - -static char *mem_owner_bios = "BIOS"; - -#define MR686_FIXMTRR (1<<0) - -#define mrwithin(mr, a) \ - (((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len))) -#define mroverlap(mra, mrb) \ - (mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base)) - -#define mrvalid(base, len) \ - ((!(base & ((1 << 12) - 1))) && /* base is multiple of 4k */ \ - ((len) >= (1 << 12)) && /* length is >= 4k */ \ - powerof2((len)) && /* ... and power of two */ \ - !((base) & ((len) - 1))) /* range is not discontiuous */ - -#define mrcopyflags(curr, new) (((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK)) - -static int mtrrs_disabled; -TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled); -SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RDTUN, - &mtrrs_disabled, 0, "Disable i686 MTRRs."); - -static void i686_mrinit(struct mem_range_softc *sc); -static int i686_mrset(struct mem_range_softc *sc, - struct mem_range_desc *mrd, - int *arg); -static void i686_mrAPinit(struct mem_range_softc *sc); - -static struct mem_range_ops i686_mrops = { - i686_mrinit, - i686_mrset, - i686_mrAPinit -}; - -/* XXX for AP startup hook */ -static u_int64_t mtrrcap, mtrrdef; - -static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc, - struct mem_range_desc *mrd); -static void i686_mrfetch(struct mem_range_softc *sc); -static int i686_mtrrtype(int flags); -#if 0 -static int i686_mrt2mtrr(int flags, int oldval); -#endif -static int i686_mtrrconflict(int flag1, int 
flag2); -static void i686_mrstore(struct mem_range_softc *sc); -static void i686_mrstoreone(void *arg); -static struct mem_range_desc *i686_mtrrfixsearch(struct mem_range_softc *sc, - u_int64_t addr); -static int i686_mrsetlow(struct mem_range_softc *sc, - struct mem_range_desc *mrd, - int *arg); -static int i686_mrsetvariable(struct mem_range_softc *sc, - struct mem_range_desc *mrd, - int *arg); - -/* i686 MTRR type to memory range type conversion */ -static int i686_mtrrtomrt[] = { - MDF_UNCACHEABLE, - MDF_WRITECOMBINE, - MDF_UNKNOWN, - MDF_UNKNOWN, - MDF_WRITETHROUGH, - MDF_WRITEPROTECT, - MDF_WRITEBACK -}; - -#define MTRRTOMRTLEN (sizeof(i686_mtrrtomrt) / sizeof(i686_mtrrtomrt[0])) - -static int -i686_mtrr2mrt(int val) { - if (val < 0 || val >= MTRRTOMRTLEN) - return MDF_UNKNOWN; - return i686_mtrrtomrt[val]; -} - -/* - * i686 MTRR conflicts. Writeback and uncachable may overlap. - */ -static int -i686_mtrrconflict(int flag1, int flag2) { - flag1 &= MDF_ATTRMASK; - flag2 &= MDF_ATTRMASK; - if (flag1 == flag2 || - (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) || - (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE)) - return 0; - return 1; -} - -/* - * Look for an exactly-matching range. - */ -static struct mem_range_desc * -mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd) -{ - struct mem_range_desc *cand; - int i; - - for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++) - if ((cand->mr_base == mrd->mr_base) && - (cand->mr_len == mrd->mr_len)) - return(cand); - return(NULL); -} - -/* - * Fetch the current mtrr settings from the current CPU (assumed to all - * be in sync in the SMP case). Note that if we are here, we assume - * that MTRRs are enabled, and we may or may not have fixed MTRRs. 
- */ -static void -i686_mrfetch(struct mem_range_softc *sc) -{ - struct mem_range_desc *mrd; - u_int64_t msrv; - int i, j, msr; - - mrd = sc->mr_desc; - - /* Get fixed-range MTRRs */ - if (sc->mr_cap & MR686_FIXMTRR) { - msr = MSR_MTRR64kBase; - for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { - msrv = rdmsr(msr); - for (j = 0; j < 8; j++, mrd++) { - mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | - i686_mtrr2mrt(msrv & 0xff) | - MDF_ACTIVE; - if (mrd->mr_owner[0] == 0) - strcpy(mrd->mr_owner, mem_owner_bios); - msrv = msrv >> 8; - } - } - msr = MSR_MTRR16kBase; - for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { - msrv = rdmsr(msr); - for (j = 0; j < 8; j++, mrd++) { - mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | - i686_mtrr2mrt(msrv & 0xff) | - MDF_ACTIVE; - if (mrd->mr_owner[0] == 0) - strcpy(mrd->mr_owner, mem_owner_bios); - msrv = msrv >> 8; - } - } - msr = MSR_MTRR4kBase; - for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { - msrv = rdmsr(msr); - for (j = 0; j < 8; j++, mrd++) { - mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | - i686_mtrr2mrt(msrv & 0xff) | - MDF_ACTIVE; - if (mrd->mr_owner[0] == 0) - strcpy(mrd->mr_owner, mem_owner_bios); - msrv = msrv >> 8; - } - } - } - - /* Get remainder which must be variable MTRRs */ - msr = MSR_MTRRVarBase; - for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { - msrv = rdmsr(msr); - mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) | - i686_mtrr2mrt(msrv & 0xff); - mrd->mr_base = msrv & 0x0000000ffffff000LL; - msrv = rdmsr(msr + 1); - mrd->mr_flags = (msrv & 0x800) ? - (mrd->mr_flags | MDF_ACTIVE) : - (mrd->mr_flags & ~MDF_ACTIVE); - /* Compute the range from the mask. Ick. 
*/ - mrd->mr_len = (~(msrv & 0x0000000ffffff000LL) & 0x0000000fffffffffLL) + 1; - if (!mrvalid(mrd->mr_base, mrd->mr_len)) - mrd->mr_flags |= MDF_BOGUS; - /* If unclaimed and active, must be the BIOS */ - if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0)) - strcpy(mrd->mr_owner, mem_owner_bios); - } -} - -/* - * Return the MTRR memory type matching a region's flags - */ -static int -i686_mtrrtype(int flags) -{ - int i; - - flags &= MDF_ATTRMASK; - - for (i = 0; i < MTRRTOMRTLEN; i++) { - if (i686_mtrrtomrt[i] == MDF_UNKNOWN) - continue; - if (flags == i686_mtrrtomrt[i]) - return(i); - } - return(-1); -} -#if 0 -static int -i686_mrt2mtrr(int flags, int oldval) -{ - int val; - - if ((val = i686_mtrrtype(flags)) == -1) - return oldval & 0xff; - return val & 0xff; -} -#endif -/* - * Update running CPU(s) MTRRs to match the ranges in the descriptor - * list. - * - * XXX Must be called with interrupts enabled. - */ -static void -i686_mrstore(struct mem_range_softc *sc) -{ -#ifdef SMP - /* - * We should use ipi_all_but_self() to call other CPUs into a - * locking gate, then call a target function to do this work. - * The "proper" solution involves a generalised locking gate - * implementation, not ready yet. - */ - smp_rendezvous(NULL, i686_mrstoreone, NULL, (void *)sc); -#else - disable_intr(); /* disable interrupts */ - i686_mrstoreone((void *)sc); - enable_intr(); -#endif -} - -/* - * Update the current CPU's MTRRs with those represented in the - * descriptor list. Note that we do this wholesale rather than - * just stuffing one entry; this is simpler (but slower, of course). 
- */ -static void -i686_mrstoreone(void *arg) -{ -#if 0 - struct mem_range_softc *sc = (struct mem_range_softc *)arg; - struct mem_range_desc *mrd; - u_int64_t omsrv, msrv; - int i, j, msr; - u_int cr4save; - - mrd = sc->mr_desc; - - cr4save = rcr4(); /* save cr4 */ - if (cr4save & CR4_PGE) - load_cr4(cr4save & ~CR4_PGE); - load_cr0((rcr0() & ~CR0_NW) | CR0_CD); /* disable caches (CD = 1, NW = 0) */ - wbinvd(); /* flush caches, TLBs */ - wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~0x800); /* disable MTRRs (E = 0) */ - - /* Set fixed-range MTRRs */ - if (sc->mr_cap & MR686_FIXMTRR) { - msr = MSR_MTRR64kBase; - for (i = 0; i < (MTRR_N64K / 8); i++, msr++) { - msrv = 0; - omsrv = rdmsr(msr); - for (j = 7; j >= 0; j--) { - msrv = msrv << 8; - msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); - } - wrmsr(msr, msrv); - mrd += 8; - } - msr = MSR_MTRR16kBase; - for (i = 0; i < (MTRR_N16K / 8); i++, msr++) { - msrv = 0; - omsrv = rdmsr(msr); - for (j = 7; j >= 0; j--) { - msrv = msrv << 8; - msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); - } - wrmsr(msr, msrv); - mrd += 8; - } - msr = MSR_MTRR4kBase; - for (i = 0; i < (MTRR_N4K / 8); i++, msr++) { - msrv = 0; - omsrv = rdmsr(msr); - for (j = 7; j >= 0; j--) { - msrv = msrv << 8; - msrv |= i686_mrt2mtrr((mrd + j)->mr_flags, omsrv >> (j*8)); - } - wrmsr(msr, msrv); - mrd += 8; - } - } - - /* Set remainder which must be variable MTRRs */ - msr = MSR_MTRRVarBase; - for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) { - /* base/type register */ - omsrv = rdmsr(msr); - if (mrd->mr_flags & MDF_ACTIVE) { - msrv = mrd->mr_base & 0x0000000ffffff000LL; - msrv |= i686_mrt2mtrr(mrd->mr_flags, omsrv); - } else { - msrv = 0; - } - wrmsr(msr, msrv); - - /* mask/active register */ - if (mrd->mr_flags & MDF_ACTIVE) { - msrv = 0x800 | (~(mrd->mr_len - 1) & 0x0000000ffffff000LL); - } else { - msrv = 0; - } - wrmsr(msr + 1, msrv); - } - wbinvd(); /* flush caches, TLBs */ - wrmsr(MSR_MTRRdefType, 
rdmsr(MSR_MTRRdefType) | 0x800); /* restore MTRR state */ - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* enable caches CD = 0 and NW = 0 */ - load_cr4(cr4save); /* restore cr4 */ -#endif -} - -/* - * Hunt for the fixed MTRR referencing (addr) - */ -static struct mem_range_desc * -i686_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr) -{ - struct mem_range_desc *mrd; - int i; - - for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K); i++, mrd++) - if ((addr >= mrd->mr_base) && (addr < (mrd->mr_base + mrd->mr_len))) - return(mrd); - return(NULL); -} - -/* - * Try to satisfy the given range request by manipulating the fixed MTRRs that - * cover low memory. - * - * Note that we try to be generous here; we'll bloat the range out to the - * next higher/lower boundary to avoid the consumer having to know too much - * about the mechanisms here. - * - * XXX note that this will have to be updated when we start supporting "busy" ranges. - */ -static int -i686_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) -{ - struct mem_range_desc *first_md, *last_md, *curr_md; - - /* range check */ - if (((first_md = i686_mtrrfixsearch(sc, mrd->mr_base)) == NULL) || - ((last_md = i686_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL)) - return(EINVAL); - - /* check we aren't doing something risky */ - if (!(mrd->mr_flags & MDF_FORCE)) - for (curr_md = first_md; curr_md <= last_md; curr_md++) { - if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN) - return (EACCES); - } - - /* set flags, clear set-by-firmware flag */ - for (curr_md = first_md; curr_md <= last_md; curr_md++) { - curr_md->mr_flags = mrcopyflags(curr_md->mr_flags & ~MDF_FIRMWARE, mrd->mr_flags); - bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner)); - } - - return(0); -} - - -/* - * Modify/add a variable MTRR to satisfy the request. - * - * XXX needs to be updated to properly support "busy" ranges. 
- */ -static int -i686_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) -{ - struct mem_range_desc *curr_md, *free_md; - int i; - - /* - * Scan the currently active variable descriptors, look for - * one we exactly match (straight takeover) and for possible - * accidental overlaps. - * Keep track of the first empty variable descriptor in case we - * can't perform a takeover. - */ - i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0; - curr_md = sc->mr_desc + i; - free_md = NULL; - for (; i < sc->mr_ndesc; i++, curr_md++) { - if (curr_md->mr_flags & MDF_ACTIVE) { - /* exact match? */ - if ((curr_md->mr_base == mrd->mr_base) && - (curr_md->mr_len == mrd->mr_len)) { - /* whoops, owned by someone */ - if (curr_md->mr_flags & MDF_BUSY) - return(EBUSY); - /* check we aren't doing something risky */ - if (!(mrd->mr_flags & MDF_FORCE) && - ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)) - return (EACCES); - /* Ok, just hijack this entry */ - free_md = curr_md; - break; - } - /* non-exact overlap ? */ - if (mroverlap(curr_md, mrd)) { - /* between conflicting region types? */ - if (i686_mtrrconflict(curr_md->mr_flags, mrd->mr_flags)) - return(EINVAL); - } - } else if (free_md == NULL) { - free_md = curr_md; - } - } - /* got somewhere to put it? */ - if (free_md == NULL) - return(ENOSPC); - - /* Set up new descriptor */ - free_md->mr_base = mrd->mr_base; - free_md->mr_len = mrd->mr_len; - free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags); - bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner)); - return(0); -} - -/* - * Handle requests to set memory range attributes by manipulating MTRRs. 
- * - */ -static int -i686_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg) -{ - struct mem_range_desc *targ; - int error = 0; - - switch(*arg) { - case MEMRANGE_SET_UPDATE: - /* make sure that what's being asked for is even possible at all */ - if (!mrvalid(mrd->mr_base, mrd->mr_len) || - i686_mtrrtype(mrd->mr_flags) == -1) - return(EINVAL); - -#define FIXTOP ((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000)) - - /* are the "low memory" conditions applicable? */ - if ((sc->mr_cap & MR686_FIXMTRR) && - ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) { - if ((error = i686_mrsetlow(sc, mrd, arg)) != 0) - return(error); - } else { - /* it's time to play with variable MTRRs */ - if ((error = i686_mrsetvariable(sc, mrd, arg)) != 0) - return(error); - } - break; - - case MEMRANGE_SET_REMOVE: - if ((targ = mem_range_match(sc, mrd)) == NULL) - return(ENOENT); - if (targ->mr_flags & MDF_FIXACTIVE) - return(EPERM); - if (targ->mr_flags & MDF_BUSY) - return(EBUSY); - targ->mr_flags &= ~MDF_ACTIVE; - targ->mr_owner[0] = 0; - break; - - default: - return(EOPNOTSUPP); - } - - /* update the hardware */ - i686_mrstore(sc); - i686_mrfetch(sc); /* refetch to see where we're at */ - return(0); -} - -/* - * Work out how many ranges we support, initialise storage for them, - * fetch the initial settings. 
- */ -static void -i686_mrinit(struct mem_range_softc *sc) -{ - struct mem_range_desc *mrd; - int nmdesc = 0; - int i; - - /* XXX */ - return; - - mtrrcap = rdmsr(MSR_MTRRcap); - mtrrdef = rdmsr(MSR_MTRRdefType); - - /* For now, bail out if MTRRs are not enabled */ - if (!(mtrrdef & 0x800)) { - if (bootverbose) - printf("CPU supports MTRRs but not enabled\n"); - return; - } - nmdesc = mtrrcap & 0xff; - printf("Pentium Pro MTRR support enabled\n"); - - /* If fixed MTRRs supported and enabled */ - if ((mtrrcap & 0x100) && (mtrrdef & 0x400)) { - sc->mr_cap = MR686_FIXMTRR; - nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K; - } - - sc->mr_desc = - (struct mem_range_desc *)malloc(nmdesc * sizeof(struct mem_range_desc), - M_MEMDESC, M_WAITOK | M_ZERO); - sc->mr_ndesc = nmdesc; - - mrd = sc->mr_desc; - - /* Populate the fixed MTRR entries' base/length */ - if (sc->mr_cap & MR686_FIXMTRR) { - for (i = 0; i < MTRR_N64K; i++, mrd++) { - mrd->mr_base = i * 0x10000; - mrd->mr_len = 0x10000; - mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; - } - for (i = 0; i < MTRR_N16K; i++, mrd++) { - mrd->mr_base = i * 0x4000 + 0x80000; - mrd->mr_len = 0x4000; - mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; - } - for (i = 0; i < MTRR_N4K; i++, mrd++) { - mrd->mr_base = i * 0x1000 + 0xc0000; - mrd->mr_len = 0x1000; - mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN | MDF_FIXACTIVE; - } - } - - /* - * Get current settings, anything set now is considered to have - * been set by the firmware. (XXX has something already played here?) - */ - i686_mrfetch(sc); - mrd = sc->mr_desc; - for (i = 0; i < sc->mr_ndesc; i++, mrd++) { - if (mrd->mr_flags & MDF_ACTIVE) - mrd->mr_flags |= MDF_FIRMWARE; - } -} - -/* - * Initialise MTRRs on an AP after the BSP has run the init code. 
- */ -static void -i686_mrAPinit(struct mem_range_softc *sc) -{ - i686_mrstoreone((void *)sc); /* set MTRRs to match BSP */ - wrmsr(MSR_MTRRdefType, mtrrdef); /* set MTRR behaviour to match BSP */ -} - -static void -i686_mem_drvinit(void *unused) -{ - /* Try for i686 MTRRs */ - if (!mtrrs_disabled && (cpu_feature & CPUID_MTRR) && - ((cpu_id & 0xf00) == 0x600 || (cpu_id & 0xf00) == 0xf00) && - ((strcmp(cpu_vendor, "GenuineIntel") == 0) || - (strcmp(cpu_vendor, "AuthenticAMD") == 0))) { - mem_range_softc.mr_op = &i686_mrops; - } -} - -SYSINIT(i686memdev,SI_SUB_DRIVERS,SI_ORDER_FIRST,i686_mem_drvinit,NULL) diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,889 +0,0 @@ -/*- - * Copyright (c) KATO Takenori, 1997, 1998. - * - * All rights reserved. Unpublished rights reserved under the copyright - * laws of Japan. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer as - * the first lines of this file unmodified. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/initcpu.c,v 1.49 2003/11/10 15:48:30 jhb Exp $"); - -#include "opt_cpu.h" - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/systm.h> -#include <sys/sysctl.h> - -#include <machine/cputypes.h> -#include <machine/md_var.h> -#include <machine/specialreg.h> - -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - -void initializecpu(void); -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) -void enable_K5_wt_alloc(void); -void enable_K6_wt_alloc(void); -void enable_K6_2_wt_alloc(void); -#endif - -#ifdef I486_CPU -static void init_5x86(void); -static void init_bluelightning(void); -static void init_486dlc(void); -static void init_cy486dx(void); -#ifdef CPU_I486_ON_386 -static void init_i486_on_386(void); -#endif -static void init_6x86(void); -#endif /* I486_CPU */ - -#ifdef I686_CPU -static void init_6x86MX(void); -static void init_ppro(void); -static void init_mendocino(void); -#endif - -static int hw_instruction_sse; -SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, - &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); - -/* Must *NOT* be BSS or locore will bzero these after setting them */ -int cpu = 0; /* Are we 386, 386sx, 486, etc? 
*/ -u_int cpu_feature = 0; /* Feature flags */ -u_int cpu_high = 0; /* Highest arg to CPUID */ -u_int cpu_id = 0; /* Stepping ID */ -u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */ -char cpu_vendor[20] = ""; /* CPU Origin code */ - -#ifdef CPU_ENABLE_SSE -u_int cpu_fxsr; /* SSE enabled */ -#endif - -#ifdef I486_CPU -/* - * IBM Blue Lightning - */ -static void -init_bluelightning(void) -{ -#if 0 - u_long eflags; - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - need_post_dma_flush = 1; -#endif - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - invd(); - -#ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE - wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */ -#else - wrmsr(0x1000, 0x1c92LL); /* Intel FPU */ -#endif - /* Enables 13MB and 0-640KB cache. */ - wrmsr(0x1001, (0xd0LL << 32) | 0x3ff); -#ifdef CPU_BLUELIGHTNING_3X - wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */ -#else - wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */ -#endif - - /* Enable caching in CR0. */ - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - invd(); - write_eflags(eflags); -#endif -} - -/* - * Cyrix 486SLC/DLC/SR/DR series - */ -static void -init_486dlc(void) -{ - u_long eflags; - u_char ccr0; - - eflags = read_eflags(); - disable_intr(); - invd(); - - ccr0 = read_cyrix_reg(CCR0); -#ifndef CYRIX_CACHE_WORKS - ccr0 |= CCR0_NC1 | CCR0_BARB; - write_cyrix_reg(CCR0, ccr0); - invd(); -#else - ccr0 &= ~CCR0_NC0; -#ifndef CYRIX_CACHE_REALLY_WORKS - ccr0 |= CCR0_NC1 | CCR0_BARB; -#else - ccr0 |= CCR0_NC1; -#endif -#ifdef CPU_DIRECT_MAPPED_CACHE - ccr0 |= CCR0_CO; /* Direct mapped mode. */ -#endif - write_cyrix_reg(CCR0, ccr0); - - /* Clear non-cacheable region. 
*/ - write_cyrix_reg(NCR1+2, NCR_SIZE_0K); - write_cyrix_reg(NCR2+2, NCR_SIZE_0K); - write_cyrix_reg(NCR3+2, NCR_SIZE_0K); - write_cyrix_reg(NCR4+2, NCR_SIZE_0K); - - write_cyrix_reg(0, 0); /* dummy write */ - - /* Enable caching in CR0. */ - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - invd(); -#endif /* !CYRIX_CACHE_WORKS */ - write_eflags(eflags); -} - - -/* - * Cyrix 486S/DX series - */ -static void -init_cy486dx(void) -{ - u_long eflags; - u_char ccr2; - - eflags = read_eflags(); - disable_intr(); - invd(); - - ccr2 = read_cyrix_reg(CCR2); -#ifdef CPU_SUSP_HLT - ccr2 |= CCR2_SUSP_HLT; -#endif - -#ifdef PC98 - /* Enables WB cache interface pin and Lock NW bit in CR0. */ - ccr2 |= CCR2_WB | CCR2_LOCK_NW; - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, ccr2 & ~CCR2_LOCK_NW); - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ -#endif - - write_cyrix_reg(CCR2, ccr2); - write_eflags(eflags); -} - - -/* - * Cyrix 5x86 - */ -static void -init_5x86(void) -{ - u_long eflags; - u_char ccr2, ccr3, ccr4, pcr0; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - (void)read_cyrix_reg(CCR3); /* dummy */ - - /* Initialize CCR2. */ - ccr2 = read_cyrix_reg(CCR2); - ccr2 |= CCR2_WB; -#ifdef CPU_SUSP_HLT - ccr2 |= CCR2_SUSP_HLT; -#else - ccr2 &= ~CCR2_SUSP_HLT; -#endif - ccr2 |= CCR2_WT1; - write_cyrix_reg(CCR2, ccr2); - - /* Initialize CCR4. */ - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - ccr4 = read_cyrix_reg(CCR4); - ccr4 |= CCR4_DTE; - ccr4 |= CCR4_MEM; -#ifdef CPU_FASTER_5X86_FPU - ccr4 |= CCR4_FASTFPE; -#else - ccr4 &= ~CCR4_FASTFPE; -#endif - ccr4 &= ~CCR4_IOMASK; - /******************************************************************** - * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time - * should be 0 for errata fix. 
- ********************************************************************/ -#ifdef CPU_IORT - ccr4 |= CPU_IORT & CCR4_IOMASK; -#endif - write_cyrix_reg(CCR4, ccr4); - - /* Initialize PCR0. */ - /**************************************************************** - * WARNING: RSTK_EN and LOOP_EN could make your system unstable. - * BTB_EN might make your system unstable. - ****************************************************************/ - pcr0 = read_cyrix_reg(PCR0); -#ifdef CPU_RSTK_EN - pcr0 |= PCR0_RSTK; -#else - pcr0 &= ~PCR0_RSTK; -#endif -#ifdef CPU_BTB_EN - pcr0 |= PCR0_BTB; -#else - pcr0 &= ~PCR0_BTB; -#endif -#ifdef CPU_LOOP_EN - pcr0 |= PCR0_LOOP; -#else - pcr0 &= ~PCR0_LOOP; -#endif - - /**************************************************************** - * WARNING: if you use a memory mapped I/O device, don't use - * DISABLE_5X86_LSSER option, which may reorder memory mapped - * I/O access. - * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER. - ****************************************************************/ -#ifdef CPU_DISABLE_5X86_LSSER - pcr0 &= ~PCR0_LSSER; -#else - pcr0 |= PCR0_LSSER; -#endif - write_cyrix_reg(PCR0, pcr0); - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - (void)read_cyrix_reg(0x80); /* dummy */ - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -} - -#ifdef CPU_I486_ON_386 -/* - * There are i486 based upgrade products for i386 machines. - * In this case, BIOS doesn't enables CPU cache. 
- */ -static void -init_i486_on_386(void) -{ - u_long eflags; - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - need_post_dma_flush = 1; -#endif - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ - - write_eflags(eflags); -} -#endif - -/* - * Cyrix 6x86 - * - * XXX - What should I do here? Please let me know. - */ -static void -init_6x86(void) -{ - u_long eflags; - u_char ccr3, ccr4; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - /* Initialize CCR0. */ - write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); - - /* Initialize CCR1. */ -#ifdef CPU_CYRIX_NO_LOCK - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); -#else - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); -#endif - - /* Initialize CCR2. */ -#ifdef CPU_SUSP_HLT - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); -#else - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); -#endif - - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - /* Initialize CCR4. */ - ccr4 = read_cyrix_reg(CCR4); - ccr4 |= CCR4_DTE; - ccr4 &= ~CCR4_IOMASK; -#ifdef CPU_IORT - write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); -#else - write_cyrix_reg(CCR4, ccr4 | 7); -#endif - - /* Initialize CCR5. */ -#ifdef CPU_WT_ALLOC - write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); -#endif - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - - /* - * Earlier revision of the 6x86 CPU could crash the system if - * L1 cache is in write-back mode. - */ - if ((cyrix_did & 0xff00) > 0x1600) - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - else { - /* Revision 2.6 and lower. 
*/ -#ifdef CYRIX_CACHE_REALLY_WORKS - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ -#else - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */ -#endif - } - - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -} -#endif /* I486_CPU */ - -#ifdef I686_CPU -/* - * Cyrix 6x86MX (code-named M2) - * - * XXX - What should I do here? Please let me know. - */ -static void -init_6x86MX(void) -{ -#if 0 - u_long eflags; - u_char ccr3, ccr4; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - /* Initialize CCR0. */ - write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); - - /* Initialize CCR1. */ -#ifdef CPU_CYRIX_NO_LOCK - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); -#else - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); -#endif - - /* Initialize CCR2. */ -#ifdef CPU_SUSP_HLT - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); -#else - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); -#endif - - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - /* Initialize CCR4. */ - ccr4 = read_cyrix_reg(CCR4); - ccr4 &= ~CCR4_IOMASK; -#ifdef CPU_IORT - write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); -#else - write_cyrix_reg(CCR4, ccr4 | 7); -#endif - - /* Initialize CCR5. */ -#ifdef CPU_WT_ALLOC - write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); -#endif - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -#endif -} - -static void -init_ppro(void) -{ - u_int64_t apicbase; - - /* - * Local APIC should be disabled if it is not going to be used. 
- */ - apicbase = rdmsr(MSR_APICBASE); - apicbase &= ~APICBASE_ENABLED; - wrmsr(MSR_APICBASE, apicbase); -} - -/* - * Initialize BBL_CR_CTL3 (Control register 3: used to configure the - * L2 cache). - */ -static void -init_mendocino(void) -{ -#ifdef CPU_PPRO2CELERON - u_long eflags; - u_int64_t bbl_cr_ctl3; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3); - - /* If the L2 cache is configured, do nothing. */ - if (!(bbl_cr_ctl3 & 1)) { - bbl_cr_ctl3 = 0x134052bLL; - - /* Set L2 Cache Latency (Default: 5). */ -#ifdef CPU_CELERON_L2_LATENCY -#if CPU_L2_LATENCY > 15 -#error invalid CPU_L2_LATENCY. -#endif - bbl_cr_ctl3 |= CPU_L2_LATENCY << 1; -#else - bbl_cr_ctl3 |= 5 << 1; -#endif - wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3); - } - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); - write_eflags(eflags); -#endif /* CPU_PPRO2CELERON */ -} - -#endif /* I686_CPU */ - -/* - * Initialize CR4 (Control register 4) to enable SSE instructions. 
- */ -void -enable_sse(void) -{ -#ifdef XEN - return; -#endif -#if defined(CPU_ENABLE_SSE) - if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { - load_cr4(rcr4() | CR4_FXSR | CR4_XMM); - cpu_fxsr = hw_instruction_sse = 1; - } -#endif -} - -void -initializecpu(void) -{ - - switch (cpu) { -#ifdef I486_CPU - case CPU_BLUE: - init_bluelightning(); - break; - case CPU_486DLC: - init_486dlc(); - break; - case CPU_CY486DX: - init_cy486dx(); - break; - case CPU_M1SC: - init_5x86(); - break; -#ifdef CPU_I486_ON_386 - case CPU_486: - init_i486_on_386(); - break; -#endif - case CPU_M1: - init_6x86(); - break; -#endif /* I486_CPU */ -#ifdef I686_CPU - case CPU_M2: - init_6x86MX(); - break; - case CPU_686: - if (strcmp(cpu_vendor, "GenuineIntel") == 0) { - switch (cpu_id & 0xff0) { - case 0x610: - init_ppro(); - break; - case 0x660: - init_mendocino(); - break; - } - } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { -#if defined(I686_CPU) && defined(CPU_ATHLON_SSE_HACK) - /* - * Sometimes the BIOS doesn't enable SSE instructions. - * According to AMD document 20734, the mobile - * Duron, the (mobile) Athlon 4 and the Athlon MP - * support SSE. These correspond to cpu_id 0x66X - * or 0x67X. - */ - if ((cpu_feature & CPUID_XMM) == 0 && - ((cpu_id & ~0xf) == 0x660 || - (cpu_id & ~0xf) == 0x670 || - (cpu_id & ~0xf) == 0x680)) { - u_int regs[4]; - wrmsr(0xC0010015, rdmsr(0xC0010015) & ~0x08000); - do_cpuid(1, regs); - cpu_feature = regs[3]; - } -#endif - } - break; -#endif - default: - break; - } - enable_sse(); - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - /* - * OS should flush L1 cache by itself because no PC-98 supports - * non-Intel CPUs. Use wbinvd instruction before DMA transfer - * when need_pre_dma_flush = 1, use invd instruction after DMA - * transfer when need_post_dma_flush = 1. If your CPU upgrade - * product supports hardware cache control, you can add the - * CPU_UPGRADE_HW_CACHE option in your kernel configuration file. 
- * This option eliminates unneeded cache flush instruction(s). - */ - if (strcmp(cpu_vendor, "CyrixInstead") == 0) { - switch (cpu) { -#ifdef I486_CPU - case CPU_486DLC: - need_post_dma_flush = 1; - break; - case CPU_M1SC: - need_pre_dma_flush = 1; - break; - case CPU_CY486DX: - need_pre_dma_flush = 1; -#ifdef CPU_I486_ON_386 - need_post_dma_flush = 1; -#endif - break; -#endif - default: - break; - } - } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { - switch (cpu_id & 0xFF0) { - case 0x470: /* Enhanced Am486DX2 WB */ - case 0x490: /* Enhanced Am486DX4 WB */ - case 0x4F0: /* Am5x86 WB */ - need_pre_dma_flush = 1; - break; - } - } else if (strcmp(cpu_vendor, "IBM") == 0) { - need_post_dma_flush = 1; - } else { -#ifdef CPU_I486_ON_386 - need_pre_dma_flush = 1; -#endif - } -#endif /* PC98 && !CPU_UPGRADE_HW_CACHE */ -} - -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) -/* - * Enable write allocate feature of AMD processors. - * Following two functions require the Maxmem variable being set. - */ -void -enable_K5_wt_alloc(void) -{ - u_int64_t msr; - register_t savecrit; - - /* - * Write allocate is supported only on models 1, 2, and 3, with - * a stepping of 4 or greater. - */ - if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { - savecrit = intr_disable(); - msr = rdmsr(0x83); /* HWCR */ - wrmsr(0x83, msr & !(0x10)); - - /* - * We have to tell the chip where the top of memory is, - * since video cards could have frame bufferes there, - * memory-mapped I/O could be there, etc. - */ - if(Maxmem > 0) - msr = Maxmem / 16; - else - msr = 0; - msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE; -#ifdef PC98 - if (!(inb(0x43b) & 4)) { - wrmsr(0x86, 0x0ff00f0); - msr |= AMD_WT_ALLOC_PRE; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. 
- */ - wrmsr(0x86, 0x0ff00f0); - msr |= AMD_WT_ALLOC_PRE; -#endif - wrmsr(0x85, msr); - - msr=rdmsr(0x83); - wrmsr(0x83, msr|0x10); /* enable write allocate */ - intr_restore(savecrit); - } -} - -void -enable_K6_wt_alloc(void) -{ - quad_t size; - u_int64_t whcr; - u_long eflags; - - eflags = read_eflags(); - disable_intr(); - wbinvd(); - -#ifdef CPU_DISABLE_CACHE - /* - * Certain K6-2 box becomes unstable when write allocation is - * enabled. - */ - /* - * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), - * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. - * All other bits in TR12 have no effect on the processer's operation. - * The I/O Trap Restart function (bit 9 of TR12) is always enabled - * on the AMD-K6. - */ - wrmsr(0x0000000e, (u_int64_t)0x0008); -#endif - /* Don't assume that memory size is aligned with 4M. */ - if (Maxmem > 0) - size = ((Maxmem >> 8) + 3) >> 2; - else - size = 0; - - /* Limit is 508M bytes. */ - if (size > 0x7f) - size = 0x7f; - whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1); - -#if defined(PC98) || defined(NO_MEMORY_HOLE) - if (whcr & (0x7fLL << 1)) { -#ifdef PC98 - /* - * If bit 2 of port 0x43b is 0, disable wrte allocate for the - * 15-16M range. - */ - if (!(inb(0x43b) & 4)) - whcr &= ~0x0001LL; - else -#endif - whcr |= 0x0001LL; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. - */ - whcr &= ~0x0001LL; -#endif - wrmsr(0x0c0000082, whcr); - - write_eflags(eflags); -} - -void -enable_K6_2_wt_alloc(void) -{ - quad_t size; - u_int64_t whcr; - u_long eflags; - - eflags = read_eflags(); - disable_intr(); - wbinvd(); - -#ifdef CPU_DISABLE_CACHE - /* - * Certain K6-2 box becomes unstable when write allocation is - * enabled. - */ - /* - * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), - * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. 
- * All other bits in TR12 have no effect on the processer's operation. - * The I/O Trap Restart function (bit 9 of TR12) is always enabled - * on the AMD-K6. - */ - wrmsr(0x0000000e, (u_int64_t)0x0008); -#endif - /* Don't assume that memory size is aligned with 4M. */ - if (Maxmem > 0) - size = ((Maxmem >> 8) + 3) >> 2; - else - size = 0; - - /* Limit is 4092M bytes. */ - if (size > 0x3fff) - size = 0x3ff; - whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22); - -#if defined(PC98) || defined(NO_MEMORY_HOLE) - if (whcr & (0x3ffLL << 22)) { -#ifdef PC98 - /* - * If bit 2 of port 0x43b is 0, disable wrte allocate for the - * 15-16M range. - */ - if (!(inb(0x43b) & 4)) - whcr &= ~(1LL << 16); - else -#endif - whcr |= 1LL << 16; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. - */ - whcr &= ~(1LL << 16); -#endif - wrmsr(0x0c0000082, whcr); - - write_eflags(eflags); -} -#endif /* I585_CPU && CPU_WT_ALLOC */ - -#include "opt_ddb.h" -#ifdef DDB -#include <ddb/ddb.h> -#if 0 -DB_SHOW_COMMAND(cyrixreg, cyrixreg) -{ - u_long eflags; - u_int cr0; - u_char ccr1, ccr2, ccr3; - u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; - - cr0 = rcr0(); - if (strcmp(cpu_vendor,"CyrixInstead") == 0) { - eflags = read_eflags(); - disable_intr(); - - - if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { - ccr0 = read_cyrix_reg(CCR0); - } - ccr1 = read_cyrix_reg(CCR1); - ccr2 = read_cyrix_reg(CCR2); - ccr3 = read_cyrix_reg(CCR3); - if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { - write_cyrix_reg(CCR3, CCR3_MAPEN0); - ccr4 = read_cyrix_reg(CCR4); - if ((cpu == CPU_M1) || (cpu == CPU_M2)) - ccr5 = read_cyrix_reg(CCR5); - else - pcr0 = read_cyrix_reg(PCR0); - write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. 
*/ - } - write_eflags(eflags); - - if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) - printf("CCR0=%x, ", (u_int)ccr0); - - printf("CCR1=%x, CCR2=%x, CCR3=%x", - (u_int)ccr1, (u_int)ccr2, (u_int)ccr3); - if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { - printf(", CCR4=%x, ", (u_int)ccr4); - if (cpu == CPU_M1SC) - printf("PCR0=%x\n", pcr0); - else - printf("CCR5=%x\n", ccr5); - } - } - printf("CR0=%x\n", cr0); -} -#endif -#endif /* DDB */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/intr_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,326 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/intr_machdep.c,v 1.4 2003/11/17 06:10:14 peter Exp $ - */ - -/* - * Machine dependent interrupt code for i386. For the i386, we have to - * deal with different PICs. Thus, we use the passed in vector to lookup - * an interrupt source associated with that vector. The interrupt source - * describes which PIC the source belongs to and includes methods to handle - * that source. - */ - -#include "opt_ddb.h" - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/interrupt.h> -#include <sys/lock.h> -#include <sys/ktr.h> -#include <sys/kernel.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/syslog.h> -#include <sys/systm.h> -#include <machine/clock.h> -#include <machine/intr_machdep.h> -#ifdef DDB -#include <ddb/ddb.h> -#endif - -#define MAX_STRAY_LOG 5 - -typedef void (*mask_fn)(uintptr_t vector); - -static int intrcnt_index; -static struct intsrc *interrupt_sources[NUM_IO_INTS]; -static struct mtx intr_table_lock; - -static void intr_init(void *__dummy); -static void intrcnt_setname(const char *name, int index); -static void intrcnt_updatename(struct intsrc *is); -static void intrcnt_register(struct intsrc *is); - -/* - * Register a new interrupt source with the global interrupt system. - * The global interrupts need to be disabled when this function is - * called. 
- */ -int -intr_register_source(struct intsrc *isrc) -{ - int error, vector; - - vector = isrc->is_pic->pic_vector(isrc); - if (interrupt_sources[vector] != NULL) - return (EEXIST); - error = ithread_create(&isrc->is_ithread, (uintptr_t)isrc, 0, - (mask_fn)isrc->is_pic->pic_disable_source, - (mask_fn)isrc->is_pic->pic_enable_source, "irq%d:", vector); - if (error) - return (error); - mtx_lock_spin(&intr_table_lock); - if (interrupt_sources[vector] != NULL) { - mtx_unlock_spin(&intr_table_lock); - ithread_destroy(isrc->is_ithread); - return (EEXIST); - } - intrcnt_register(isrc); - interrupt_sources[vector] = isrc; - mtx_unlock_spin(&intr_table_lock); - return (0); -} - -struct intsrc * -intr_lookup_source(int vector) -{ - - return (interrupt_sources[vector]); -} - -int -intr_add_handler(const char *name, int vector, driver_intr_t handler, - void *arg, enum intr_type flags, void **cookiep) -{ - struct intsrc *isrc; - int error; - - isrc = intr_lookup_source(vector); - if (isrc == NULL) - return (EINVAL); - - error = ithread_add_handler(isrc->is_ithread, name, handler, arg, - ithread_priority(flags), flags, cookiep); - if (error == 0) { - intrcnt_updatename(isrc); - isrc->is_pic->pic_enable_intr(isrc); - isrc->is_pic->pic_enable_source(isrc); - } - return (error); -} - -int -intr_remove_handler(void *cookie) -{ - int error; - - error = ithread_remove_handler(cookie); -#ifdef XXX - if (error == 0) - intrcnt_updatename(/* XXX */); -#endif - return (error); -} - -int -intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) -{ - struct intsrc *isrc; - - isrc = intr_lookup_source(vector); - if (isrc == NULL) - return (EINVAL); - return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); -} - -void -intr_execute_handlers(struct intsrc *isrc, struct intrframe *iframe) -{ - struct thread *td; - struct ithd *it; - struct intrhand *ih; - int error, vector; - - td = curthread; - td->td_intr_nesting_level++; - - /* - * We count software interrupts when we 
process them. The - * code here follows previous practice, but there's an - * argument for counting hardware interrupts when they're - * processed too. - */ - atomic_add_long(isrc->is_count, 1); - atomic_add_int(&cnt.v_intr, 1); - - it = isrc->is_ithread; - if (it == NULL) - ih = NULL; - else - ih = TAILQ_FIRST(&it->it_handlers); - - /* - * XXX: We assume that IRQ 0 is only used for the ISA timer - * device (clk). - */ - vector = isrc->is_pic->pic_vector(isrc); - if (vector == 0) - clkintr_pending = 1; - - - if (ih != NULL && ih->ih_flags & IH_FAST) { - /* - * Execute fast interrupt handlers directly. - * To support clock handlers, if a handler registers - * with a NULL argument, then we pass it a pointer to - * a trapframe as its argument. - */ - critical_enter(); - TAILQ_FOREACH(ih, &it->it_handlers, ih_next) { - MPASS(ih->ih_flags & IH_FAST); - CTR3(KTR_INTR, "%s: executing handler %p(%p)", - __func__, ih->ih_handler, - ih->ih_argument == NULL ? iframe : - ih->ih_argument); - if (ih->ih_argument == NULL) - ih->ih_handler(iframe); - else - ih->ih_handler(ih->ih_argument); - } - isrc->is_pic->pic_eoi_source(isrc); - error = 0; - /* XXX */ - td->td_pflags &= ~TDP_OWEPREEMPT; - critical_exit(); - } else { - /* - * For stray and threaded interrupts, we mask and EOI the - * source. 
- */ - isrc->is_pic->pic_disable_source(isrc, PIC_EOI); - if (ih == NULL) - error = EINVAL; - else - error = ithread_schedule(it); - isrc->is_pic->pic_eoi_source(isrc); - } - - if (error == EINVAL) { - atomic_add_long(isrc->is_straycount, 1); - if (*isrc->is_straycount < MAX_STRAY_LOG) - log(LOG_ERR, "stray irq%d\n", vector); - else if (*isrc->is_straycount == MAX_STRAY_LOG) - log(LOG_CRIT, - "too many stray irq %d's: not logging anymore\n", - vector); - } - td->td_intr_nesting_level--; - -} - -void -intr_resume(void) -{ - struct intsrc **isrc; - int i; - - mtx_lock_spin(&intr_table_lock); - for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++) - if (*isrc != NULL && (*isrc)->is_pic->pic_resume != NULL) - (*isrc)->is_pic->pic_resume(*isrc); - mtx_unlock_spin(&intr_table_lock); -} - -void -intr_suspend(void) -{ - struct intsrc **isrc; - int i; - - mtx_lock_spin(&intr_table_lock); - for (i = 0, isrc = interrupt_sources; i < NUM_IO_INTS; i++, isrc++) - if (*isrc != NULL && (*isrc)->is_pic->pic_suspend != NULL) - (*isrc)->is_pic->pic_suspend(*isrc); - mtx_unlock_spin(&intr_table_lock); -} - -static void -intrcnt_setname(const char *name, int index) -{ - - snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", - MAXCOMLEN, name); -} - -static void -intrcnt_updatename(struct intsrc *is) -{ - - intrcnt_setname(is->is_ithread->it_td->td_proc->p_comm, is->is_index); -} - -static void -intrcnt_register(struct intsrc *is) -{ - char straystr[MAXCOMLEN + 1]; - - /* mtx_assert(&intr_table_lock, MA_OWNED); */ - KASSERT(is->is_ithread != NULL, ("%s: isrc with no ithread", __func__)); - is->is_index = intrcnt_index; - intrcnt_index += 2; - snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", - is->is_pic->pic_vector(is)); - intrcnt_updatename(is); - is->is_count = &intrcnt[is->is_index]; - intrcnt_setname(straystr, is->is_index + 1); - is->is_straycount = &intrcnt[is->is_index + 1]; -} - -static void -intr_init(void *dummy __unused) -{ - - 
intrcnt_setname("???", 0); - intrcnt_index = 1; - mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN); -} -SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL) - -#ifdef DDB -/* - * Dump data about interrupt handlers - */ -DB_SHOW_COMMAND(irqs, db_show_irqs) -{ - struct intsrc **isrc; - int i, quit, verbose; - - quit = 0; - if (strcmp(modif, "v") == 0) - verbose = 1; - else - verbose = 0; - isrc = interrupt_sources; - db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE); - for (i = 0; i < NUM_IO_INTS && !quit; i++, isrc++) - if (*isrc != NULL) - db_dump_ithread((*isrc)->is_ithread, verbose); -} -#endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/io_apic.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,850 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/io_apic.c,v 1.14 2004/08/02 15:31:10 scottl Exp $"); - -#include "opt_isa.h" -#include "opt_no_mixed_mode.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/lock.h> -#include <sys/mutex.h> - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <machine/apicreg.h> -#include <machine/frame.h> -#include <machine/intr_machdep.h> -#include <machine/apicvar.h> -#include <machine/segments.h> - -#define IOAPIC_ISA_INTS 16 -#define IOAPIC_MEM_REGION 32 -#define IOAPIC_REDTBL_LO(i) (IOAPIC_REDTBL + (i) * 2) -#define IOAPIC_REDTBL_HI(i) (IOAPIC_REDTBL_LO(i) + 1) - -#define VECTOR_EXTINT 252 -#define VECTOR_NMI 253 -#define VECTOR_SMI 254 -#define VECTOR_DISABLED 255 - -#define DEST_NONE -1 -#define DEST_EXTINT -2 - -#define TODO printf("%s: not implemented!\n", __func__) - -MALLOC_DEFINE(M_IOAPIC, "I/O APIC", "I/O APIC structures"); - -/* - * New interrupt support code.. - * - * XXX: we really should have the interrupt cookie passed up from new-bus - * just be a int pin, and not map 1:1 to interrupt vector number but should - * use INTR_TYPE_FOO to set priority bands for device classes and do all the - * magic remapping of intpin to vector in here. For now we just cheat as on - * ia64 and map intpin X to vector NRSVIDT + X. 
Note that we assume that the - * first IO APIC has ISA interrupts on pins 1-15. Not sure how you are - * really supposed to figure out which IO APIC in a system with multiple IO - * APIC's actually has the ISA interrupts routed to it. As far as interrupt - * pin numbers, we use the ACPI System Interrupt number model where each - * IO APIC has a contiguous chunk of the System Interrupt address space. - */ - -/* - * Direct the ExtINT pin on the first I/O APIC to a logical cluster of - * CPUs rather than a physical destination of just the BSP. - * - * Note: This is disabled by default as test systems seem to croak with it - * enabled. -#define ENABLE_EXTINT_LOGICAL_DESTINATION - */ - -struct ioapic_intsrc { - struct intsrc io_intsrc; - u_int io_intpin:8; - u_int io_vector:8; - u_int io_activehi:1; - u_int io_edgetrigger:1; - u_int io_masked:1; - int io_dest:5; - int io_bus:4; -}; - -struct ioapic { - struct pic io_pic; - u_int io_id:8; /* logical ID */ - u_int io_apic_id:4; - u_int io_intbase:8; /* System Interrupt base */ - u_int io_numintr:8; - volatile ioapic_t *io_addr; /* XXX: should use bus_space */ - STAILQ_ENTRY(ioapic) io_next; - struct ioapic_intsrc io_pins[0]; -}; - -static u_int ioapic_read(volatile ioapic_t *apic, int reg); -static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); -static const char *ioapic_bus_string(int bus_type); -static void ioapic_print_vector(struct ioapic_intsrc *intpin); -static void ioapic_enable_source(struct intsrc *isrc); -static void ioapic_disable_source(struct intsrc *isrc, int eoi); -static void ioapic_eoi_source(struct intsrc *isrc); -static void ioapic_enable_intr(struct intsrc *isrc); -static int ioapic_vector(struct intsrc *isrc); -static int ioapic_source_pending(struct intsrc *isrc); -static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, - enum intr_polarity pol); -static void ioapic_suspend(struct intsrc *isrc); -static void ioapic_resume(struct intsrc *isrc); -static void 
ioapic_program_destination(struct ioapic_intsrc *intpin); -static void ioapic_program_intpin(struct ioapic_intsrc *intpin); -static void ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin); - -static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); -struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source, - ioapic_eoi_source, ioapic_enable_intr, - ioapic_vector, ioapic_source_pending, - ioapic_suspend, ioapic_resume, - ioapic_config_intr }; - -static int bsp_id, current_cluster, logical_clusters, next_ioapic_base; -static u_int mixed_mode_enabled, next_id, program_logical_dest; -#ifdef NO_MIXED_MODE -static int mixed_mode_active = 0; -#else -static int mixed_mode_active = 1; -#endif -TUNABLE_INT("hw.apic.mixed_mode", &mixed_mode_active); - -static __inline void -_ioapic_eoi_source(struct intsrc *isrc) -{ - lapic_eoi(); -} - -static u_int -ioapic_read(volatile ioapic_t *apic, int reg) -{ - - mtx_assert(&icu_lock, MA_OWNED); - apic->ioregsel = reg; - return (apic->iowin); -} - -static void -ioapic_write(volatile ioapic_t *apic, int reg, u_int val) -{ - - mtx_assert(&icu_lock, MA_OWNED); - apic->ioregsel = reg; - apic->iowin = val; -} - -static const char * -ioapic_bus_string(int bus_type) -{ - - switch (bus_type) { - case APIC_BUS_ISA: - return ("ISA"); - case APIC_BUS_EISA: - return ("EISA"); - case APIC_BUS_PCI: - return ("PCI"); - default: - return ("unknown"); - } -} - -static void -ioapic_print_vector(struct ioapic_intsrc *intpin) -{ - - switch (intpin->io_vector) { - case VECTOR_DISABLED: - printf("disabled"); - break; - case VECTOR_EXTINT: - printf("ExtINT"); - break; - case VECTOR_NMI: - printf("NMI"); - break; - case VECTOR_SMI: - printf("SMI"); - break; - default: - printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), - intpin->io_vector); - } -} - -static void -ioapic_enable_source(struct intsrc *isrc) -{ - struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - struct ioapic *io = (struct ioapic 
*)isrc->is_pic; - uint32_t flags; - - mtx_lock_spin(&icu_lock); - if (intpin->io_masked) { - flags = ioapic_read(io->io_addr, - IOAPIC_REDTBL_LO(intpin->io_intpin)); - flags &= ~(IOART_INTMASK); - ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), - flags); - intpin->io_masked = 0; - } - mtx_unlock_spin(&icu_lock); -} - -static void -ioapic_disable_source(struct intsrc *isrc, int eoi) -{ - struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - struct ioapic *io = (struct ioapic *)isrc->is_pic; - uint32_t flags; - - mtx_lock_spin(&icu_lock); - if (!intpin->io_masked && !intpin->io_edgetrigger) { - flags = ioapic_read(io->io_addr, - IOAPIC_REDTBL_LO(intpin->io_intpin)); - flags |= IOART_INTMSET; - ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), - flags); - intpin->io_masked = 1; - } - - if (eoi == PIC_EOI) - _ioapic_eoi_source(isrc); - - mtx_unlock_spin(&icu_lock); -} - -static void -ioapic_eoi_source(struct intsrc *isrc) -{ - - _ioapic_eoi_source(isrc); -} - -/* - * Completely program an intpin based on the data in its interrupt source - * structure. - */ -static void -ioapic_program_intpin(struct ioapic_intsrc *intpin) -{ - struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; - uint32_t low, high, value; - - /* - * For pins routed via mixed mode or disabled, just ensure that - * they are masked. - */ - if (intpin->io_dest == DEST_EXTINT || - intpin->io_vector == VECTOR_DISABLED) { - low = ioapic_read(io->io_addr, - IOAPIC_REDTBL_LO(intpin->io_intpin)); - if ((low & IOART_INTMASK) == IOART_INTMCLR) - ioapic_write(io->io_addr, - IOAPIC_REDTBL_LO(intpin->io_intpin), - low | IOART_INTMSET); - return; - } - - /* Set the destination. */ - if (intpin->io_dest == DEST_NONE) { - low = IOART_DESTPHY; - high = bsp_id << APIC_ID_SHIFT; - } else { - low = IOART_DESTLOG; - high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT | - APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT; - } - - /* Program the rest of the low word. 
*/ - if (intpin->io_edgetrigger) - low |= IOART_TRGREDG; - else - low |= IOART_TRGRLVL; - if (intpin->io_activehi) - low |= IOART_INTAHI; - else - low |= IOART_INTALO; - if (intpin->io_masked) - low |= IOART_INTMSET; - switch (intpin->io_vector) { - case VECTOR_EXTINT: - KASSERT(intpin->io_edgetrigger, - ("EXTINT not edge triggered")); - low |= IOART_DELEXINT; - break; - case VECTOR_NMI: - KASSERT(intpin->io_edgetrigger, - ("NMI not edge triggered")); - low |= IOART_DELNMI; - break; - case VECTOR_SMI: - KASSERT(intpin->io_edgetrigger, - ("SMI not edge triggered")); - low |= IOART_DELSMI; - break; - default: - low |= IOART_DELLOPRI | apic_irq_to_idt(intpin->io_vector); - } - - /* Write the values to the APIC. */ - mtx_lock_spin(&icu_lock); - ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low); - value = ioapic_read(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin)); - value &= ~IOART_DEST; - value |= high; - ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), value); - mtx_unlock_spin(&icu_lock); -} - -/* - * Program an individual intpin's logical destination. - */ -static void -ioapic_program_destination(struct ioapic_intsrc *intpin) -{ - struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic; - - KASSERT(intpin->io_dest != DEST_NONE, - ("intpin not assigned to a cluster")); - KASSERT(intpin->io_dest != DEST_EXTINT, - ("intpin routed via ExtINT")); - if (bootverbose) { - printf("ioapic%u: routing intpin %u (", io->io_id, - intpin->io_intpin); - ioapic_print_vector(intpin); - printf(") to cluster %u\n", intpin->io_dest); - } - ioapic_program_intpin(intpin); -} - -static void -ioapic_assign_cluster(struct ioapic_intsrc *intpin) -{ - - /* - * Assign this intpin to a logical APIC cluster in a - * round-robin fashion. We don't actually use the logical - * destination for this intpin until after all the CPU's - * have been started so that we don't end up with interrupts - * that don't go anywhere. 
Another alternative might be to - * start up the CPU's earlier so that they can handle interrupts - * sooner. - */ - intpin->io_dest = current_cluster; - current_cluster++; - if (current_cluster >= logical_clusters) - current_cluster = 0; - if (program_logical_dest) - ioapic_program_destination(intpin); -} - -static void -ioapic_enable_intr(struct intsrc *isrc) -{ - struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - - KASSERT(intpin->io_dest != DEST_EXTINT, - ("ExtINT pin trying to use ioapic enable_intr method")); - if (intpin->io_dest == DEST_NONE) { - ioapic_assign_cluster(intpin); - lapic_enable_intr(intpin->io_vector); - } -} - -static int -ioapic_vector(struct intsrc *isrc) -{ - struct ioapic_intsrc *pin; - - pin = (struct ioapic_intsrc *)isrc; - return (pin->io_vector); -} - -static int -ioapic_source_pending(struct intsrc *isrc) -{ - struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - - return (lapic_intr_pending(intpin->io_vector)); -} - -static int -ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, - enum intr_polarity pol) -{ - struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - struct ioapic *io = (struct ioapic *)isrc->is_pic; - int changed; - - KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), - ("%s: Conforming trigger or polarity\n", __func__)); - - /* - * EISA interrupts always use active high polarity, so don't allow - * them to be set to active low. - * - * XXX: Should we write to the ELCR if the trigger mode changes for - * an EISA IRQ? - */ - if (intpin->io_bus == APIC_BUS_EISA) - pol = INTR_POLARITY_HIGH; - changed = 0; - if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) { - if (bootverbose) - printf("ioapic%u: Changing trigger for pin %u to %s\n", - io->io_id, intpin->io_intpin, - trig == INTR_TRIGGER_EDGE ? 
"edge" : "level"); - intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE); - changed++; - } - if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) { - if (bootverbose) - printf("ioapic%u: Changing polarity for pin %u to %s\n", - io->io_id, intpin->io_intpin, - pol == INTR_POLARITY_HIGH ? "high" : "low"); - intpin->io_activehi = (pol == INTR_POLARITY_HIGH); - changed++; - } - if (changed) - ioapic_program_intpin(intpin); - return (0); -} - -static void -ioapic_suspend(struct intsrc *isrc) -{ - - TODO; -} - -static void -ioapic_resume(struct intsrc *isrc) -{ - - ioapic_program_intpin((struct ioapic_intsrc *)isrc); -} - -/* - * APIC enumerators call this function to indicate that the 8259A AT PICs - * are available and that mixed mode can be used. - */ -void -ioapic_enable_mixed_mode(void) -{ - - mixed_mode_enabled = 1; -} - -/* - * Allocate and return a logical cluster ID. Note that the first time - * this is called, it returns cluster 0. ioapic_enable_intr() treats - * the two cases of logical_clusters == 0 and logical_clusters == 1 the - * same: one cluster of ID 0 exists. The logical_clusters == 0 case is - * for UP kernels, which should never call this function. - */ -int -ioapic_next_logical_cluster(void) -{ - - if (logical_clusters >= APIC_MAX_CLUSTER) - panic("WARNING: Local APIC cluster IDs exhausted!"); - return (logical_clusters++); -} - -/* - * Create a plain I/O APIC object. 
- */ -void * -ioapic_create(uintptr_t addr, int32_t apic_id, int intbase) -{ - struct ioapic *io; - struct ioapic_intsrc *intpin; - volatile ioapic_t *apic; - u_int numintr, i; - uint32_t value; - - apic = (ioapic_t *)pmap_mapdev(addr, IOAPIC_MEM_REGION); - mtx_lock_spin(&icu_lock); - numintr = ((ioapic_read(apic, IOAPIC_VER) & IOART_VER_MAXREDIR) >> - MAXREDIRSHIFT) + 1; - mtx_unlock_spin(&icu_lock); - io = malloc(sizeof(struct ioapic) + - numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK); - io->io_pic = ioapic_template; - mtx_lock_spin(&icu_lock); - io->io_id = next_id++; - io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; - if (apic_id != -1 && io->io_apic_id != apic_id) { - ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); - mtx_unlock_spin(&icu_lock); - io->io_apic_id = apic_id; - printf("ioapic%u: Changing APIC ID to %d\n", io->io_id, - apic_id); - } else - mtx_unlock_spin(&icu_lock); - if (intbase == -1) { - intbase = next_ioapic_base; - printf("ioapic%u: Assuming intbase of %d\n", io->io_id, - intbase); - } else if (intbase != next_ioapic_base) - printf("ioapic%u: WARNING: intbase %d != expected base %d\n", - io->io_id, intbase, next_ioapic_base); - io->io_intbase = intbase; - next_ioapic_base = intbase + numintr; - io->io_numintr = numintr; - io->io_addr = apic; - - /* - * Initialize pins. Start off with interrupts disabled. Default - * to active-hi and edge-triggered for ISA interrupts and active-lo - * and level-triggered for all others. - */ - bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr); - mtx_lock_spin(&icu_lock); - for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) { - intpin->io_intsrc.is_pic = (struct pic *)io; - intpin->io_intpin = i; - intpin->io_vector = intbase + i; - - /* - * Assume that pin 0 on the first I/O APIC is an ExtINT pin - * and that pins 1-15 are ISA interrupts. Assume that all - * other pins are PCI interrupts. 
- */ - if (intpin->io_vector == 0) - ioapic_set_extint(io, i); - else if (intpin->io_vector < IOAPIC_ISA_INTS) { - intpin->io_bus = APIC_BUS_ISA; - intpin->io_activehi = 1; - intpin->io_edgetrigger = 1; - intpin->io_masked = 1; - } else { - intpin->io_bus = APIC_BUS_PCI; - intpin->io_activehi = 0; - intpin->io_edgetrigger = 0; - intpin->io_masked = 1; - } - - /* - * Route interrupts to the BSP by default using physical - * addressing. Vectored interrupts get readdressed using - * logical IDs to CPU clusters when they are enabled. - */ - intpin->io_dest = DEST_NONE; - if (bootverbose && intpin->io_vector != VECTOR_DISABLED) { - printf("ioapic%u: intpin %d -> ", io->io_id, i); - ioapic_print_vector(intpin); - printf(" (%s, %s)\n", intpin->io_edgetrigger ? - "edge" : "level", intpin->io_activehi ? "high" : - "low"); - } - value = ioapic_read(apic, IOAPIC_REDTBL_LO(i)); - ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET); - } - mtx_unlock_spin(&icu_lock); - - return (io); -} - -int -ioapic_get_vector(void *cookie, u_int pin) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (-1); - return (io->io_pins[pin].io_vector); -} - -int -ioapic_disable_pin(void *cookie, u_int pin) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (EINVAL); - if (io->io_pins[pin].io_vector == VECTOR_DISABLED) - return (EINVAL); - io->io_pins[pin].io_vector = VECTOR_DISABLED; - if (bootverbose) - printf("ioapic%u: intpin %d disabled\n", io->io_id, pin); - return (0); -} - -int -ioapic_remap_vector(void *cookie, u_int pin, int vector) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr || vector < 0) - return (EINVAL); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_vector = vector; - if (bootverbose) - printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id, - vector, pin); - return (0); -} - -int 
-ioapic_set_bus(void *cookie, u_int pin, int bus_type) -{ - struct ioapic *io; - - if (bus_type < 0 || bus_type > APIC_BUS_MAX) - return (EINVAL); - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (EINVAL); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_bus = bus_type; - if (bootverbose) - printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin, - ioapic_bus_string(bus_type)); - return (0); -} - -int -ioapic_set_nmi(void *cookie, u_int pin) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (EINVAL); - if (io->io_pins[pin].io_vector == VECTOR_NMI) - return (0); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; - io->io_pins[pin].io_vector = VECTOR_NMI; - io->io_pins[pin].io_masked = 0; - io->io_pins[pin].io_edgetrigger = 1; - io->io_pins[pin].io_activehi = 1; - if (bootverbose) - printf("ioapic%u: Routing NMI -> intpin %d\n", - io->io_id, pin); - return (0); -} - -int -ioapic_set_smi(void *cookie, u_int pin) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (EINVAL); - if (io->io_pins[pin].io_vector == VECTOR_SMI) - return (0); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; - io->io_pins[pin].io_vector = VECTOR_SMI; - io->io_pins[pin].io_masked = 0; - io->io_pins[pin].io_edgetrigger = 1; - io->io_pins[pin].io_activehi = 1; - if (bootverbose) - printf("ioapic%u: Routing SMI -> intpin %d\n", - io->io_id, pin); - return (0); -} - -int -ioapic_set_extint(void *cookie, u_int pin) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr) - return (EINVAL); - if (io->io_pins[pin].io_vector == VECTOR_EXTINT) - return (0); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; - io->io_pins[pin].io_vector = 
VECTOR_EXTINT; - - /* Enable this pin if mixed mode is available and active. */ - if (mixed_mode_enabled && mixed_mode_active) - io->io_pins[pin].io_masked = 0; - else - io->io_pins[pin].io_masked = 1; - io->io_pins[pin].io_edgetrigger = 1; - io->io_pins[pin].io_activehi = 1; - if (bootverbose) - printf("ioapic%u: Routing external 8259A's -> intpin %d\n", - io->io_id, pin); - return (0); -} - -int -ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) - return (EINVAL); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_activehi = (pol == INTR_POLARITY_HIGH); - if (bootverbose) - printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin, - pol == INTR_POLARITY_HIGH ? "high" : "low"); - return (0); -} - -int -ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) -{ - struct ioapic *io; - - io = (struct ioapic *)cookie; - if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) - return (EINVAL); - if (io->io_pins[pin].io_vector >= NUM_IO_INTS) - return (EINVAL); - io->io_pins[pin].io_edgetrigger = (trigger == INTR_TRIGGER_EDGE); - if (bootverbose) - printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin, - trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); - return (0); -} - -/* - * Register a complete I/O APIC object with the interrupt subsystem. 
- */ -void -ioapic_register(void *cookie) -{ - struct ioapic_intsrc *pin; - struct ioapic *io; - volatile ioapic_t *apic; - uint32_t flags; - int i; - - io = (struct ioapic *)cookie; - apic = io->io_addr; - mtx_lock_spin(&icu_lock); - flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION; - STAILQ_INSERT_TAIL(&ioapic_list, io, io_next); - mtx_unlock_spin(&icu_lock); - printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n", - io->io_id, flags >> 4, flags & 0xf, io->io_intbase, - io->io_intbase + io->io_numintr - 1); - bsp_id = PCPU_GET(apic_id); - for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { - /* - * Finish initializing the pins by programming the vectors - * and delivery mode. - */ - if (pin->io_vector == VECTOR_DISABLED) - continue; - ioapic_program_intpin(pin); - if (pin->io_vector >= NUM_IO_INTS) - continue; - /* - * Route IRQ0 via the 8259A using mixed mode if mixed mode - * is available and turned on. - */ - if (pin->io_vector == 0 && mixed_mode_active && - mixed_mode_enabled) - ioapic_setup_mixed_mode(pin); - else - intr_register_source(&pin->io_intsrc); - } -} - -/* - * Program all the intpins to use logical destinations once the AP's - * have been launched. - */ -static void -ioapic_set_logical_destinations(void *arg __unused) -{ - struct ioapic *io; - int i; - - program_logical_dest = 1; - STAILQ_FOREACH(io, &ioapic_list, io_next) - for (i = 0; i < io->io_numintr; i++) - if (io->io_pins[i].io_dest != DEST_NONE && - io->io_pins[i].io_dest != DEST_EXTINT) - ioapic_program_destination(&io->io_pins[i]); -} -SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND, - ioapic_set_logical_destinations, NULL) - -/* - * Support for mixed-mode interrupt sources. These sources route an ISA - * IRQ through the 8259A's via the ExtINT on pin 0 of the I/O APIC that - * routes the ISA interrupts. We just ignore the intpins that use this - * mode and allow the atpic driver to register its interrupt source for - * that IRQ instead. 
- */ - -static void -ioapic_setup_mixed_mode(struct ioapic_intsrc *intpin) -{ - struct ioapic_intsrc *extint; - struct ioapic *io; - - /* - * Mark the associated I/O APIC intpin as being delivered via - * ExtINT and enable the ExtINT pin on the I/O APIC if needed. - */ - intpin->io_dest = DEST_EXTINT; - io = (struct ioapic *)intpin->io_intsrc.is_pic; - extint = &io->io_pins[0]; - if (extint->io_vector != VECTOR_EXTINT) - panic("Can't find ExtINT pin to route through!"); -#ifdef ENABLE_EXTINT_LOGICAL_DESTINATION - if (extint->io_dest == DEST_NONE) - ioapic_assign_cluster(extint); -#endif -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,763 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx> - * Copyright (c) 1996, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * 3. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Local APIC support on Pentium and later processors. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/local_apic.c,v 1.9 2004/07/14 18:12:15 jhb Exp $"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/pcpu.h> - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <machine/apicreg.h> -#include <machine/cputypes.h> -#include <machine/frame.h> -#include <machine/intr_machdep.h> -#include <machine/apicvar.h> -#include <machine/md_var.h> -#include <machine/smp.h> -#include <machine/specialreg.h> - -/* - * We can handle up to 60 APICs via our logical cluster IDs, but currently - * the physical IDs on Intel processors up to the Pentium 4 are limited to - * 16. - */ -#define MAX_APICID 16 - -/* Sanity checks on IDT vectors. */ -CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS <= APIC_LOCAL_INTS); -CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); - -/* - * Support for local APICs. Local APICs manage interrupts on each - * individual processor as opposed to I/O APICs which receive interrupts - * from I/O devices and then forward them on to the local APICs. - * - * Local APICs can also send interrupts to each other thus providing the - * mechanism for IPIs. 
- */ - -struct lvt { - u_int lvt_edgetrigger:1; - u_int lvt_activehi:1; - u_int lvt_masked:1; - u_int lvt_active:1; - u_int lvt_mode:16; - u_int lvt_vector:8; -}; - -struct lapic { - struct lvt la_lvts[LVT_MAX + 1]; - u_int la_id:8; - u_int la_cluster:4; - u_int la_cluster_id:2; - u_int la_present:1; -} static lapics[MAX_APICID]; - -/* XXX: should thermal be an NMI? */ - -/* Global defaults for local APIC LVT entries. */ -static struct lvt lvts[LVT_MAX + 1] = { - { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ - { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ - { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Timer: needs a vector */ - { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Error: needs a vector */ - { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* PMC */ - { 1, 1, 1, 1, APIC_LVT_DM_FIXED, 0 }, /* Thermal: needs a vector */ -}; - -static inthand_t *ioint_handlers[] = { - NULL, /* 0 - 31 */ - IDTVEC(apic_isr1), /* 32 - 63 */ - IDTVEC(apic_isr2), /* 64 - 95 */ - IDTVEC(apic_isr3), /* 96 - 127 */ - IDTVEC(apic_isr4), /* 128 - 159 */ - IDTVEC(apic_isr5), /* 160 - 191 */ - IDTVEC(apic_isr6), /* 192 - 223 */ - IDTVEC(apic_isr7), /* 224 - 255 */ -}; - -volatile lapic_t *lapic; - -static uint32_t -lvt_mode(struct lapic *la, u_int pin, uint32_t value) -{ - struct lvt *lvt; - - KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin)); - if (la->la_lvts[pin].lvt_active) - lvt = &la->la_lvts[pin]; - else - lvt = &lvts[pin]; - - value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | - APIC_LVT_VECTOR); - if (lvt->lvt_edgetrigger == 0) - value |= APIC_LVT_TM; - if (lvt->lvt_activehi == 0) - value |= APIC_LVT_IIPP_INTALO; - if (lvt->lvt_masked) - value |= APIC_LVT_M; - value |= lvt->lvt_mode; - switch (lvt->lvt_mode) { - case APIC_LVT_DM_NMI: - case APIC_LVT_DM_SMI: - case APIC_LVT_DM_INIT: - case APIC_LVT_DM_EXTINT: - if (!lvt->lvt_edgetrigger) { - printf("lapic%u: Forcing LINT%u to edge trigger\n", - la->la_id, pin); - value |= APIC_LVT_TM; - } 
- /* Use a vector of 0. */ - break; - case APIC_LVT_DM_FIXED: -#if 0 - value |= lvt->lvt_vector; -#else - panic("Fixed LINT pins not supported"); -#endif - break; - default: - panic("bad APIC LVT delivery mode: %#x\n", value); - } - return (value); -} - -/* - * Map the local APIC and setup necessary interrupt vectors. - */ -void -lapic_init(uintptr_t addr) -{ - u_int32_t value; - - /* Map the local APIC and setup the spurious interrupt handler. */ - KASSERT(trunc_page(addr) == addr, - ("local APIC not aligned on a page boundary")); - lapic = (lapic_t *)pmap_mapdev(addr, sizeof(lapic_t)); - setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - - /* Perform basic initialization of the BSP's local APIC. */ - value = lapic->svr; - value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); - value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); - lapic->svr = value; - - /* Set BSP's per-CPU local APIC ID. */ - PCPU_SET(apic_id, lapic_id()); - - /* XXX: timer/error/thermal interrupts */ -} - -/* - * Create a local APIC instance. - */ -void -lapic_create(u_int apic_id, int boot_cpu) -{ - int i; - - if (apic_id >= MAX_APICID) { - printf("APIC: Ignoring local APIC with ID %d\n", apic_id); - if (boot_cpu) - panic("Can't ignore BSP"); - return; - } - KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", - apic_id)); - - /* - * Assume no local LVT overrides and a cluster of 0 and - * intra-cluster ID of 0. 
- */ - lapics[apic_id].la_present = 1; - lapics[apic_id].la_id = apic_id; - for (i = 0; i < LVT_MAX; i++) { - lapics[apic_id].la_lvts[i] = lvts[i]; - lapics[apic_id].la_lvts[i].lvt_active = 0; - } - -#ifdef SMP - cpu_add(apic_id, boot_cpu); -#endif -} - -/* - * Dump contents of local APIC registers - */ -void -lapic_dump(const char* str) -{ - - printf("cpu%d %s:\n", PCPU_GET(cpuid), str); - printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", - lapic->id, lapic->version, lapic->ldr, lapic->dfr); - printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", - lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); -} - -void -lapic_enable_intr(u_int irq) -{ - u_int vector; - - vector = apic_irq_to_idt(irq); - KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); - KASSERT(ioint_handlers[vector / 32] != NULL, - ("No ISR handler for IRQ %u", irq)); - setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); -} - -void -lapic_setup(void) -{ - struct lapic *la; - u_int32_t value, maxlvt; - register_t eflags; - - la = &lapics[lapic_id()]; - KASSERT(la->la_present, ("missing APIC structure")); - eflags = intr_disable(); - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; - - /* Program LINT[01] LVT entries. */ - lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); - lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); - - /* XXX: more LVT entries */ - - /* Clear the TPR. */ - value = lapic->tpr; - value &= ~APIC_TPR_PRIO; - lapic->tpr = value; - - /* Use the cluster model for logical IDs. */ - value = lapic->dfr; - value &= ~APIC_DFR_MODEL_MASK; - value |= APIC_DFR_MODEL_CLUSTER; - lapic->dfr = value; - - /* Set this APIC's logical ID. */ - value = lapic->ldr; - value &= ~APIC_ID_MASK; - value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT | - 1 << la->la_cluster_id) << APIC_ID_SHIFT; - lapic->ldr = value; - - /* Setup spurious vector and enable the local APIC. 
*/ - value = lapic->svr; - value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); - value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); - lapic->svr = value; - intr_restore(eflags); -} - -void -lapic_disable(void) -{ - uint32_t value; - - /* Software disable the local APIC. */ - value = lapic->svr; - value &= ~APIC_SVR_SWEN; - lapic->svr = value; -} - -int -lapic_id(void) -{ - - KASSERT(lapic != NULL, ("local APIC is not mapped")); - return (lapic->id >> APIC_ID_SHIFT); -} - -int -lapic_intr_pending(u_int vector) -{ - volatile u_int32_t *irr; - - /* - * The IRR registers are an array of 128-bit registers each of - * which only describes 32 interrupts in the low 32 bits.. Thus, - * we divide the vector by 32 to get the 128-bit index. We then - * multiply that index by 4 to get the equivalent index from - * treating the IRR as an array of 32-bit registers. Finally, we - * modulus the vector by 32 to determine the individual bit to - * test. - */ - irr = &lapic->irr0; - return (irr[(vector / 32) * 4] & 1 << (vector % 32)); -} - -void -lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) -{ - struct lapic *la; - - KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", - __func__, apic_id)); - KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", - __func__, cluster)); - KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, - ("%s: intra cluster id %u too big", __func__, cluster_id)); - la = &lapics[apic_id]; - la->la_cluster = cluster; - la->la_cluster_id = cluster_id; -} - -int -lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) -{ - - if (pin > LVT_MAX) - return (EINVAL); - if (apic_id == APIC_ID_ALL) { - lvts[pin].lvt_masked = masked; - if (bootverbose) - printf("lapic:"); - } else { - KASSERT(lapics[apic_id].la_present, - ("%s: missing APIC %u", __func__, apic_id)); - lapics[apic_id].la_lvts[pin].lvt_masked = masked; - lapics[apic_id].la_lvts[pin].lvt_active = 1; - if (bootverbose) - printf("lapic%u:", apic_id); - } - if 
(bootverbose) - printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); - return (0); -} - -int -lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) -{ - struct lvt *lvt; - - if (pin > LVT_MAX) - return (EINVAL); - if (apic_id == APIC_ID_ALL) { - lvt = &lvts[pin]; - if (bootverbose) - printf("lapic:"); - } else { - KASSERT(lapics[apic_id].la_present, - ("%s: missing APIC %u", __func__, apic_id)); - lvt = &lapics[apic_id].la_lvts[pin]; - lvt->lvt_active = 1; - if (bootverbose) - printf("lapic%u:", apic_id); - } - lvt->lvt_mode = mode; - switch (mode) { - case APIC_LVT_DM_NMI: - case APIC_LVT_DM_SMI: - case APIC_LVT_DM_INIT: - case APIC_LVT_DM_EXTINT: - lvt->lvt_edgetrigger = 1; - lvt->lvt_activehi = 1; - if (mode == APIC_LVT_DM_EXTINT) - lvt->lvt_masked = 1; - else - lvt->lvt_masked = 0; - break; - default: - panic("Unsupported delivery mode: 0x%x\n", mode); - } - if (bootverbose) { - printf(" Routing "); - switch (mode) { - case APIC_LVT_DM_NMI: - printf("NMI"); - break; - case APIC_LVT_DM_SMI: - printf("SMI"); - break; - case APIC_LVT_DM_INIT: - printf("INIT"); - break; - case APIC_LVT_DM_EXTINT: - printf("ExtINT"); - break; - } - printf(" -> LINT%u\n", pin); - } - return (0); -} - -int -lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) -{ - - if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM) - return (EINVAL); - if (apic_id == APIC_ID_ALL) { - lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); - if (bootverbose) - printf("lapic:"); - } else { - KASSERT(lapics[apic_id].la_present, - ("%s: missing APIC %u", __func__, apic_id)); - lapics[apic_id].la_lvts[pin].lvt_active = 1; - lapics[apic_id].la_lvts[pin].lvt_activehi = - (pol == INTR_POLARITY_HIGH); - if (bootverbose) - printf("lapic%u:", apic_id); - } - if (bootverbose) - printf(" LINT%u polarity: active-%s\n", pin, - pol == INTR_POLARITY_HIGH ? 
"high" : "low"); - return (0); -} - -int -lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) -{ - - if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM) - return (EINVAL); - if (apic_id == APIC_ID_ALL) { - lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); - if (bootverbose) - printf("lapic:"); - } else { - KASSERT(lapics[apic_id].la_present, - ("%s: missing APIC %u", __func__, apic_id)); - lapics[apic_id].la_lvts[pin].lvt_edgetrigger = - (trigger == INTR_TRIGGER_EDGE); - lapics[apic_id].la_lvts[pin].lvt_active = 1; - if (bootverbose) - printf("lapic%u:", apic_id); - } - if (bootverbose) - printf(" LINT%u trigger: %s\n", pin, - trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); - return (0); -} - -void -lapic_eoi(void) -{ - - lapic->eoi = 0; -} - -void -lapic_handle_intr(struct intrframe frame) -{ - struct intsrc *isrc; - - if (frame.if_vec == -1) - panic("Couldn't get vector from ISR!"); - isrc = intr_lookup_source(apic_idt_to_irq(frame.if_vec)); - intr_execute_handlers(isrc, &frame); -} - -/* Translate between IDT vectors and IRQ vectors. */ -u_int -apic_irq_to_idt(u_int irq) -{ - u_int vector; - - KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); - vector = irq + APIC_IO_INTS; - if (vector >= IDT_SYSCALL) - vector++; - return (vector); -} - -u_int -apic_idt_to_irq(u_int vector) -{ - - KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && - vector <= APIC_IO_INTS + NUM_IO_INTS, - ("Vector %u does not map to an IRQ line", vector)); - if (vector > IDT_SYSCALL) - vector--; - return (vector - APIC_IO_INTS); -} - -/* - * APIC probing support code. This includes code to manage enumerators. 
- */ - -static SLIST_HEAD(, apic_enumerator) enumerators = - SLIST_HEAD_INITIALIZER(enumerators); -static struct apic_enumerator *best_enum; - -void -apic_register_enumerator(struct apic_enumerator *enumerator) -{ -#ifdef INVARIANTS - struct apic_enumerator *apic_enum; - - SLIST_FOREACH(apic_enum, &enumerators, apic_next) { - if (apic_enum == enumerator) - panic("%s: Duplicate register of %s", __func__, - enumerator->apic_name); - } -#endif - SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); -} - -/* - * Probe the APIC enumerators, enumerate CPUs, and initialize the - * local APIC. - */ -static void -apic_init(void *dummy __unused) -{ - uint64_t apic_base; - int retval, best; - - /* We only support built in local APICs. */ - if (!(cpu_feature & CPUID_APIC)) - return; - - /* Don't probe if APIC mode is disabled. */ - if (resource_disabled("apic", 0)) - return; - - /* First, probe all the enumerators to find the best match. */ - best_enum = NULL; - best = 0; -#ifndef XEN - SLIST_FOREACH(enumerator, &enumerators, apic_next) { - retval = enumerator->apic_probe(); - if (retval > 0) - continue; - if (best_enum == NULL || best < retval) { - best_enum = enumerator; - best = retval; - } - } -#endif - if (best_enum == NULL) { - if (bootverbose) - printf("APIC: Could not find any APICs.\n"); - return; - } - - if (bootverbose) - printf("APIC: Using the %s enumerator.\n", - best_enum->apic_name); - - /* - * To work around an errata, we disable the local APIC on some - * CPUs during early startup. We need to turn the local APIC back - * on on such CPUs now. - */ - if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xff0) == 0x610) { - apic_base = rdmsr(MSR_APICBASE); - apic_base |= APICBASE_ENABLED; - wrmsr(MSR_APICBASE, apic_base); - } - - /* Second, probe the CPU's in the system. 
*/ - retval = best_enum->apic_probe_cpus(); - if (retval != 0) - printf("%s: Failed to probe CPUs: returned %d\n", - best_enum->apic_name, retval); - - /* Third, initialize the local APIC. */ - retval = best_enum->apic_setup_local(); - if (retval != 0) - printf("%s: Failed to setup the local APIC: returned %d\n", - best_enum->apic_name, retval); -#ifdef SMP - /* Last, setup the cpu topology now that we have probed CPUs */ - mp_topology(); -#endif -} -SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL) - -/* - * Setup the I/O APICs. - */ -static void -apic_setup_io(void *dummy __unused) -{ - int retval; - - if (best_enum == NULL) - return; - retval = best_enum->apic_setup_io(); - if (retval != 0) - printf("%s: Failed to setup I/O APICs: returned %d\n", - best_enum->apic_name, retval); - - /* - * Finish setting up the local APIC on the BSP once we know how to - * properly program the LINT pins. - */ - lapic_setup(); - if (bootverbose) - lapic_dump("BSP"); -} -SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) - -#ifdef SMP -/* - * Inter Processor Interrupt functions. The lapic_ipi_*() functions are - * private the sys/i386 code. The public interface for the rest of the - * kernel is defined in mp_machdep.c. - */ - -int -lapic_ipi_wait(int delay) -{ - int x, incr; - - /* - * Wait delay loops for IPI to be sent. This is highly bogus - * since this is sensitive to CPU clock speed. If delay is - * -1, we wait forever. - */ - if (delay == -1) { - incr = 0; - delay = 1; - } else - incr = 1; - for (x = 0; x < delay; x += incr) { - if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) - return (1); - ia32_pause(); - } - return (0); -} - -void -lapic_ipi_raw(register_t icrlo, u_int dest) -{ - register_t value, eflags; - - /* XXX: Need more sanity checking of icrlo? 
*/ - KASSERT(lapic != NULL, ("%s called too early", __func__)); - KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, - ("%s: invalid dest field", __func__)); - KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, - ("%s: reserved bits set in ICR LO register", __func__)); - - /* Set destination in ICR HI register if it is being used. */ - eflags = intr_disable(); - if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { - value = lapic->icr_hi; - value &= ~APIC_ID_MASK; - value |= dest << APIC_ID_SHIFT; - lapic->icr_hi = value; - } - - /* Program the contents of the IPI and dispatch it. */ - value = lapic->icr_lo; - value &= APIC_ICRLO_RESV_MASK; - value |= icrlo; - lapic->icr_lo = value; - intr_restore(eflags); -} - -#define BEFORE_SPIN 1000000 -#ifdef DETECT_DEADLOCK -#define AFTER_SPIN 1000 -#endif - -void -lapic_ipi_vectored(u_int vector, int dest) -{ - register_t icrlo, destfield; - - KASSERT((vector & ~APIC_VECTOR_MASK) == 0, - ("%s: invalid vector %d", __func__, vector)); - - icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY | - APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE; - destfield = 0; - switch (dest) { - case APIC_IPI_DEST_SELF: - icrlo |= APIC_DEST_SELF; - break; - case APIC_IPI_DEST_ALL: - icrlo |= APIC_DEST_ALLISELF; - break; - case APIC_IPI_DEST_OTHERS: - icrlo |= APIC_DEST_ALLESELF; - break; - default: - KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, - ("%s: invalid destination 0x%x", __func__, dest)); - destfield = dest; - } - - /* Wait for an earlier IPI to finish. */ - if (!lapic_ipi_wait(BEFORE_SPIN)) - panic("APIC: Previous IPI is stuck"); - - lapic_ipi_raw(icrlo, destfield); - -#ifdef DETECT_DEADLOCK - /* Wait for IPI to be delivered. */ - if (!lapic_ipi_wait(AFTER_SPIN)) { -#ifdef needsattention - /* - * XXX FIXME: - * - * The above function waits for the message to actually be - * delivered. 
It breaks out after an arbitrary timeout - * since the message should eventually be delivered (at - * least in theory) and that if it wasn't we would catch - * the failure with the check above when the next IPI is - * sent. - * - * We could skiip this wait entirely, EXCEPT it probably - * protects us from other routines that assume that the - * message was delivered and acted upon when this function - * returns. - */ - printf("APIC: IPI might be stuck\n"); -#else /* !needsattention */ - /* Wait until mesage is sent without a timeout. */ - while (lapic->icr_lo & APIC_DELSTAT_PEND) - ia32_pause(); -#endif /* needsattention */ - } -#endif /* DETECT_DEADLOCK */ -} -#endif /* SMP */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,949 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $FreeBSD: src/sys/i386/i386/locore.s,v 1.181 2003/11/03 21:53:37 jhb Exp $ - * - * originally from: locore.s, by William F. Jolitz - * - * Substantially rewritten by David Greenman, Rod Grimes, - * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp - * and many others. - */ - -#include "opt_bootp.h" -#include "opt_compat.h" -#include "opt_nfsroot.h" -#include "opt_pmap.h" - -#include <sys/syscall.h> -#include <sys/reboot.h> - -#include <machine/asmacros.h> -#include <machine/cputypes.h> -#include <machine/psl.h> -#include <machine/pmap.h> -#include <machine/specialreg.h> - -#include "assym.s" - -.section __xen_guest - .asciz "LOADER=generic,GUEST_VER=5.3,XEN_VER=3.0,BSD_SYMTAB" - - -/* - * XXX - * - * Note: This version greatly munged to avoid various assembler errors - * that may be fixed in newer versions of gas. Perhaps newer versions - * will have more pleasant appearance. - */ - -/* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). 
- */ - .globl PTmap,PTD,PTDpde - .set PTmap,(PTDPTDI << PDRSHIFT) - .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) - .set PTDpde,PTD + (PTDPTDI * PDESIZE) - -#ifdef SMP -/* - * Define layout of per-cpu address space. - * This is "constructed" in locore.s on the BSP and in mp_machdep.c - * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! - */ - .globl SMP_prvspace - .set SMP_prvspace,(MPPTDI << PDRSHIFT) -#endif /* SMP */ - -/* - * Compiled KERNBASE location and the kernel load address - */ - .globl kernbase - .set kernbase,KERNBASE - .globl kernload - .set kernload,KERNLOAD - -/* - * Globals - */ - .data - ALIGN_DATA /* just to be sure */ - - .space 0x2000 /* space for tmpstk - temporary stack */ -tmpstk: - - .globl bootinfo -bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ - - .globl KERNend -KERNend: .long 0 /* phys addr end of kernel (just after bss) */ -physfree: .long 0 /* phys addr of next free page */ - -#ifdef SMP - .globl cpu0prvpage -cpu0pp: .long 0 /* phys addr cpu0 private pg */ -cpu0prvpage: .long 0 /* relocated version */ - - .globl SMPpt -SMPptpa: .long 0 /* phys addr SMP page table */ -SMPpt: .long 0 /* relocated version */ -#endif /* SMP */ - - .globl IdlePTD -IdlePTD: .long 0 /* phys addr of kernel PTD */ - - - .globl KPTphys -KPTphys: .long 0 /* phys addr of kernel page tables */ - - .globl proc0uarea, proc0kstack -proc0uarea: .long 0 /* address of proc 0 uarea space */ -proc0kstack: .long 0 /* address of proc 0 kstack space */ -p0upa: .long 0 /* phys addr of proc0's UAREA */ -p0kpa: .long 0 /* phys addr of proc0's STACK */ - -#ifdef PC98 - .globl pc98_system_parameter -pc98_system_parameter: - .space 0x240 -#endif - -/********************************************************************** - * - * Some handy macros - * - */ - -#define R(foo) ((foo)) - -#define ALLOCPAGES(foo) \ - movl R(physfree), %esi ; \ - movl $((foo)*PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, R(physfree) ; \ - movl %esi, %edi ; \ - movl 
$((foo)*PAGE_SIZE),%ecx ; \ - xorl %eax,%eax ; \ - cld ; \ - rep ; \ - stosb - -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $PTESHIFT,%ebx ; \ - addl base,%ebx ; \ - orl $PG_V,%eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $PTESIZE,%ebx ; /* next pte */ \ - loop 1b - -/* - * fillkptphys(prot) - * eax = physical address - * ecx = how many pages to map - * prot = protection bits - */ -#define fillkptphys(prot) \ - movl %eax, %ebx ; \ - shrl $PAGE_SHIFT, %ebx ; \ - fillkpt(R(KPTphys), prot) - - .text -/********************************************************************** - * - * This is where the bootblocks start us, set the ball rolling... - * - */ -NON_GPROF_ENTRY(btext) - pushl %esi - call initvalues - popl %esi - call identify_cpu - movl proc0kstack,%eax - leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp - xorl %ebp,%ebp /* mark end of frames */ - movl IdlePTD,%esi - movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - call init386 - call mi_startup - int $3 - - -#ifdef PC98 - /* save SYSTEM PARAMETER for resume (NS/T or other) */ - movl $0xa1400,%esi - movl $R(pc98_system_parameter),%edi - movl $0x0240,%ecx - cld - rep - movsb -#else /* IBM-PC */ -/* Tell the bios to warmboot next time */ - movw $0x1234,0x472 -#endif /* PC98 */ - -/* Set up a real frame in case the double return in newboot is executed. */ - pushl %ebp - movl %esp, %ebp - -/* Don't trust what the BIOS gives for eflags. */ - pushl $PSL_KERNEL - popfl - -/* - * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap - * to set %cs, %ds, %es and %ss. - */ - mov %ds, %ax - mov %ax, %fs - mov %ax, %gs - -/* - * Clear the bss. Not all boot programs do it, and it is our job anyway. 
- * - * XXX we don't check that there is memory for our bss and page tables - * before using it. - * - * Note: we must be careful to not overwrite an active gdt or idt. They - * inactive from now until we switch to new ones, since we don't load any - * more segment registers or permit interrupts until after the switch. - */ - movl $R(end),%ecx - movl $R(edata),%edi - subl %edi,%ecx - xorl %eax,%eax - cld - rep - stosb - - call recover_bootinfo - -/* Get onto a stack that we can trust. */ -/* - * XXX this step is delayed in case recover_bootinfo needs to return via - * the old stack, but it need not be, since recover_bootinfo actually - * returns via the old frame. - */ - movl $R(tmpstk),%esp - -#ifdef PC98 - /* pc98_machine_type & M_EPSON_PC98 */ - testb $0x02,R(pc98_system_parameter)+220 - jz 3f - /* epson_machine_id <= 0x0b */ - cmpb $0x0b,R(pc98_system_parameter)+224 - ja 3f - - /* count up memory */ - movl $0x100000,%eax /* next, talley remaining memory */ - movl $0xFFF-0x100,%ecx -1: movl 0(%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,0(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,0(%eax) /* restore memory */ - addl $PAGE_SIZE,%eax - loop 1b -2: subl $0x100000,%eax - shrl $17,%eax - movb %al,R(pc98_system_parameter)+1 -3: - - movw R(pc98_system_parameter+0x86),%ax - movw %ax,R(cpu_id) -#endif - - call identify_cpu - call create_pagetables - -/* - * If the CPU has support for VME, turn it on. - */ - testl $CPUID_VME, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_VME, %eax - movl %eax, %cr4 -1: - -/* Now enable paging */ - movl R(IdlePTD), %eax - movl %eax,%cr3 /* load ptd addr into mmu */ - movl %cr0,%eax /* get control word */ - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* and let's page NOW! 
*/ - - pushl $begin /* jump to high virtualized address */ - ret - -/* now running relocated at KERNBASE where the system is linked to run */ -begin: - /* set up bootstrap stack */ - movl proc0kstack,%eax /* location of in-kernel stack */ - /* bootstrap stack end location */ - leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp - - xorl %ebp,%ebp /* mark end of frames */ - -#ifdef PAE - movl IdlePDPT,%esi -#else - movl IdlePTD,%esi -#endif - movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - - pushl physfree /* value of first for init386(first) */ - call init386 /* wire 386 chip for unix operation */ - - /* - * Clean up the stack in a way that db_numargs() understands, so - * that backtraces in ddb don't underrun the stack. Traps for - * inaccessible memory are more fatal than usual this early. - */ - addl $4,%esp - - call mi_startup /* autoconfiguration, mountroot etc */ - /* NOTREACHED */ - addl $0,%esp /* for db_numargs() again */ - -/* - * Signal trampoline, copied to top of user stack - */ -NON_GPROF_ENTRY(sigcode) - calll *SIGF_HANDLER(%esp) - leal SIGF_UC(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC_EFLAGS(%eax) - jne 1f - movl UC_GS(%eax),%gs /* restore %gs */ -1: - movl $SYS_sigreturn,%eax - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b - -#ifdef COMPAT_FREEBSD4 - ALIGN_TEXT -freebsd4_sigcode: - calll *SIGF_HANDLER(%esp) - leal SIGF_UC4(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC4_EFLAGS(%eax) - jne 1f - movl UC4_GS(%eax),%gs /* restore %gs */ -1: - movl $344,%eax /* 4.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. 
*/ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b -#endif - -#ifdef COMPAT_43 - ALIGN_TEXT -osigcode: - call *SIGF_HANDLER(%esp) /* call signal handler */ - lea SIGF_SC(%esp),%eax /* get sigcontext */ - pushl %eax - testl $PSL_VM,SC_PS(%eax) - jne 9f - movl SC_GS(%eax),%gs /* restore %gs */ -9: - movl $103,%eax /* 3.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ -0: jmp 0b -#endif /* COMPAT_43 */ - - ALIGN_TEXT -esigcode: - - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode -#ifdef COMPAT_FREEBSD4 - .globl szfreebsd4_sigcode -szfreebsd4_sigcode: - .long esigcode-freebsd4_sigcode -#endif -#ifdef COMPAT_43 - .globl szosigcode -szosigcode: - .long esigcode-osigcode -#endif - .text - -/********************************************************************** - * - * Recover the bootinfo passed to us from the boot program - * - */ -recover_bootinfo: - /* - * This code is called in different ways depending on what loaded - * and started the kernel. This is used to detect how we get the - * arguments from the other code and what we do with them. - * - * Old disk boot blocks: - * (*btext)(howto, bootdev, cyloffset, esym); - * [return address == 0, and can NOT be returned to] - * [cyloffset was not supported by the FreeBSD boot code - * and always passed in as 0] - * [esym is also known as total in the boot code, and - * was never properly supported by the FreeBSD boot code] - * - * Old diskless netboot code: - * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); - * [return address != 0, and can NOT be returned to] - * If we are being booted by this code it will NOT work, - * so we are just going to halt if we find this case. 
- * - * New uniform boot code: - * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) - * [return address != 0, and can be returned to] - * - * There may seem to be a lot of wasted arguments in here, but - * that is so the newer boot code can still load very old kernels - * and old boot code can load new kernels. - */ - - /* - * The old style disk boot blocks fake a frame on the stack and - * did an lret to get here. The frame on the stack has a return - * address of 0. - */ - cmpl $0,4(%ebp) - je olddiskboot - - /* - * We have some form of return address, so this is either the - * old diskless netboot code, or the new uniform code. That can - * be detected by looking at the 5th argument, if it is 0 - * we are being booted by the new uniform boot code. - */ - cmpl $0,24(%ebp) - je newboot - - /* - * Seems we have been loaded by the old diskless boot code, we - * don't stand a chance of running as the diskless structure - * changed considerably between the two, so just halt. - */ - hlt - - /* - * We have been loaded by the new uniform boot code. - * Let's check the bootinfo version, and if we do not understand - * it we return to the loader with a status of 1 to indicate this error - */ -newboot: - movl 28(%ebp),%ebx /* &bootinfo.version */ - movl BI_VERSION(%ebx),%eax - cmpl $1,%eax /* We only understand version 1 */ - je 1f - movl $1,%eax /* Return status */ - leave - /* - * XXX this returns to our caller's caller (as is required) since - * we didn't set up a frame and our caller did. - */ - ret - -1: - /* - * If we have a kernelname copy it in - */ - movl BI_KERNELNAME(%ebx),%esi - cmpl $0,%esi - je 2f /* No kernelname */ - movl $MAXPATHLEN,%ecx /* Brute force!!! */ - movl $R(kernelname),%edi - cmpb $'/',(%esi) /* Make sure it starts with a slash */ - je 1f - movb $'/',(%edi) - incl %edi - decl %ecx -1: - cld - rep - movsb - -2: - /* - * Determine the size of the boot loader's copy of the bootinfo - * struct. 
This is impossible to do properly because old versions - * of the struct don't contain a size field and there are 2 old - * versions with the same version number. - */ - movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ - testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ - je got_bi_size /* no, sizeless version */ - movl BI_SIZE(%ebx),%ecx -got_bi_size: - - /* - * Copy the common part of the bootinfo struct - */ - movl %ebx,%esi - movl $R(bootinfo),%edi - cmpl $BOOTINFO_SIZE,%ecx - jbe got_common_bi_size - movl $BOOTINFO_SIZE,%ecx -got_common_bi_size: - cld - rep - movsb - -#ifdef NFS_ROOT -#ifndef BOOTP_NFSV3 - /* - * If we have a nfs_diskless structure copy it in - */ - movl BI_NFS_DISKLESS(%ebx),%esi - cmpl $0,%esi - je olddiskboot - movl $R(nfs_diskless),%edi - movl $NFSDISKLESS_SIZE,%ecx - cld - rep - movsb - movl $R(nfs_diskless_valid),%edi - movl $1,(%edi) -#endif -#endif - - /* - * The old style disk boot. - * (*btext)(howto, bootdev, cyloffset, esym); - * Note that the newer boot code just falls into here to pick - * up howto and bootdev, cyloffset and esym are no longer used - */ -olddiskboot: - movl 8(%ebp),%eax - movl %eax,R(boothowto) - movl 12(%ebp),%eax - movl %eax,R(bootdev) - - ret - - -/********************************************************************** - * - * Identify the CPU and initialize anything special about it - * - */ -identify_cpu: - - /* Try to toggle alignment check flag ; does not exist on 386. */ - pushfl - popl %eax - movl %eax,%ecx - orl $PSL_AC,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_AC,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz try486 - - /* NexGen CPU does not have aligment check flag. 
*/ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jz trynexgen - popfl - movl $CPU_386,R(cpu) - jmp 3f - -trynexgen: - popfl - movl $CPU_NX586,R(cpu) - movl $0x4778654e,R(cpu_vendor) # store vendor string - movl $0x72446e65,R(cpu_vendor+4) - movl $0x6e657669,R(cpu_vendor+8) - movl $0,R(cpu_vendor+12) - jmp 3f - -try486: /* Try to toggle identification flag ; does not exist on early 486s. */ - pushfl - popl %eax - movl %eax,%ecx - xorl $PSL_ID,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_ID,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz trycpuid - movl $CPU_486,R(cpu) - - /* - * Check Cyrix CPU - * Cyrix CPUs do not change the undefined flags following - * execution of the divide instruction which divides 5 by 2. - * - * Note: CPUID is enabled on M2, so it passes another way. - */ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jnc trycyrix - popfl - jmp 3f /* You may use Intel CPU. */ - -trycyrix: - popfl - /* - * IBM Bluelighting CPU also doesn't change the undefined flags. - * Because IBM doesn't disclose the information for Bluelighting - * CPU, we couldn't distinguish it from Cyrix's (including IBM - * brand of Cyrix CPUs). - */ - movl $0x69727943,R(cpu_vendor) # store vendor string - movl $0x736e4978,R(cpu_vendor+4) - movl $0x64616574,R(cpu_vendor+8) - jmp 3f - -trycpuid: /* Use the `cpuid' instruction. */ - xorl %eax,%eax - cpuid # cpuid 0 - movl %eax,R(cpu_high) # highest capability - movl %ebx,R(cpu_vendor) # store vendor string - movl %edx,R(cpu_vendor+4) - movl %ecx,R(cpu_vendor+8) - movb $0,R(cpu_vendor+12) - - movl $1,%eax - cpuid # cpuid 1 - movl %eax,R(cpu_id) # store cpu_id - movl %ebx,R(cpu_procinfo) # store cpu_procinfo - movl %edx,R(cpu_feature) # store cpu_feature - rorl $8,%eax # extract family type - andl $15,%eax - cmpl $5,%eax - jae 1f - - /* less than Pentium ; must be 486 */ - movl $CPU_486,R(cpu) - jmp 3f -1: - /* a Pentium? 
*/ - cmpl $5,%eax - jne 2f - movl $CPU_586,R(cpu) - jmp 3f -2: - /* Greater than Pentium...call it a Pentium Pro */ - movl $CPU_686,R(cpu) -3: - ret - -/********************************************************************** - * - * Create the first page directory and its page tables. - * - */ - -create_pagetables: - -/* Find end of kernel image (rounded up to a page boundary). */ - movl $R(_end),%esi - -/* Include symbols, if any. */ - movl R(bootinfo+BI_ESYMTAB),%edi - testl %edi,%edi - je over_symalloc - movl %edi,%esi - movl $KERNBASE,%edi - addl %edi,R(bootinfo+BI_SYMTAB) - addl %edi,R(bootinfo+BI_ESYMTAB) -over_symalloc: - -/* If we are told where the end of the kernel space is, believe it. */ - movl R(bootinfo+BI_KERNEND),%edi - testl %edi,%edi - je no_kernend - movl %edi,%esi -no_kernend: - - addl $PDRMASK,%esi /* Play conservative for now, and */ - andl $~PDRMASK,%esi /* ... wrap to next 4M. */ - movl %esi,R(KERNend) /* save end of kernel */ - movl %esi,R(physfree) /* next free page is at end of kernel */ - -/* Allocate Kernel Page Tables */ - ALLOCPAGES(NKPT) - movl %esi,R(KPTphys) - -/* Allocate Page Table Directory */ -#ifdef PAE - /* XXX only need 32 bytes (easier for now) */ - ALLOCPAGES(1) - movl %esi,R(IdlePDPT) -#endif - ALLOCPAGES(NPGPTD) - movl %esi,R(IdlePTD) - -/* Allocate UPAGES */ - ALLOCPAGES(UAREA_PAGES) - movl %esi,R(p0upa) - addl $KERNBASE, %esi - movl %esi, R(proc0uarea) - - ALLOCPAGES(KSTACK_PAGES) - movl %esi,R(p0kpa) - addl $KERNBASE, %esi - movl %esi, R(proc0kstack) -#if 0 - ALLOCPAGES(1) /* vm86/bios stack */ - movl %esi,R(vm86phystk) - - ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ - movl %esi,R(vm86pa) - addl $KERNBASE, %esi - movl %esi, R(vm86paddr) -#endif -#ifdef SMP -/* Allocate cpu0's private data page */ - ALLOCPAGES(1) - movl %esi,R(cpu0pp) - addl $KERNBASE, %esi - movl %esi, R(cpu0prvpage) /* relocated to KVM space */ - -/* Allocate SMP page table page */ - ALLOCPAGES(1) - movl %esi,R(SMPptpa) - addl $KERNBASE, %esi - movl 
%esi, R(SMPpt) /* relocated to KVM space */ -#endif /* SMP */ - -/* Map page zero read-write so bios32 calls can use it */ - xorl %eax, %eax - movl $PG_RW,%edx - movl $1,%ecx - fillkptphys(%edx) - -/* Map read-only from page 1 to the beginning of the kernel text section */ - movl $PAGE_SIZE, %eax - xorl %edx,%edx - movl $R(btext),%ecx - addl $PAGE_MASK,%ecx - subl %eax,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* - * Enable PSE and PGE. - */ -#ifndef DISABLE_PSE - testl $CPUID_PSE, R(cpu_feature) - jz 1f - movl $PG_PS, R(pseflag) - movl %cr4, %eax - orl $CR4_PSE, %eax - movl %eax, %cr4 -1: -#endif -#ifndef DISABLE_PG_G - testl $CPUID_PGE, R(cpu_feature) - jz 2f - movl $PG_G, R(pgeflag) - movl %cr4, %eax - orl $CR4_PGE, %eax - movl %eax, %cr4 -2: -#endif - -/* - * Write page tables for the kernel starting at btext and - * until the end. Make sure to map read+write. We do this even - * if we've enabled PSE above, we'll just switch the corresponding kernel - * PDEs before we turn on paging. - * - * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE - * THIS! SMP needs the page table to be there to map the kernel P==V. - */ - movl $R(btext),%eax - addl $PAGE_MASK, %eax - andl $~PAGE_MASK, %eax - movl $PG_RW,%edx - movl R(KERNend),%ecx - subl %eax,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* Map page directory. */ - movl R(IdlePTD), %eax - movl $NPGPTD, %ecx - fillkptphys($PG_RW) - -/* Map proc0's UPAGES in the physical way ... */ - movl R(p0upa), %eax - movl $(UAREA_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map proc0's KSTACK in the physical way ... 
*/ - movl R(p0kpa), %eax - movl $(KSTACK_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkptphys($PG_RW) -#if 0 -/* Map space for the vm86 region */ - movl R(vm86phystk), %eax - movl $4, %ecx - fillkptphys($PG_RW) - -/* Map page 0 into the vm86 page table */ - movl $0, %eax - movl $0, %ebx - movl $1, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) - -/* ...likewise for the ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) -#endif -#ifdef SMP -/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ - movl R(cpu0pp), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map SMP page table page into global kmem FWIW */ - movl R(SMPptpa), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map the private page into the SMP page table */ - movl R(cpu0pp), %eax - movl $0, %ebx /* pte offset = 0 */ - movl $1, %ecx /* one private page coming right up */ - fillkpt(R(SMPptpa), $PG_RW) - -/* ... and put the page table table in the pde. */ - movl R(SMPptpa), %eax - movl $MPPTDI, %ebx - movl $1, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* Fakeup VA for the local apic to allow early traps. */ - ALLOCPAGES(1) - movl %esi, %eax - movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ - movl $1, %ecx /* one private pt coming right up */ - fillkpt(R(SMPptpa), $PG_RW) -#endif /* SMP */ - -/* install a pde for temporary double map of bottom of VA */ - movl R(KPTphys), %eax - xorl %ebx, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* - * For the non-PSE case, install PDEs for PTs covering the kernel. - * For the PSE case, do the same, but clobber the ones corresponding - * to the kernel (from btext to KERNend) with 4M ('PS') PDEs immediately - * after. 
- */ - movl R(KPTphys), %eax - movl $KPTDI, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - cmpl $0,R(pseflag) - je done_pde - - movl R(KERNend), %ecx - movl $KERNLOAD, %eax - subl %eax, %ecx - shrl $PDRSHIFT, %ecx - movl $(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx - shll $PDESHIFT, %ebx - addl R(IdlePTD), %ebx - orl $(PG_V|PG_RW|PG_PS), %eax -1: movl %eax, (%ebx) - addl $(1 << PDRSHIFT), %eax - addl $PDESIZE, %ebx - loop 1b - -done_pde: -/* install a pde recursively mapping page directory as a page table */ - movl R(IdlePTD), %eax - movl $PTDPTDI, %ebx - movl $NPGPTD,%ecx - fillkpt(R(IdlePTD), $PG_RW) - -#ifdef PAE - movl R(IdlePTD), %eax - xorl %ebx, %ebx - movl $NPGPTD, %ecx - fillkpt(R(IdlePDPT), $0x0) -#endif - - ret diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,2466 +0,0 @@ -/*- - * Copyright (c) 1992 Terrence R. Lambert. - * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.584 2003/12/03 21:12:09 jhb Exp $"); - -#include "opt_apic.h" -#include "opt_atalk.h" -#include "opt_compat.h" -#include "opt_cpu.h" -#include "opt_ddb.h" -#include "opt_inet.h" -#include "opt_ipx.h" -#include "opt_isa.h" -#include "opt_kstack_pages.h" -#include "opt_maxmem.h" -#include "opt_msgbuf.h" -#include "opt_npx.h" -#include "opt_perfmon.h" -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/sysproto.h> -#include <sys/signalvar.h> -#include <sys/imgact.h> -#include <sys/kdb.h> -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/linker.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/memrange.h> -#include <sys/mutex.h> -#include <sys/pcpu.h> -#include <sys/proc.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/reboot.h> -#include <sys/callout.h> -#include <sys/msgbuf.h> -#include <sys/sched.h> 
-#include <sys/sysent.h> -#include <sys/sysctl.h> -#include <sys/smp.h> -#include <sys/ucontext.h> -#include <sys/vmmeter.h> -#include <sys/bus.h> -#include <sys/eventhandler.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_kern.h> -#include <vm/vm_object.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> -#include <vm/vm_pager.h> -#include <vm/vm_extern.h> - -#include <sys/user.h> -#include <sys/exec.h> -#include <sys/cons.h> - -#ifdef DDB -#ifndef KDB -#error KDB must be enabled in order for DDB to work! -#endif -#include <ddb/ddb.h> -#include <ddb/db_sym.h> -#endif - -#include <net/netisr.h> - -#include <machine/cpu.h> -#include <machine/cputypes.h> -#include <machine/reg.h> -#include <machine/clock.h> -#include <machine/specialreg.h> -#include <machine/bootinfo.h> -#include <machine/intr_machdep.h> -#include <machine/md_var.h> -#include <machine/pc/bios.h> -#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */ -#include <machine/proc.h> -#ifdef PERFMON -#include <machine/perfmon.h> -#endif -#ifdef SMP -#include <machine/privatespace.h> -#include <machine/smp.h> -#endif - -#ifdef DEV_ISA -#include <i386/isa/icu.h> -#endif - -#include <isa/rtc.h> -#include <sys/ptrace.h> -#include <machine/sigframe.h> - - -/* XEN includes */ -#include <machine/hypervisor-ifs.h> -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/xenfunc.h> -#include <machine/xenvar.h> -#include <machine/xen_intr.h> - -void Xhypervisor_callback(void); -void failsafe_callback(void); - -/***************/ - - -/* Sanity check for __curthread() */ -CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); - -extern void init386(void); -extern void dblfault_handler(void); - -extern void printcpuinfo(void); /* XXX header file */ -extern void finishidentcpu(void); -extern void panicifcpuunsupported(void); -extern void initializecpu(void); -void initvalues(start_info_t *startinfo); - -#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) -#define EFL_SECURE(ef, 
oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) - -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - -static void cpu_startup(void *); -static void fpstate_drop(struct thread *td); -static void get_fpcontext(struct thread *td, mcontext_t *mcp); -static int set_fpcontext(struct thread *td, const mcontext_t *mcp); -#ifdef CPU_ENABLE_SSE -static void set_fpregs_xmm(struct save87 *, struct savexmm *); -static void fill_fpregs_xmm(struct savexmm *, struct save87 *); -#endif /* CPU_ENABLE_SSE */ -SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) - -#ifdef DDB -extern vm_offset_t ksym_start, ksym_end; -#endif - -int _udatasel, _ucodesel; -u_int basemem; - -start_info_t *xen_start_info; -unsigned long *xen_phys_machine; -int xendebug_flags; -int init_first = 0; -int cold = 1; - -#ifdef COMPAT_43 -static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); -#endif -#ifdef COMPAT_FREEBSD4 -static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, - u_long code); -#endif - -long Maxmem = 0; - -vm_paddr_t phys_avail[10]; - -/* must be 2 less so 0 0 can signal end of chunks */ -#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) - -struct kva_md_info kmi; - -static struct trapframe proc0_tf; -#ifndef SMP -static struct pcpu __pcpu; -#endif -struct mtx icu_lock; - -struct mem_range_softc mem_range_softc; - -static void -cpu_startup(void *dummy) -{ - /* - * Good {morning,afternoon,evening,night}. - */ - startrtclock(); - - printcpuinfo(); - panicifcpuunsupported(); -#ifdef PERFMON - perfmon_init(); -#endif - printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), - ptoa((uintmax_t)Maxmem) / 1048576); - /* - * Display any holes after the first chunk of extended memory. 
- */ - if (bootverbose) { - int indx; - - printf("Physical memory chunk(s):\n"); - for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { - vm_paddr_t size; - - size = phys_avail[indx + 1] - phys_avail[indx]; - printf( - "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", - (uintmax_t)phys_avail[indx], - (uintmax_t)phys_avail[indx + 1] - 1, - (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); - } - } - - vm_ksubmap_init(&kmi); - - printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)cnt.v_free_count), - ptoa((uintmax_t)cnt.v_free_count) / 1048576); - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); - vm_pager_bufferinit(); - - cpu_setregs(); - -} - -/* - * Send an interrupt to process. - * - * Stack is set up to allow sigcode stored - * at top to call routine, followed by kcall - * to sigreturn routine below. After sigreturn - * resets the signal mask, the stack, and the - * frame pointer, it returns to the user - * specified pc, psl. - */ -#ifdef COMPAT_43 -static void -osendsig(catcher, sig, mask, code) - sig_t catcher; - int sig; - sigset_t *mask; - u_long code; -{ - struct osigframe sf, *fp; - struct proc *p; - struct thread *td; - struct sigacts *psp; - struct trapframe *regs; - int oonstack; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - psp = p->p_sigacts; - mtx_assert(&psp->ps_mtx, MA_OWNED); - regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); - - /* Allocate space for the signal handler context. */ - if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct osigframe *)(td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(struct osigframe)); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - td->td_sigstk.ss_flags |= SS_ONSTACK; -#endif - } else - fp = (struct osigframe *)regs->tf_esp - 1; - - /* Translate the signal if appropriate. 
*/ - if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - - /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; - if (SIGISMEMBER(psp->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_arg2 = (register_t)&fp->sf_siginfo; - sf.sf_siginfo.si_signo = sig; - sf.sf_siginfo.si_code = code; - sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; - } else { - /* Old FreeBSD-style arguments. */ - sf.sf_arg2 = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - mtx_unlock(&psp->ps_mtx); - PROC_UNLOCK(p); - - /* Save most if not all of trap frame. */ - sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; - sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; - sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; - sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; - sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; - sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; - sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; - sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; - sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; - sf.sf_siginfo.si_sc.sc_es = regs->tf_es; - sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; - sf.sf_siginfo.si_sc.sc_gs = rgs(); - sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; - - /* Build the signal context to be used by osigreturn(). */ - sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; - SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); - sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; - sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; - sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; - sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; - sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; - sf.sf_siginfo.si_sc.sc_err = regs->tf_err; - - /* - * Copy the sigframe out to the user's stack. 
- */ - if (copyout(&sf, fp, sizeof(*fp)) != 0) { -#ifdef DEBUG - printf("process %ld has trashed its stack\n", (long)p->p_pid); -#endif - PROC_LOCK(p); - sigexit(td, SIGILL); - } - - regs->tf_esp = (int)fp; - regs->tf_eip = PS_STRINGS - szosigcode; - regs->tf_eflags &= ~PSL_T; - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - load_gs(_udatasel); - regs->tf_ss = _udatasel; - PROC_LOCK(p); - mtx_lock(&psp->ps_mtx); -} -#endif /* COMPAT_43 */ - -#ifdef COMPAT_FREEBSD4 -static void -freebsd4_sendsig(catcher, sig, mask, code) - sig_t catcher; - int sig; - sigset_t *mask; - u_long code; -{ - struct sigframe4 sf, *sfp; - struct proc *p; - struct thread *td; - struct sigacts *psp; - struct trapframe *regs; - int oonstack; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - psp = p->p_sigacts; - mtx_assert(&psp->ps_mtx, MA_OWNED); - regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); - - /* Save user context. */ - bzero(&sf, sizeof(sf)); - sf.sf_uc.uc_sigmask = *mask; - sf.sf_uc.uc_stack = td->td_sigstk; - sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) - ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; - sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); - - /* Allocate space for the signal handler context. */ - if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(struct sigframe4)); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - td->td_sigstk.ss_flags |= SS_ONSTACK; -#endif - } else - sfp = (struct sigframe4 *)regs->tf_esp - 1; - - /* Translate the signal if appropriate. */ - if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - - /* Build the argument list for the signal handler. 
*/ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; - if (SIGISMEMBER(psp->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; - sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; - - /* Fill in POSIX parts */ - sf.sf_si.si_signo = sig; - sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void *)regs->tf_err; - } else { - /* Old FreeBSD-style arguments. */ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - mtx_unlock(&psp->ps_mtx); - PROC_UNLOCK(p); - - /* - * Copy the sigframe out to the user's stack. - */ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { -#ifdef DEBUG - printf("process %ld has trashed its stack\n", (long)p->p_pid); -#endif - PROC_LOCK(p); - sigexit(td, SIGILL); - } - - regs->tf_esp = (int)sfp; - regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; - regs->tf_eflags &= ~PSL_T; - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - regs->tf_ss = _udatasel; - PROC_LOCK(p); - mtx_lock(&psp->ps_mtx); -} -#endif /* COMPAT_FREEBSD4 */ - -void -sendsig(catcher, sig, mask, code) - sig_t catcher; - int sig; - sigset_t *mask; - u_long code; -{ - struct sigframe sf, *sfp; - struct proc *p; - struct thread *td; - struct sigacts *psp; - char *sp; - struct trapframe *regs; - int oonstack; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - psp = p->p_sigacts; - mtx_assert(&psp->ps_mtx, MA_OWNED); -#ifdef COMPAT_FREEBSD4 - if (SIGISMEMBER(psp->ps_freebsd4, sig)) { - freebsd4_sendsig(catcher, sig, mask, code); - return; - } -#endif -#ifdef COMPAT_43 - if (SIGISMEMBER(psp->ps_osigset, sig)) { - osendsig(catcher, sig, mask, code); - return; - } -#endif - regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); - - /* Save user context. 
*/ - bzero(&sf, sizeof(sf)); - sf.sf_uc.uc_sigmask = *mask; - sf.sf_uc.uc_stack = td->td_sigstk; - sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) - ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; - sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); - sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ - get_fpcontext(td, &sf.sf_uc.uc_mcontext); - fpstate_drop(td); - - /* Allocate space for the signal handler context. */ - if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + - td->td_sigstk.ss_size - sizeof(struct sigframe); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - td->td_sigstk.ss_flags |= SS_ONSTACK; -#endif - } else - sp = (char *)regs->tf_esp - sizeof(struct sigframe); - /* Align to 16 bytes. */ - sfp = (struct sigframe *)((unsigned int)sp & ~0xF); - - /* Translate the signal if appropriate. */ - if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - - /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; - if (SIGISMEMBER(psp->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; - sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; - - /* Fill in POSIX parts */ - sf.sf_si.si_signo = sig; - sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void *)regs->tf_err; - } else { - /* Old FreeBSD-style arguments. */ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - mtx_unlock(&psp->ps_mtx); - PROC_UNLOCK(p); - /* - * Copy the sigframe out to the user's stack. 
- */ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { -#ifdef DEBUG - printf("process %ld has trashed its stack\n", (long)p->p_pid); -#endif - PROC_LOCK(p); - sigexit(td, SIGILL); - } - - regs->tf_esp = (int)sfp; - regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); - regs->tf_eflags &= ~PSL_T; - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - regs->tf_ss = _udatasel; - PROC_LOCK(p); - mtx_lock(&psp->ps_mtx); -} - -/* - * Build siginfo_t for SA thread - */ -void -cpu_thread_siginfo(int sig, u_long code, siginfo_t *si) -{ - struct proc *p; - struct thread *td; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - - bzero(si, sizeof(*si)); - si->si_signo = sig; - si->si_code = code; - si->si_addr = (void *)td->td_frame->tf_err; - /* XXXKSE fill other fields */ -} - -/* - * System call to cleanup state after a signal - * has been taken. Reset signal mask and - * stack state from context left by sendsig (above). - * Return to previous pc and psl as specified by - * context left by sendsig. Check carefully to - * make sure that the user has not modified the - * state to gain improper privileges. - * - * MPSAFE - */ -#ifdef COMPAT_43 -int -osigreturn(td, uap) - struct thread *td; - struct osigreturn_args /* { - struct osigcontext *sigcntxp; - } */ *uap; -{ - struct osigcontext sc; - struct trapframe *regs; - struct osigcontext *scp; - struct proc *p = td->td_proc; - int eflags, error; - - regs = td->td_frame; - error = copyin(uap->sigcntxp, &sc, sizeof(sc)); - if (error != 0) - return (error); - scp = ≻ - eflags = scp->sc_ps; - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. 
tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - return (EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - if (!CS_SECURE(scp->sc_cs)) { - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } - regs->tf_ds = scp->sc_ds; - regs->tf_es = scp->sc_es; - regs->tf_fs = scp->sc_fs; - - /* Restore remaining registers. */ - regs->tf_eax = scp->sc_eax; - regs->tf_ebx = scp->sc_ebx; - regs->tf_ecx = scp->sc_ecx; - regs->tf_edx = scp->sc_edx; - regs->tf_esi = scp->sc_esi; - regs->tf_edi = scp->sc_edi; - regs->tf_cs = scp->sc_cs; - regs->tf_ss = scp->sc_ss; - regs->tf_isp = scp->sc_isp; - regs->tf_ebp = scp->sc_fp; - regs->tf_esp = scp->sc_sp; - regs->tf_eip = scp->sc_pc; - regs->tf_eflags = eflags; - - PROC_LOCK(p); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - if (scp->sc_onstack & 1) - td->td_sigstk.ss_flags |= SS_ONSTACK; - else - td->td_sigstk.ss_flags &= ~SS_ONSTACK; -#endif - SIGSETOLD(td->td_sigmask, scp->sc_mask); - SIG_CANTMASK(td->td_sigmask); - signotify(td); - PROC_UNLOCK(p); - return (EJUSTRETURN); -} -#endif /* COMPAT_43 */ - -#ifdef COMPAT_FREEBSD4 -/* - * MPSAFE - */ -int -freebsd4_sigreturn(td, uap) - struct thread *td; - struct freebsd4_sigreturn_args /* { - const ucontext4 *sigcntxp; - } */ *uap; -{ - struct ucontext4 uc; - struct proc *p = td->td_proc; - struct trapframe *regs; - const struct ucontext4 *ucp; - int cs, eflags, error; - - error = copyin(uap->sigcntxp, &uc, sizeof(uc)); - if (error != 0) - return (error); - ucp = &uc; - regs = td->td_frame; - 
eflags = ucp->uc_mcontext.mc_eflags; - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); - return (EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - cs = ucp->uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - printf("freebsd4_sigreturn: cs = 0x%x\n", cs); - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } - - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); - - PROC_LOCK(p); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - if (ucp->uc_mcontext.mc_onstack & 1) - td->td_sigstk.ss_flags |= SS_ONSTACK; - else - td->td_sigstk.ss_flags &= ~SS_ONSTACK; -#endif - - td->td_sigmask = ucp->uc_sigmask; - SIG_CANTMASK(td->td_sigmask); - signotify(td); - PROC_UNLOCK(p); - return (EJUSTRETURN); -} -#endif /* COMPAT_FREEBSD4 */ - -/* - * MPSAFE - */ -int -sigreturn(td, uap) - struct thread *td; - struct sigreturn_args /* { - const __ucontext *sigcntxp; - } */ *uap; -{ - ucontext_t uc; - struct proc *p = td->td_proc; - struct trapframe *regs; - const ucontext_t *ucp; - int cs, eflags, error, ret; - - error = copyin(uap->sigcntxp, &uc, sizeof(uc)); - if (error != 0) - return (error); - ucp = &uc; - regs = td->td_frame; - eflags = ucp->uc_mcontext.mc_eflags; - /* - * Don't allow users to change 
privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ -#if 0 - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - __asm__("int $0x3"); - printf("sigreturn: eflags = 0x%x\n", eflags); - return (EINVAL); - } -#endif - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - cs = ucp->uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - __asm__("int $0x3"); - printf("sigreturn: cs = 0x%x\n", cs); - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } - - ret = set_fpcontext(td, &ucp->uc_mcontext); - if (ret != 0) - return (ret); - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); - PROC_LOCK(p); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - if (ucp->uc_mcontext.mc_onstack & 1) - td->td_sigstk.ss_flags |= SS_ONSTACK; - else - td->td_sigstk.ss_flags &= ~SS_ONSTACK; -#endif - - td->td_sigmask = ucp->uc_sigmask; - SIG_CANTMASK(td->td_sigmask); - signotify(td); - PROC_UNLOCK(p); - return (EJUSTRETURN); -} - -/* - * Machine dependent boot() routine - * - * I haven't seen anything to put here yet - * Possibly some stuff might be grafted back here from boot() - */ -void -cpu_boot(int howto) -{ -} - -/* - * Shutdown the CPU as much as possible - */ -void -cpu_halt(void) -{ - HYPERVISOR_shutdown(); -} - -/* - * Hook to idle the CPU when possible. 
In the SMP case we default to - * off because a halted cpu will not currently pick up a new thread in the - * run queue until the next timer tick. If turned on this will result in - * approximately a 4.2% loss in real time performance in buildworld tests - * (but improves user and sys times oddly enough), and saves approximately - * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). - * - * XXX we need to have a cpu mask of idle cpus and generate an IPI or - * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. - * Then we can have our cake and eat it too. - * - * XXX I'm turning it on for SMP as well by default for now. It seems to - * help lock contention somewhat, and this is critical for HTT. -Peter - */ -static int cpu_idle_hlt = 1; -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); - -static void -cpu_idle_default(void) -{ - idle_block(); - enable_intr(); -} - -/* - * Note that we have to be careful here to avoid a race between checking - * sched_runnable() and actually halting. If we don't do this, we may waste - * the time between calling hlt and the next interrupt even though there - * is a runnable process. - */ -void -cpu_idle(void) -{ - -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; -#endif - - if (cpu_idle_hlt) { - disable_intr(); - if (sched_runnable()) - enable_intr(); - else - (*cpu_idle_hook)(); - } -} - -/* Other subsystems (e.g., ACPI) can hook this later. */ -void (*cpu_idle_hook)(void) = cpu_idle_default; - -/* - * Clear registers on exec - */ -void -exec_setregs(td, entry, stack, ps_strings) - struct thread *td; - u_long entry; - u_long stack; - u_long ps_strings; -{ - struct trapframe *regs = td->td_frame; - struct pcb *pcb = td->td_pcb; - - /* Reset pc->pcb_gs and %gs before possibly invalidating it. 
*/ - pcb->pcb_gs = _udatasel; - load_gs(_udatasel); - - if (td->td_proc->p_md.md_ldt) - user_ldt_free(td); - - bzero((char *)regs, sizeof(struct trapframe)); - regs->tf_eip = entry; - regs->tf_esp = stack; - regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); - regs->tf_ss = _udatasel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - regs->tf_cs = _ucodesel; - - /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ - regs->tf_ebx = ps_strings; - - /* - * Reset the hardware debug registers if they were in use. - * They won't have any meaning for the newly exec'd process. - */ - if (pcb->pcb_flags & PCB_DBREGS) { - pcb->pcb_dr0 = 0; - pcb->pcb_dr1 = 0; - pcb->pcb_dr2 = 0; - pcb->pcb_dr3 = 0; - pcb->pcb_dr6 = 0; - pcb->pcb_dr7 = 0; - if (pcb == PCPU_GET(curpcb)) { - /* - * Clear the debug registers on the running - * CPU, otherwise they will end up affecting - * the next process we switch to. - */ - reset_dbregs(); - } - pcb->pcb_flags &= ~PCB_DBREGS; - } - - /* - * Initialize the math emulator (if any) for the current process. - * Actually, just clear the bit that says that the emulator has - * been initialized. Initialization is delayed until the process - * traps to the emulator (if it is done at all) mainly because - * emulators don't provide an entry point for initialization. - */ - td->td_pcb->pcb_flags &= ~FP_SOFTFP; - - /* Initialize the npx (if any) for the current process. */ - /* - * XXX the above load_cr0() also initializes it and is a layering - * violation if NPX is configured. It drops the npx partially - * and this would be fatal if we were interrupted now, and decided - * to force the state to the pcb, and checked the invariant - * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). - * ALL of this can happen except the check. The check used to - * happen and be fatal later when we didn't complete the drop - * before returning to user mode. This should be fixed properly - * soon. 
- */ - fpstate_drop(td); - - /* - * XXX - Linux emulator - * Make sure sure edx is 0x0 on entry. Linux binaries depend - * on it. - */ - td->td_retval[1] = 0; -} - -void -cpu_setregs(void) -{ - /* nothing for Xen to do */ -} - -static int -sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) -{ - int error; - error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, - req); - if (!error && req->newptr) - resettodr(); - return (error); -} - -SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, - &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); - -SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, - CTLFLAG_RW, &disable_rtc_set, 0, ""); - -SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, - CTLFLAG_RD, &bootinfo, bootinfo, ""); - -u_long bootdev; /* not a dev_t - encoding is different */ -SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, - CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); - -/* - * Initialize 386 and configure to run kernel - */ - -/* - * Initialize segments & interrupt table - */ - -int _default_ldt; -union descriptor *gdt; /* global descriptor table */ -static struct gate_descriptor idt0[NIDT]; -struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ -union descriptor *ldt; /* local descriptor table */ -struct region_descriptor r_idt; /* table descriptors */ - -int private_tss; /* flag indicating private tss */ - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -extern int has_f00f_bug; -#endif - -static struct i386tss dblfault_tss; -static char dblfault_stack[PAGE_SIZE]; - -extern struct user *proc0uarea; -extern vm_offset_t proc0kstack; - - -/* software prototypes -- in more palatable form */ -struct soft_segment_descriptor gdt_segs[] = { -/* GNULL_SEL 0 Null Descriptor */ -{ 0x0, /* segment base address */ - 0x0, /* length */ - 0, /* segment type */ - SEL_KPL, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size 
*/ - 0 /* limit granularity (byte/page units)*/ }, -/* GCODE_SEL 1 Code Descriptor for kernel */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - -/* GDATA_SEL 2 Data Descriptor for kernel */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - -/* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_KPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -#if 0 -/* GPROC0_SEL 4 Proc 0 Tss Descriptor */ -{ - 0x0, /* segment base address */ - sizeof(struct i386tss)-1,/* length */ - SDT_SYS386TSS, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GLDT_SEL 5 LDT Descriptor */ -{ (int) ldt, /* segment base address */ - sizeof(ldt)-1, /* length - all address space */ - SDT_SYSLDT, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GUSERLDT_SEL 6 User LDT Descriptor per process */ -{ (int) ldt, /* segment base address */ - (512 * sizeof(union descriptor)-1), /* length */ - SDT_SYSLDT, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor 
present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GTGATE_SEL 7 Null Descriptor - Placeholder */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ -{ 0x400, /* segment base address */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GPANIC_SEL 9 Panic Tss Descriptor */ -{ (int) &dblfault_tss, /* segment base address */ - sizeof(struct i386tss)-1,/* length - all address space */ - SDT_SYS386TSS, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type 
*/ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -#endif -}; - -static struct soft_segment_descriptor ldt_segs[] = { - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Code Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all 
address space */ - SDT_MEMERA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Data Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -}; - -struct proc_ldt default_proc_ldt; - -void -setidt(idx, func, typ, dpl, selec) - int idx; - inthand_t *func; - int typ; - int dpl; - int selec; -{ - struct gate_descriptor *ip; - - ip = idt + idx; - ip->gd_looffset = (int)func; - ip->gd_selector = selec; - ip->gd_stkcpy = 0; - ip->gd_xx = 0; - ip->gd_type = typ; - ip->gd_dpl = dpl; - ip->gd_p = 1; - ip->gd_hioffset = ((int)func)>>16 ; -} - -#define IDTVEC(name) __CONCAT(X,name) - -extern inthand_t - IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), - IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), - IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), - IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); - -#ifdef DDB -/* - * Display the index and function name of any IDT entries that don't use - * the default 'rsvd' entry point. 
- */ -DB_SHOW_COMMAND(idt, db_show_idt) -{ - struct gate_descriptor *ip; - int idx, quit; - uintptr_t func; - - ip = idt; - db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE); - for (idx = 0, quit = 0; idx < NIDT; idx++) { - func = (ip->gd_hioffset << 16 | ip->gd_looffset); - if (func != (uintptr_t)&IDTVEC(rsvd)) { - db_printf("%3d\t", idx); - db_printsym(func, DB_STGY_PROC); - db_printf("\n"); - } - ip++; - } -} -#endif - -void -sdtossd(sd, ssd) - struct segment_descriptor *sd; - struct soft_segment_descriptor *ssd; -{ - ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; - ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; - ssd->ssd_type = sd->sd_type; - ssd->ssd_dpl = sd->sd_dpl; - ssd->ssd_p = sd->sd_p; - ssd->ssd_def32 = sd->sd_def32; - ssd->ssd_gran = sd->sd_gran; -} - -#define PHYSMAP_SIZE (2 * 8) - -/* - * Populate the (physmap) array with base/bound pairs describing the - * available physical memory in the system, then test this memory and - * build the phys_avail array describing the actually-available memory. - * - * If we cannot accurately determine the physical memory map, then use - * value from the 0xE801 call, and failing that, the RTC. - * - * Total memory size may be set by the kernel environment variable - * hw.physmem or the compile-time define MAXMEM. - * - * XXX first should be vm_paddr_t. 
- */ -static void -getmemsize(void) -{ - int i; - printf("start_info %p\n", xen_start_info); - printf("start_info->nr_pages %ld\n", xen_start_info->nr_pages); - Maxmem = xen_start_info->nr_pages - init_first; - /* call pmap initialization to make new kernel address space */ - pmap_bootstrap((init_first)<< PAGE_SHIFT, 0); - for (i = 0; i < 10; i++) - phys_avail[i] = 0; - physmem = Maxmem; - avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE); - phys_avail[0] = init_first << PAGE_SHIFT; - phys_avail[1] = avail_end; -} - -extern unsigned long cpu0prvpage; -extern unsigned long *SMPpt; -pteinfo_t *pteinfo_list; -unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0)); -int preemptable; -int gdt_set; -static int ncpus; - -/* Linux infection */ -#define PAGE_OFFSET KERNBASE -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -void -initvalues(start_info_t *startinfo) -{ - int i; - vm_paddr_t pdir_shadow_ma, KPTphys; - vm_offset_t *pdir_shadow; -#ifdef SMP - int j; -#endif - -#ifdef WRITABLE_PAGETABLES - printk("using writable pagetables\n"); - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); -#endif - - xen_start_info = startinfo; - xen_phys_machine = (unsigned long *)startinfo->mfn_list; - unsigned long tmpindex = ((__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + xen_start_info->nr_pt_frames) + 3 /* number of pages allocated after the pts + 1*/; - xendebug_flags = 0xffffffff; - /* pre-zero unused mapped pages */ - bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 - tmpindex)*PAGE_SIZE); - IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base)); - KPTphys = xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE)); - XENPRINTF("IdlePTD %p\n", IdlePTD); - XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " - "mod_start: 0x%lx mod_len: 0x%lx\n", - xen_start_info->nr_pages, xen_start_info->shared_info, - xen_start_info->flags, xen_start_info->pt_base, - 
xen_start_info->mod_start, xen_start_info->mod_len); - - - - - /* Map proc0's UPAGES */ - proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - tmpindex += UAREA_PAGES; - - /* Map proc0's KSTACK */ - proc0kstack = KERNBASE + (tmpindex << PAGE_SHIFT); - tmpindex += KSTACK_PAGES; - - /* allocate page for gdt */ - gdt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - tmpindex++; - - /* allocate page for ldt */ - ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - tmpindex++; - - /* initialize page directory shadow page */ - pdir_shadow = (vm_offset_t *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - i686_pagezero(pdir_shadow); - pdir_shadow_ma = xpmap_ptom(tmpindex << PAGE_SHIFT); - PT_SET_MA(pdir_shadow, pdir_shadow_ma | PG_V | PG_A); - tmpindex++; - - /* setup shadow mapping first so vtomach will work */ - xen_pt_pin((vm_paddr_t)pdir_shadow_ma); - xen_queue_pt_update((vm_paddr_t)(IdlePTD + PTDPTDI), - pdir_shadow_ma | PG_V | PG_A | PG_RW | PG_M); - xen_queue_pt_update(pdir_shadow_ma + PTDPTDI*sizeof(vm_paddr_t), - ((vm_paddr_t)IdlePTD) | PG_V | PG_A); - xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), - KPTphys | PG_V | PG_A); - - xen_flush_queue(); - /* allocate remainder of NKPT pages */ - - -#ifdef SMP -#if 0 - /* allocate cpu0 private page */ - cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT)); - tmpindex++; -#endif - /* allocate SMP page table */ - SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT)); -#if 0 - /* Map the private page into the SMP page table */ - SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A; -#endif - /* map SMP page table RO */ - PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW); - - /* put the page table into the page directory */ - xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI), - xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_M | PG_RW | PG_V | PG_A); - xen_queue_pt_update(pdir_shadow_ma + MPPTDI*sizeof(vm_paddr_t), - xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_V | 
PG_A); - tmpindex++; -#endif - -#ifdef PMAP_DEBUG - pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - tmpindex += ((xen_start_info->nr_pages >> 10) + 1)*(1 + XPQ_CALL_DEPTH*XPQ_CALL_COUNT); - - if (tmpindex > 980) - __asm__("int3"); -#endif - /* unmap remaining pages from initial 4MB chunk */ - for (i = tmpindex; i%1024 != 0; i++) - xen_queue_pt_update(KPTphys + i*sizeof(vm_paddr_t), 0); - xen_flush_queue(); - - /* allocate remainder of NKPT pages */ - for (i = 0; i < NKPT-1; i++, tmpindex++) { - xen_queue_pt_update((vm_paddr_t)(IdlePTD + KPTDI + i + 1), - xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_M | PG_RW | PG_V | PG_A)); - xen_queue_pt_update(pdir_shadow_ma + (KPTDI + i + 1)*sizeof(vm_paddr_t), - xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_V | PG_A)); - } - tmpindex += NKPT-1; - PT_UPDATES_FLUSH(); - - HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex << PAGE_SHIFT)); - PT_SET_MA(HYPERVISOR_shared_info, - xen_start_info->shared_info | PG_A | PG_V | PG_RW | PG_M); - tmpindex++; - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine; - ncpus = HYPERVISOR_shared_info->n_vcpu; -#ifdef SMP - for (i = 0; i < ncpus; i++) { - int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE; - for (j = 0; j < npages; j++) { - vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT); - tmpindex++; - PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE); - } - } - xen_flush_queue(); -#endif - - init_first = tmpindex; - -} - - -trap_info_t trap_table[] = { - { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, - { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, - { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, - { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, - /* This is UPL on Linux and KPL on BSD */ - { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, - { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, - { 7, 0, 
GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, - /* - * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, - * no handler for double fault - */ - { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, - {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, - {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, - {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, - {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, - {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, - {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, - {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, - {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, - {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, - {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, - {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, - { 0, 0, 0, 0 } -}; - -void -init386(void) -{ - int gsel_tss, metadata_missing, off, x, error; - struct pcpu *pc; - unsigned long gdtmachpfn; -#ifdef SMP - int i; -#endif - proc0.p_uarea = proc0uarea; - thread0.td_kstack = proc0kstack; - thread0.td_pcb = (struct pcb *) - (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; - - /* - * This may be done better later if it gets more high level - * components in it. If so just link td->td_proc here. 
- */ - proc_linkup(&proc0, &ksegrp0, &thread0); - - metadata_missing = 0; - if (xen_start_info->mod_start) - preload_metadata = (caddr_t)xen_start_info->mod_start; - else - metadata_missing = 1; - - /* XXX - temporary hack */ - preload_metadata = (caddr_t)0; - /* XXX */ - - if (envmode == 1) - kern_envp = static_env; - else if ((caddr_t)xen_start_info->cmd_line) - kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); - - boothowto |= xen_boothowto(kern_envp); - - if (boothowto & RB_GDB_PAUSE) - __asm__("int $0x3;"); - - /* Init basic tunables, hz etc */ - init_param1(); - /* - * make gdt memory segments, the code segment goes up to end of the - * page with etext in it, the data segment goes to the end of - * the address space - */ -#if 0 - /* - * XEN occupies the upper 64MB of virtual address space - * At its base it manages an array mapping machine page frames - * to physical page frames - hence we need to be able to - * access 4GB - (64MB - 4MB + 64k) - */ - gdt_segs[GCODE_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); - gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16))); -#endif -#ifdef SMP - /* XXX this will blow up if there are more than 512/NGDT vcpus */ - pc = &SMP_prvspace[0].pcpu; - for (i = 0; i < ncpus; i++) { - cpu_add(i, (i == 0)); - - gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i]; - gdt_segs[GPRIV_SEL].ssd_limit = - atop(sizeof(struct privatespace) - 1); - gdt_segs[GPROC0_SEL].ssd_base = - (int) &SMP_prvspace[i].pcpu.pc_common_tss; - SMP_prvspace[i].pcpu.pc_prvspace = - &SMP_prvspace[i].pcpu; - - for (x = 0; x < NGDT; x++) { - ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd); - } - } -#else - pc = &__pcpu; - gdt_segs[GPRIV_SEL].ssd_limit = - atop(sizeof(struct pcpu) - 1); - gdt_segs[GPRIV_SEL].ssd_base = (int) pc; - gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; - for (x = 0; x < NGDT; x++) - ssdtosd(&gdt_segs[x], &gdt[x].sd); -#endif - - - PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & 
~PG_RW); - gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; - PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); - - - lgdt_finish(); - gdt_set = 1; - - if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { - panic("set_trap_table failed - error %d\n", error); - } - HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback, - GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); - - - - pcpu_init(pc, 0, sizeof(struct pcpu)); - PCPU_SET(prvspace, pc); - PCPU_SET(curthread, &thread0); - PCPU_SET(curpcb, thread0.td_pcb); - PCPU_SET(pdir, (unsigned long)IdlePTD); - /* - * Initialize mutexes. - * - */ - mutex_init(); - - mtx_init(&clock_lock, "clk", NULL, MTX_SPIN); - mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); - - - - /* make ldt memory segments */ - /* - * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it - * should be spelled ...MAX_USER... - */ - ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); - ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); - for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) - ssdtosd(&ldt_segs[x], &ldt[x].sd); - default_proc_ldt.ldt_base = (caddr_t)ldt; - default_proc_ldt.ldt_len = 6; - _default_ldt = (int)&default_proc_ldt; - PCPU_SET(currentldt, _default_ldt) - PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); - xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); - - - /* - * Initialize the console before we print anything out. 
- */ - cninit(); - if (metadata_missing) - printf("WARNING: loader(8) metadata is missing!\n"); - -#ifdef DDB - ksym_start = bootinfo.bi_symtab; - ksym_end = bootinfo.bi_esymtab; -#endif - kdb_init(); -#ifdef KDB - if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); -#endif - - finishidentcpu(); /* Final stage of CPU initialization */ - setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - initializecpu(); /* Initialize CPU registers */ - - /* make an initial tss so cpu can get interrupt stack on syscall! */ - /* Note: -16 is so we can grow the trapframe if we came from vm86 */ - PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); -#if 0 - private_tss = 0; - PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); -#endif - HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0)); - - - dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = - dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; - dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = - dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); - - dblfault_tss.tss_cr3 = (int)IdlePTD; - dblfault_tss.tss_eip = (int)dblfault_handler; - dblfault_tss.tss_eflags = PSL_KERNEL; - dblfault_tss.tss_ds = dblfault_tss.tss_es = - dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); - dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); - dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); - dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); - - getmemsize(); - init_param2(physmem); - /* now running on new page tables, configured,and u/iom is accessible */ - /* Map the message buffer. 
*/ - for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) - pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); - PT_UPDATES_FLUSH(); - - /* safe to enable xen page queue locking */ - - msgbufinit(msgbufp, MSGBUF_SIZE); - /* XXX KMM I don't think we need call gates */ -#if 0 - printf("modify ldt\n"); - /* make a call gate to reenter kernel with */ - gdp = &ldt[LSYS5CALLS_SEL].gd; - - x = (int) &IDTVEC(lcall_syscall); - gdp->gd_looffset = x; - gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); - gdp->gd_stkcpy = 1; - gdp->gd_type = SDT_SYS386CGT; - gdp->gd_dpl = SEL_UPL; - gdp->gd_p = 1; - gdp->gd_hioffset = x >> 16; - - /* XXX does this work? */ - ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; - ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; -#endif - /* transfer to user mode */ - - _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); - _udatasel = LSEL(LUDATA_SEL, SEL_UPL); - - /* setup proc 0's pcb */ - thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ - thread0.td_pcb->pcb_cr3 = (int)IdlePTD; - thread0.td_pcb->pcb_ext = 0; - thread0.td_frame = &proc0_tf; -} - -void -cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) -{ - - pcpu->pc_acpi_id = 0xffffffff; -} - -/* - * Construct a PCB from a trapframe. This is called from kdb_trap() where - * we want to start a backtrace from the function that caused us to enter - * the debugger. We have the context in the trapframe, but base the trace - * on the PCB. The PCB doesn't have to be perfect, as long as it contains - * enough for a backtrace. - */ -void -makectx(struct trapframe *tf, struct pcb *pcb) -{ - - pcb->pcb_edi = tf->tf_edi; - pcb->pcb_esi = tf->tf_esi; - pcb->pcb_ebp = tf->tf_ebp; - pcb->pcb_ebx = tf->tf_ebx; - pcb->pcb_eip = tf->tf_eip; - pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
tf->tf_esp : (int)(tf + 1) - 8; -} - -int -ptrace_set_pc(struct thread *td, u_long addr) -{ - - td->td_frame->tf_eip = addr; - return (0); -} - -int -ptrace_single_step(struct thread *td) -{ - td->td_frame->tf_eflags |= PSL_T; - return (0); -} - -int -ptrace_clear_single_step(struct thread *td) -{ - td->td_frame->tf_eflags &= ~PSL_T; - return (0); -} - -int -fill_regs(struct thread *td, struct reg *regs) -{ - struct pcb *pcb; - struct trapframe *tp; - - tp = td->td_frame; - regs->r_fs = tp->tf_fs; - regs->r_es = tp->tf_es; - regs->r_ds = tp->tf_ds; - regs->r_edi = tp->tf_edi; - regs->r_esi = tp->tf_esi; - regs->r_ebp = tp->tf_ebp; - regs->r_ebx = tp->tf_ebx; - regs->r_edx = tp->tf_edx; - regs->r_ecx = tp->tf_ecx; - regs->r_eax = tp->tf_eax; - regs->r_eip = tp->tf_eip; - regs->r_cs = tp->tf_cs; - regs->r_eflags = tp->tf_eflags; - regs->r_esp = tp->tf_esp; - regs->r_ss = tp->tf_ss; - pcb = td->td_pcb; - regs->r_gs = pcb->pcb_gs; - return (0); -} - -int -set_regs(struct thread *td, struct reg *regs) -{ - struct pcb *pcb; - struct trapframe *tp; - - tp = td->td_frame; - if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || - !CS_SECURE(regs->r_cs)) - return (EINVAL); - tp->tf_fs = regs->r_fs; - tp->tf_es = regs->r_es; - tp->tf_ds = regs->r_ds; - tp->tf_edi = regs->r_edi; - tp->tf_esi = regs->r_esi; - tp->tf_ebp = regs->r_ebp; - tp->tf_ebx = regs->r_ebx; - tp->tf_edx = regs->r_edx; - tp->tf_ecx = regs->r_ecx; - tp->tf_eax = regs->r_eax; - tp->tf_eip = regs->r_eip; - tp->tf_cs = regs->r_cs; - tp->tf_eflags = regs->r_eflags; - tp->tf_esp = regs->r_esp; - tp->tf_ss = regs->r_ss; - pcb = td->td_pcb; - pcb->pcb_gs = regs->r_gs; - return (0); -} - -#ifdef CPU_ENABLE_SSE -static void -fill_fpregs_xmm(sv_xmm, sv_87) - struct savexmm *sv_xmm; - struct save87 *sv_87; -{ - register struct env87 *penv_87 = &sv_87->sv_env; - register struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - bzero(sv_87, sizeof(*sv_87)); - - /* FPU control/status */ - penv_87->en_cw = penv_xmm->en_cw; - 
penv_87->en_sw = penv_xmm->en_sw; - penv_87->en_tw = penv_xmm->en_tw; - penv_87->en_fip = penv_xmm->en_fip; - penv_87->en_fcs = penv_xmm->en_fcs; - penv_87->en_opcode = penv_xmm->en_opcode; - penv_87->en_foo = penv_xmm->en_foo; - penv_87->en_fos = penv_xmm->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; -} - -static void -set_fpregs_xmm(sv_87, sv_xmm) - struct save87 *sv_87; - struct savexmm *sv_xmm; -{ - register struct env87 *penv_87 = &sv_87->sv_env; - register struct envxmm *penv_xmm = &sv_xmm->sv_env; - int i; - - /* FPU control/status */ - penv_xmm->en_cw = penv_87->en_cw; - penv_xmm->en_sw = penv_87->en_sw; - penv_xmm->en_tw = penv_87->en_tw; - penv_xmm->en_fip = penv_87->en_fip; - penv_xmm->en_fcs = penv_87->en_fcs; - penv_xmm->en_opcode = penv_87->en_opcode; - penv_xmm->en_foo = penv_87->en_foo; - penv_xmm->en_fos = penv_87->en_fos; - - /* FPU registers */ - for (i = 0; i < 8; ++i) - sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; -} -#endif /* CPU_ENABLE_SSE */ - -int -fill_fpregs(struct thread *td, struct fpreg *fpregs) -{ -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, - (struct save87 *)fpregs); - return (0); - } -#endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); - return (0); -} - -int -set_fpregs(struct thread *td, struct fpreg *fpregs) -{ -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); - return (0); - } -#endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); - return (0); -} - -/* - * Get machine context. 
- */ -int -get_mcontext(struct thread *td, mcontext_t *mcp, int flags) -{ - struct trapframe *tp; - - tp = td->td_frame; - - PROC_LOCK(curthread->td_proc); - mcp->mc_onstack = sigonstack(tp->tf_esp); - PROC_UNLOCK(curthread->td_proc); - mcp->mc_gs = td->td_pcb->pcb_gs; - mcp->mc_fs = tp->tf_fs; - mcp->mc_es = tp->tf_es; - mcp->mc_ds = tp->tf_ds; - mcp->mc_edi = tp->tf_edi; - mcp->mc_esi = tp->tf_esi; - mcp->mc_ebp = tp->tf_ebp; - mcp->mc_isp = tp->tf_isp; - if (flags & GET_MC_CLEAR_RET) { - mcp->mc_eax = 0; - mcp->mc_edx = 0; - } else { - mcp->mc_eax = tp->tf_eax; - mcp->mc_edx = tp->tf_edx; - } - mcp->mc_ebx = tp->tf_ebx; - mcp->mc_ecx = tp->tf_ecx; - mcp->mc_eip = tp->tf_eip; - mcp->mc_cs = tp->tf_cs; - mcp->mc_eflags = tp->tf_eflags; - mcp->mc_esp = tp->tf_esp; - mcp->mc_ss = tp->tf_ss; - mcp->mc_len = sizeof(*mcp); - get_fpcontext(td, mcp); - return (0); -} - -/* - * Set machine context. - * - * However, we don't set any but the user modifiable flags, and we won't - * touch the cs selector. 
- */ -int -set_mcontext(struct thread *td, const mcontext_t *mcp) -{ - struct trapframe *tp; - int eflags, ret; - - tp = td->td_frame; - if (mcp->mc_len != sizeof(*mcp)) - return (EINVAL); - eflags = (mcp->mc_eflags & PSL_USERCHANGE) | - (tp->tf_eflags & ~PSL_USERCHANGE); - if ((ret = set_fpcontext(td, mcp)) == 0) { - tp->tf_fs = mcp->mc_fs; - tp->tf_es = mcp->mc_es; - tp->tf_ds = mcp->mc_ds; - tp->tf_edi = mcp->mc_edi; - tp->tf_esi = mcp->mc_esi; - tp->tf_ebp = mcp->mc_ebp; - tp->tf_ebx = mcp->mc_ebx; - tp->tf_edx = mcp->mc_edx; - tp->tf_ecx = mcp->mc_ecx; - tp->tf_eax = mcp->mc_eax; - tp->tf_eip = mcp->mc_eip; - tp->tf_eflags = eflags; - tp->tf_esp = mcp->mc_esp; - tp->tf_ss = mcp->mc_ss; - td->td_pcb->pcb_gs = mcp->mc_gs; - ret = 0; - } - return (ret); -} - -static void -get_fpcontext(struct thread *td, mcontext_t *mcp) -{ -#ifndef DEV_NPX - mcp->mc_fpformat = _MC_FPFMT_NODEV; - mcp->mc_ownedfp = _MC_FPOWNED_NONE; -#else - union savefpu *addr; - - /* - * XXX mc_fpstate might be misaligned, since its declaration is not - * unportabilized using __attribute__((aligned(16))) like the - * declaration of struct savemm, and anyway, alignment doesn't work - * for auto variables since we don't use gcc's pessimal stack - * alignment. Work around this by abusing the spare fields after - * mcp->mc_fpstate. - * - * XXX unpessimize most cases by only aligning when fxsave might be - * called, although this requires knowing too much about - * npxgetregs()'s internals. 
- */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - } - mcp->mc_ownedfp = npxgetregs(td, addr); - if (addr != (union savefpu *)&mcp->mc_fpstate) { - bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); - bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); - } - mcp->mc_fpformat = npxformat(); -#endif -} - -static int -set_fpcontext(struct thread *td, const mcontext_t *mcp) -{ - union savefpu *addr; - - if (mcp->mc_fpformat == _MC_FPFMT_NODEV) - return (0); - else if (mcp->mc_fpformat != _MC_FPFMT_387 && - mcp->mc_fpformat != _MC_FPFMT_XMM) - return (EINVAL); - else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) - /* We don't care what state is left in the FPU or PCB. */ - fpstate_drop(td); - else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || - mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - /* XXX align as above. */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { - do - addr = (void *)((char *)addr + 4); - while ((uintptr_t)(void *)addr & 0xF); - bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); - } -#ifdef DEV_NPX - /* - * XXX we violate the dubious requirement that npxsetregs() - * be called with interrupts disabled. - */ - npxsetregs(td, addr); -#endif - /* - * Don't bother putting things back where they were in the - * misaligned case, since we know that the caller won't use - * them again. - */ - } else - return (EINVAL); - return (0); -} - -static void -fpstate_drop(struct thread *td) -{ - register_t s; - - s = intr_disable(); -#ifdef DEV_NPX - if (PCPU_GET(fpcurthread) == td) - npxdrop(); -#endif - /* - * XXX force a full drop of the npx. The above only drops it if we - * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. 
- * - * XXX I don't much like npxgetregs()'s semantics of doing a full - * drop. Dropping only to the pcb matches fnsave's behaviour. - * We only need to drop to !PCB_INITDONE in sendsig(). But - * sendsig() is the only caller of npxgetregs()... perhaps we just - * have too many layers. - */ - curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; - intr_restore(s); -} - -int -fill_dbregs(struct thread *td, struct dbreg *dbregs) -{ - struct pcb *pcb; - - if (td == NULL) { - dbregs->dr[0] = rdr0(); - dbregs->dr[1] = rdr1(); - dbregs->dr[2] = rdr2(); - dbregs->dr[3] = rdr3(); - dbregs->dr[4] = rdr4(); - dbregs->dr[5] = rdr5(); - dbregs->dr[6] = rdr6(); - dbregs->dr[7] = rdr7(); - } else { - pcb = td->td_pcb; - dbregs->dr[0] = pcb->pcb_dr0; - dbregs->dr[1] = pcb->pcb_dr1; - dbregs->dr[2] = pcb->pcb_dr2; - dbregs->dr[3] = pcb->pcb_dr3; - dbregs->dr[4] = 0; - dbregs->dr[5] = 0; - dbregs->dr[6] = pcb->pcb_dr6; - dbregs->dr[7] = pcb->pcb_dr7; - } - return (0); -} - -int -set_dbregs(struct thread *td, struct dbreg *dbregs) -{ - struct pcb *pcb; - int i; - u_int32_t mask1, mask2; - - if (td == NULL) { - load_dr0(dbregs->dr[0]); - load_dr1(dbregs->dr[1]); - load_dr2(dbregs->dr[2]); - load_dr3(dbregs->dr[3]); - load_dr4(dbregs->dr[4]); - load_dr5(dbregs->dr[5]); - load_dr6(dbregs->dr[6]); - load_dr7(dbregs->dr[7]); - } else { - /* - * Don't let an illegal value for dr7 get set. Specifically, - * check for undefined settings. Setting these bit patterns - * result in undefined behaviour and can lead to an unexpected - * TRCTRAP. - */ - for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; - i++, mask1 <<= 2, mask2 <<= 2) - if ((dbregs->dr[7] & mask1) == mask2) - return (EINVAL); - - pcb = td->td_pcb; - - /* - * Don't let a process set a breakpoint that is not within the - * process's address space. If a process could do this, it - * could halt the system by setting a breakpoint in the kernel - * (if ddb was enabled). 
Thus, we need to check to make sure - * that no breakpoints are being enabled for addresses outside - * process's address space, unless, perhaps, we were called by - * uid 0. - * - * XXX - what about when the watched area of the user's - * address space is written into from within the kernel - * ... wouldn't that still cause a breakpoint to be generated - * from within kernel mode? - */ - - if (suser(td) != 0) { - if (dbregs->dr[7] & 0x3) { - /* dr0 is enabled */ - if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<2)) { - /* dr1 is enabled */ - if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<4)) { - /* dr2 is enabled */ - if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<6)) { - /* dr3 is enabled */ - if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - } - - pcb->pcb_dr0 = dbregs->dr[0]; - pcb->pcb_dr1 = dbregs->dr[1]; - pcb->pcb_dr2 = dbregs->dr[2]; - pcb->pcb_dr3 = dbregs->dr[3]; - pcb->pcb_dr6 = dbregs->dr[6]; - pcb->pcb_dr7 = dbregs->dr[7]; - - pcb->pcb_flags |= PCB_DBREGS; - } - - return (0); -} - -/* - * Return > 0 if a hardware breakpoint has been hit, and the - * breakpoint was in user space. Return 0, otherwise. 
- */ -int -user_dbreg_trap(void) -{ - u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ - u_int32_t bp; /* breakpoint bits extracted from dr6 */ - int nbp; /* number of breakpoints that triggered */ - caddr_t addr[4]; /* breakpoint addresses */ - int i; - - dr7 = rdr7(); - if ((dr7 & 0x000000ff) == 0) { - /* - * all GE and LE bits in the dr7 register are zero, - * thus the trap couldn't have been caused by the - * hardware debug registers - */ - return 0; - } - - nbp = 0; - dr6 = rdr6(); - bp = dr6 & 0x0000000f; - - if (!bp) { - /* - * None of the breakpoint bits are set meaning this - * trap was not caused by any of the debug registers - */ - return 0; - } - - /* - * at least one of the breakpoints were hit, check to see - * which ones and if any of them are user space addresses - */ - - if (bp & 0x01) { - addr[nbp++] = (caddr_t)rdr0(); - } - if (bp & 0x02) { - addr[nbp++] = (caddr_t)rdr1(); - } - if (bp & 0x04) { - addr[nbp++] = (caddr_t)rdr2(); - } - if (bp & 0x08) { - addr[nbp++] = (caddr_t)rdr3(); - } - - for (i=0; i<nbp; i++) { - if (addr[i] < - (caddr_t)VM_MAXUSER_ADDRESS) { - /* - * addr[i] is in user space - */ - return nbp; - } - } - - /* - * None of the breakpoints are in user space. - */ - return 0; -} - -#ifndef DEV_APIC -#include <machine/apicvar.h> - -/* - * Provide stub functions so that the MADT APIC enumerator in the acpi - * kernel module will link against a kernel without 'device apic'. - * - * XXX - This is a gross hack. 
- */ -void -apic_register_enumerator(struct apic_enumerator *enumerator) -{ -} - -void * -ioapic_create(uintptr_t addr, int32_t id, int intbase) -{ - return (NULL); -} - -int -ioapic_disable_pin(void *cookie, u_int pin) -{ - return (ENXIO); -} - -int -ioapic_get_vector(void *cookie, u_int pin) -{ - return (-1); -} - -void -ioapic_register(void *cookie) -{ -} - -int -ioapic_remap_vector(void *cookie, u_int pin, int vector) -{ - return (ENXIO); -} - -int -ioapic_set_extint(void *cookie, u_int pin) -{ - return (ENXIO); -} - -int -ioapic_set_nmi(void *cookie, u_int pin) -{ - return (ENXIO); -} - -int -ioapic_set_polarity(void *cookie, u_int pin,enum intr_polarity pol ) -{ - return (ENXIO); -} - -int -ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger ) -{ - return (ENXIO); -} - -void -lapic_create(u_int apic_id, int boot_cpu) -{ -} - -void -lapic_init(uintptr_t addr) -{ -} - -int -lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) -{ - return (ENXIO); -} - -int -lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) -{ - return (ENXIO); -} - -int -lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) -{ - return (ENXIO); -} -#endif - -#ifdef KDB - -/* - * Provide inb() and outb() as functions. They are normally only - * available as macros calling inlined functions, thus cannot be - * called from the debugger. - * - * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. - */ - -#undef inb -#undef outb - -/* silence compiler warnings */ -u_char inb(u_int); -void outb(u_int, u_char); - -u_char -inb(u_int port) -{ - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. 
- */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); - return (data); -} - -void -outb(u_int port, u_char data) -{ - u_char al; - /* - * Use an unnecessary assignment to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); -} - -#endif /* KDB */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_clock.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,150 +0,0 @@ -/*- - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * <phk@xxxxxxxxxxx> wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/mp_clock.c,v 1.19 2004/05/30 20:34:57 phk Exp $"); - -/*- - * Just when we thought life were beautiful, reality pops its grim face over - * the edge again: - * - * ] 20. ACPI Timer Errata - * ] - * ] Problem: The power management timer may return improper result when - * ] read. Although the timer value settles properly after incrementing, - * ] while incrementing there is a 3nS window every 69.8nS where the - * ] timer value is indeterminate (a 4.2% chance that the data will be - * ] incorrect when read). As a result, the ACPI free running count up - * ] timer specification is violated due to erroneous reads. Implication: - * ] System hangs due to the "inaccuracy" of the timer when used by - * ] software for time critical events and delays. 
- * ] - * ] Workaround: Read the register twice and compare. - * ] Status: This will not be fixed in the PIIX4 or PIIX4E. - * - * The counter is in other words not latched to the PCI bus clock when - * read. Notice the workaround isn't: We need to read until we have - * three monotonic samples and then use the middle one, otherwise we are - * not protected against the fact that the bits can be wrong in two - * directions. If we only cared about monosity two reads would be enough. - */ - -/* #include "opt_bus.h" */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/timetc.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/sysctl.h> -#include <sys/bus.h> - -#include <dev/pci/pcireg.h> -#include <dev/pci/pcivar.h> - -static unsigned piix_get_timecount(struct timecounter *tc); - -static u_int32_t piix_timecounter_address; -static u_int piix_freq = 14318182/4; - -static struct timecounter piix_timecounter = { - piix_get_timecount, /* get_timecount */ - 0, /* no poll_pps */ - 0xffffff, /* counter_mask */ - 0, /* frequency */ - "PIIX" /* name */ -}; - - -static int -sysctl_machdep_piix_freq(SYSCTL_HANDLER_ARGS) -{ - int error; - u_int freq; - - if (piix_timecounter.tc_frequency == 0) - return (EOPNOTSUPP); - freq = piix_freq; - error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); - if (error == 0 && req->newptr != NULL) { - piix_freq = freq; - piix_timecounter.tc_frequency = piix_freq; - } - return (error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, piix_freq, CTLTYPE_INT | CTLFLAG_RW, - 0, sizeof(u_int), sysctl_machdep_piix_freq, "I", ""); - -static unsigned -piix_get_timecount(struct timecounter *tc) -{ - unsigned u1, u2, u3; - - u2 = inl(piix_timecounter_address); - u3 = inl(piix_timecounter_address); - do { - u1 = u2; - u2 = u3; - u3 = inl(piix_timecounter_address); - } while (u1 > u2 || u2 > u3); - return (u2); -} - -static int -piix_probe(device_t dev) -{ - u_int32_t d; - - if (devclass_get_device(devclass_find("acpi"), 0) != NULL) - 
return (ENXIO); - switch (pci_get_devid(dev)) { - case 0x71138086: - device_set_desc(dev, "PIIX Timecounter"); - break; - default: - return (ENXIO); - } - - d = pci_read_config(dev, PCIR_COMMAND, 2); - if (!(d & PCIM_CMD_PORTEN)) { - device_printf(dev, "PIIX I/O space not mapped\n"); - return (ENXIO); - } - return (0); -} - -static int -piix_attach(device_t dev) -{ - u_int32_t d; - - d = pci_read_config(dev, 0x40, 4); - piix_timecounter_address = (d & 0xffc0) + 8; - piix_timecounter.tc_frequency = piix_freq; - tc_init(&piix_timecounter); - return (0); -} - -static device_method_t piix_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, piix_probe), - DEVMETHOD(device_attach, piix_attach), - { 0, 0 } -}; - -static driver_t piix_driver = { - "piix", - piix_methods, - 1, -}; - -static devclass_t piix_devclass; - -DRIVER_MODULE(piix, pci, piix_driver, piix_devclass, 0, 0); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1487 +0,0 @@ -/*- - * Copyright (c) 1996, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24 15:02:33 rik Exp $"); - -#include "opt_apic.h" -#include "opt_cpu.h" -#include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" - -#if !defined(lint) -#if !defined(SMP) -#error How did you get here? -#endif - -#if defined(I386_CPU) && !defined(COMPILING_LINT) -#error SMP not supported with I386_CPU -#endif -#if 0 -#ifndef DEV_APIC -#error The apic device is required for SMP, add "device apic" to your config file. 
-#endif -#endif -#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif -#endif /* not lint */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/cons.h> /* cngetc() */ -#ifdef GPROF -#include <sys/gmon.h> -#endif -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/memrange.h> -#include <sys/mutex.h> -#include <sys/pcpu.h> -#include <sys/proc.h> -#include <sys/smp.h> -#include <sys/sysctl.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <vm/vm_extern.h> - -#include <machine/apicreg.h> -#include <machine/clock.h> -#include <machine/md_var.h> -#include <machine/mp_watchdog.h> -#include <machine/pcb.h> -#include <machine/smp.h> -#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */ -#include <machine/specialreg.h> -#include <machine/privatespace.h> - - -/* XEN includes */ -#include <machine/xenfunc.h> -#include <machine/xen_intr.h> - -void Xhypervisor_callback(void); -void failsafe_callback(void); - -/***************/ - - -#define WARMBOOT_TARGET 0 -#define WARMBOOT_OFF (KERNBASE + 0x0467) -#define WARMBOOT_SEG (KERNBASE + 0x0469) - -#define CMOS_REG (0x70) -#define CMOS_DATA (0x71) -#define BIOS_RESET (0x0f) -#define BIOS_WARM (0x0a) - - -#undef POSTCODE -#define POSTCODE(x) - -/* - * this code MUST be enabled here and in mpboot.s. - * it follows the very early stages of AP boot by placing values in CMOS ram. - * it NORMALLY will never be needed and thus the primitive method for enabling. 
- * -#define CHECK_POINTS - */ - -#if defined(CHECK_POINTS) && !defined(PC98) -#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) -#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) - -#define CHECK_INIT(D); \ - CHECK_WRITE(0x34, (D)); \ - CHECK_WRITE(0x35, (D)); \ - CHECK_WRITE(0x36, (D)); \ - CHECK_WRITE(0x37, (D)); \ - CHECK_WRITE(0x38, (D)); \ - CHECK_WRITE(0x39, (D)); - -#define CHECK_PRINT(S); \ - printf("%s: %d, %d, %d, %d, %d, %d\n", \ - (S), \ - CHECK_READ(0x34), \ - CHECK_READ(0x35), \ - CHECK_READ(0x36), \ - CHECK_READ(0x37), \ - CHECK_READ(0x38), \ - CHECK_READ(0x39)); - -#else /* CHECK_POINTS */ - -#define CHECK_INIT(D) -#define CHECK_PRINT(S) -#define CHECK_WRITE(A, D) - -#endif /* CHECK_POINTS */ - -/* - * Values to send to the POST hardware. - */ -#define MP_BOOTADDRESS_POST 0x10 -#define MP_PROBE_POST 0x11 -#define MPTABLE_PASS1_POST 0x12 - -#define MP_START_POST 0x13 -#define MP_ENABLE_POST 0x14 -#define MPTABLE_PASS2_POST 0x15 - -#define START_ALL_APS_POST 0x16 -#define INSTALL_AP_TRAMP_POST 0x17 -#define START_AP_POST 0x18 - -#define MP_ANNOUNCE_POST 0x19 - -/* lock region used by kernel profiling */ -int mcount_lock; - -/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ -int current_postcode; - -int mp_naps; /* # of Applications processors */ -int boot_cpu_id = -1; /* designated BSP */ -extern int nkpt; - -/* - * CPU topology map datastructures for HTT. - */ -static struct cpu_group mp_groups[MAXCPU]; -static struct cpu_top mp_top; - -/* AP uses this during bootstrap. Do not staticize. */ -char *bootSTK; -static int bootAP; - -/* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t *KPTphys; - -/* SMP page table page */ -extern pt_entry_t *SMPpt; - -extern trap_info_t trap_table[]; - -struct pcb stoppcbs[MAXCPU]; - -/* Variables needed for SMP tlb shootdown. */ -vm_offset_t smp_tlb_addr1; -vm_offset_t smp_tlb_addr2; -volatile int smp_tlb_wait; - -/* - * Local data and functions. 
- */ - -static u_int logical_cpus; - -/* used to hold the AP's until we are ready to release them */ -static struct mtx ap_boot_mtx; - -/* Set to 1 once we're ready to let the APs out of the pen. */ -static volatile int aps_ready = 0; - -/* - * Store data from cpu_add() until later in the boot when we actually setup - * the APs. - */ -struct cpu_info { - int cpu_present:1; - int cpu_bsp:1; -} static cpu_info[MAXCPU]; -static int cpu_apic_ids[MAXCPU]; - -static u_int boot_address; - -static void set_logical_apic_ids(void); -static int start_all_aps(void); -#if 0 -static void install_ap_tramp(void); -#endif -static int start_ap(int apic_id); -static void release_aps(void *dummy); - -static int hlt_logical_cpus; -static struct sysctl_ctx_list logical_cpu_clist; - -static void -mem_range_AP_init(void) -{ - if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) - mem_range_softc.mr_op->initAP(&mem_range_softc); -} - -void -mp_topology(void) -{ - struct cpu_group *group; - int logical_cpus; - int apic_id; - int groups; - int cpu; - - /* Build the smp_topology map. */ - /* Nothing to do if there is no HTT support. */ - if ((cpu_feature & CPUID_HTT) == 0) - return; - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (logical_cpus <= 1) - return; - group = &mp_groups[0]; - groups = 1; - for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { - if (!cpu_info[apic_id].cpu_present) - continue; - /* - * If the current group has members and we're not a logical - * cpu, create a new group. - */ - if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) { - group++; - groups++; - } - group->cg_count++; - group->cg_mask |= 1 << cpu; - cpu++; - } - - mp_top.ct_count = groups; - mp_top.ct_group = mp_groups; - smp_topology = &mp_top; -} - - -/* - * Calculate usable address in base memory for AP trampoline code. 
- */ -u_int -mp_bootaddress(u_int basemem) -{ - POSTCODE(MP_BOOTADDRESS_POST); - - boot_address = trunc_page(basemem); /* round down to 4k boundary */ - if ((basemem - boot_address) < bootMP_size) - boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ - - return boot_address; -} - -void -cpu_add(u_int apic_id, char boot_cpu) -{ - - if (apic_id >= MAXCPU) { - printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n", - apic_id, MAXCPU - 1); - return; - } - KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", - apic_id)); - cpu_info[apic_id].cpu_present = 1; - if (boot_cpu) { - KASSERT(boot_cpu_id == -1, - ("CPU %d claims to be BSP, but CPU %d already is", apic_id, - boot_cpu_id)); - boot_cpu_id = apic_id; - cpu_info[apic_id].cpu_bsp = 1; - } - mp_ncpus++; - if (bootverbose) - printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : - "AP"); - -} - -void -cpu_mp_setmaxid(void) -{ - - mp_maxid = MAXCPU - 1; -} - -int -cpu_mp_probe(void) -{ - - mp_ncpus = HYPERVISOR_shared_info->n_vcpu; - /* - * Always record BSP in CPU map so that the mbuf init code works - * correctly. - */ - all_cpus = 1; - if (mp_ncpus == 0) { - /* - * No CPUs were found, so this must be a UP system. Setup - * the variables to represent a system with a single CPU - * with an id of 0. - */ - mp_ncpus = 1; - return (0); - } - - /* At least one CPU was found. */ - if (mp_ncpus == 1) { - /* - * One CPU was found, so this must be a UP system with - * an I/O APIC. - */ - return (0); - } - - /* At least two CPUs were found. */ - return (1); -} - -static void -cpu_mp_ipi_init(void) -{ - int irq; - int cpu = smp_processor_id(); - /* - * these are not needed by XenFreeBSD - from Keir: - * For TLB-flush related IPIs, Xen has hypercalls - * you should use instead. You can pass a pointer - * to a vcpu bitmap to update_va_mapping(), and to - * MMUEXT_flush_tlb_multi and MMEXT_invlpg_multi. 
- * Xen will then make sure that those vcpus get - * flushed appropriately before returning to the - * caller. - * There is also no indication that we need to forward - * clock interrupts. - */ -#if 0 - /* Install an inter-CPU IPI for TLB invalidation */ - setidt(IPI_INVLTLB, IDTVEC(invltlb), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLPG, IDTVEC(invlpg), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IPI_INVLRNG, IDTVEC(invlrng), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* Install an inter-CPU IPI for forwarding hardclock() */ - setidt(IPI_HARDCLOCK, IDTVEC(hardclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* Install an inter-CPU IPI for forwarding statclock() */ - setidt(IPI_STATCLOCK, IDTVEC(statclock), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif - - /* - * These can all be consolidated. For now leaving - * as individual IPIs. - * - */ -#if 0 - /* Install an inter-CPU IPI for lazy pmap release */ - setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#else - irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP); - PCPU_SET(lazypmap, irq); - PANIC_IF(intr_add_handler("pmap_lazyfix", irq, - (driver_intr_t *)pmap_lazyfix_action, - NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); -#endif - -#if 0 - /* Install an inter-CPU IPI for all-CPU rendezvous */ - setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#else - irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS); - PCPU_SET(rendezvous, irq); - PANIC_IF(intr_add_handler("smp_rendezvous", irq, - (driver_intr_t *)smp_rendezvous_action, - NULL, INTR_TYPE_CLK | INTR_FAST, NULL)); -#endif - -#if 0 - /* Install an inter-CPU IPI for forcing an additional software trap */ - setidt(IPI_AST, IDTVEC(cpuast), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#else - irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST); - PCPU_SET(cpuast, irq); -#endif - /* XXX ignore for now 
*/ -#if 0 - /* Install an inter-CPU IPI for CPU stop/restart */ - setidt(IPI_STOP, IDTVEC(cpustop), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif - -} - -SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL); - -/* - * Initialize the IPI handlers and start up the AP's. - */ -void -cpu_mp_start(void) /* --- Start here --- */ -{ - int i; - - POSTCODE(MP_START_POST); - - /* Initialize the logical ID to APIC ID table. */ - for (i = 0; i < MAXCPU; i++) - cpu_apic_ids[i] = -1; - - - /* Set boot_cpu_id if needed. */ - if (boot_cpu_id == -1) { - boot_cpu_id = PCPU_GET(apic_id); - cpu_info[boot_cpu_id].cpu_bsp = 1; - } else - KASSERT(boot_cpu_id == PCPU_GET(apic_id), - ("BSP's APIC ID doesn't match boot_cpu_id")); - cpu_apic_ids[0] = boot_cpu_id; - - /* Start each Application Processor */ - start_all_aps(); - - /* Setup the initial logical CPUs info. */ - logical_cpus = logical_cpus_mask = 0; - if (cpu_feature & CPUID_HTT) - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - - set_logical_apic_ids(); -} - - -/* - * Print various information about the SMP system hardware and setup. - */ -void -cpu_mp_announce(void) -{ - int i, x; - - POSTCODE(MP_ANNOUNCE_POST); - - /* List CPUs */ - printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); - for (i = 1, x = 0; x < MAXCPU; x++) { - if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) { - KASSERT(i < mp_ncpus, - ("mp_ncpus and actual cpus are out of whack")); - printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); - } - } -} - -/* - * AP CPU's call this to initialize themselves. - */ -void -init_secondary(void) -{ - int myid; - unsigned long gdtmachpfn; - printk("MADE IT!!"); - -#if 0 - u_int cr0; -#endif - /* Steps to booting SMP on xen as gleaned from XenLinux: - * - cpu_init() - processor specific initialization - * - smp_callin() - * - wait 2s for BP to finish its startup sequence - * - map_cpu_to_logical_apicid() - * - save cpuid info - * - set bit in callin map to let master (BP?) 
continue - * - local setup timer() - per cpu timer initialization - * - ldebug_setup() - bind debug IRQ to local CPU. - * - smp_intr_init() - IPI setup that we do in cpu_mp_start - * - local_irq_enable() - enable interrupts locally - * - cpu_set(id, map) - announce that we're up - * - cpu_idle() - make us schedulable - */ - - - /* bootAP is set in start_ap() to our ID. */ - myid = bootAP; - - gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; - PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); - - - lgdt_finish(); - - PCPU_SET(cpuid, myid); - - - set_user_ldt((struct mdproc *)_default_ldt); - PCPU_SET(currentldt, _default_ldt); - - PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); - PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); -#if 0 - ltr(gsel_tss); - - /* - * Set to a known state: - * Set by mpboot.s: CR0_PG, CR0_PE - * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM - */ - cr0 = rcr0(); - cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); - load_cr0(cr0); -#endif - CHECK_WRITE(0x38, 5); - - /* Disable local APIC just to be sure. */ - lapic_disable(); - - /* signal our startup to the BSP. */ - mp_naps++; - CHECK_WRITE(0x39, 6); - - /* Spin until the BSP releases the AP's. 
*/ - while (!aps_ready) - ia32_pause(); - - /* BSP may have changed PTD while we were waiting */ - invltlb(); - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - lidt(&r_idt); -#endif - - /* set up CPU registers and state */ - cpu_setregs(); - - /* set up FPU state on the AP */ - npxinit(__INITIAL_NPXCW__); - - /* set up SSE registers */ - enable_sse(); - - /* A quick check from sanity claus */ - if (PCPU_GET(apic_id) != lapic_id()) { - printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); - printf("SMP: actual apic_id = %d\n", lapic_id()); - printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); - printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]); - panic("cpuid mismatch! boom!!"); - } - - mtx_lock_spin(&ap_boot_mtx); - - /* Init local apic for irq's */ - lapic_setup(); - - /* Set memory range attributes for this CPU to match the BSP */ - mem_range_AP_init(); - - smp_cpus++; - - CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); - printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); - - /* Determine if we are a logical CPU. */ - if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - - /* Build our map of 'other' CPUs. 
*/ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - - if (bootverbose) - lapic_dump("AP"); - - if (smp_cpus == mp_ncpus) { - /* enable IPI's, tlb shootdown, freezes etc */ - atomic_store_rel_int(&smp_started, 1); - smp_active = 1; /* historic */ - } - - mtx_unlock_spin(&ap_boot_mtx); - - /* wait until all the AP's are up */ - while (smp_started == 0) - ia32_pause(); - - /* need to wait until now to setup the IPIs as SI_SUB_CPU is - * much earlier than SI_SUB_INTR - */ - ap_evtchn_init(myid); - ap_cpu_initclocks(); - cpu_mp_ipi_init(); - - /* ok, now grab sched_lock and enter the scheduler */ - mtx_lock_spin(&sched_lock); - - binuptime(PCPU_PTR(switchtime)); - PCPU_SET(switchticks, ticks); - - cpu_throw(NULL, choosethread()); /* doesn't return */ - - panic("scheduler returned us to %s", __func__); - /* NOTREACHED */ -} - -/******************************************************************* - * local functions and data - */ - -/* - * Set the APIC logical IDs. - * - * We want to cluster logical CPU's within the same APIC ID cluster. - * Since logical CPU's are aligned simply filling in the clusters in - * APIC ID order works fine. Note that this does not try to balance - * the number of CPU's in each cluster. (XXX?) - */ -static void -set_logical_apic_ids(void) -{ - u_int apic_id, cluster, cluster_id; - - /* Force us to allocate cluster 0 at the start. 
*/ - cluster = -1; - cluster_id = APIC_MAX_INTRACLUSTER_ID; - for (apic_id = 0; apic_id < MAXCPU; apic_id++) { - if (!cpu_info[apic_id].cpu_present) - continue; - if (cluster_id == APIC_MAX_INTRACLUSTER_ID) { - cluster = ioapic_next_logical_cluster(); - cluster_id = 0; - } else - cluster_id++; - if (bootverbose) - printf("APIC ID: physical %u, logical %u:%u\n", - apic_id, cluster, cluster_id); - lapic_set_logical_id(apic_id, cluster, cluster_id); - } -} - -/* - * start each AP in our list - */ -static int -start_all_aps(void) -{ - struct pcpu *pc; - char *stack; - int i, apic_id, cpu; - - /* - * This function corresponds most closely to - * smp_boot_cpus in XenLinux - the sequence there - * is: - * - check if SMP config is found - if not: - * - clear the I/O APIC IRQs - * - map cpu to logical apicid - * - exit - * - smp_intr_init - IPI initialization - * - map cpu to logical apicid - * - boot each of the vcpus - * - clear and then construct the cpu sibling [logical CPUs] map. - * - */ - - POSTCODE(START_ALL_APS_POST); - - mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); -#if 0 - /* install the AP 1st level boot code */ - install_ap_tramp(); - - /* save the current value of the warm-start vector */ - mpbioswarmvec = *((u_long *) WARMBOOT_OFF); - - - /* set up temporary P==V mapping for AP boot */ - /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - kptbase = (uintptr_t)(void *)KPTphys; - for (i = 0; i < NKPT; i++) - PTD[i] = (pd_entry_t)(PG_V | PG_RW | - ((kptbase + i * PAGE_SIZE) & PG_FRAME)); - invltlb(); -#endif - /* start each AP */ - for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { - if (!cpu_info[apic_id].cpu_present || - cpu_info[apic_id].cpu_bsp) - continue; - cpu++; - - /* save APIC ID for this logical ID */ - cpu_apic_ids[cpu] = apic_id; -#if 0 - /* first page of AP's private space */ - pg = cpu * i386_btop(sizeof(struct privatespace)); - - /* allocate a new private data page */ - pc = (struct pcpu *)kmem_alloc(kernel_map, 
PAGE_SIZE); - - /* wire it into the private page table page */ - SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc)); - - /* allocate and set up an idle stack data page */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */ - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[pg + 1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); -#endif - pc = &SMP_prvspace[cpu].pcpu; - - /* prime data page for it to use */ - pcpu_init(pc, cpu, sizeof(struct pcpu)); - pc->pc_apic_id = apic_id; - -#if 0 - /* setup a vector to our boot code */ - *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; - *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); -#ifndef PC98 - outb(CMOS_REG, BIOS_RESET); - outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ -#endif -#endif - bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES * - PAGE_SIZE]; - bootAP = cpu; - - /* attempt to start the Application Processor */ - CHECK_INIT(99); /* setup checkpoints */ - if (!start_ap(apic_id)) { - printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); - CHECK_PRINT("trace"); /* show checkpoints */ - /* better panic as the AP may be running loose */ - printf("panic y/n? [y] "); - if (cngetc() != 'n') - panic("bye-bye"); - } - CHECK_PRINT("trace"); /* show checkpoints */ - - all_cpus |= (1 << cpu); /* record AP in CPU map */ - } - - /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); - -#if 0 - /* restore the warmstart vector */ - *(u_long *) WARMBOOT_OFF = mpbioswarmvec; -#endif - /* - * Set up the idle context for the BSP. Similar to above except - * that some was done by locore, some by pmap.c and some is implicit - * because the BSP is cpu#0 and the page is initially zero and also - * because we can refer to variables by name on the BSP.. 
- */ - - /* Allocate and setup BSP idle stack */ - stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - for (i = 0; i < KSTACK_PAGES; i++) - SMPpt[1 + i] = (pt_entry_t) - (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); - - for (i = 0; i < NKPT; i++) - PTD[i] = 0; - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); - - /* number of APs actually started */ - return mp_naps; -} - -/* - * load the 1st level AP boot code into base memory. - */ - -/* targets for relocation */ -extern void bigJump(void); -extern void bootCodeSeg(void); -extern void bootDataSeg(void); -extern void MPentry(void); -extern u_int MP_GDT; -extern u_int mp_gdtbase; -#if 0 -static void -install_ap_tramp(void) -{ - int x; - int size = *(int *) ((u_long) & bootMP_size); - vm_offset_t va = boot_address + KERNBASE; - u_char *src = (u_char *) ((u_long) bootMP); - u_char *dst = (u_char *) va; - u_int boot_base = (u_int) bootMP; - u_int8_t *dst8; - u_int16_t *dst16; - u_int32_t *dst32; - - POSTCODE(INSTALL_AP_TRAMP_POST); - - KASSERT (size <= PAGE_SIZE, - ("'size' do not fit into PAGE_SIZE, as expected.")); - pmap_kenter(va, boot_address); - pmap_invalidate_page (kernel_pmap, va); - for (x = 0; x < size; ++x) - *dst++ = *src++; - - /* - * modify addresses in code we just moved to basemem. unfortunately we - * need fairly detailed info about mpboot.s for this to work. changes - * to mpboot.s might require changes here. 
- */ - - /* boot code is located in KERNEL space */ - dst = (u_char *) va; - - /* modify the lgdt arg */ - dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); - *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); - - /* modify the ljmp target for MPentry() */ - dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); - *dst32 = ((u_int) MPentry - KERNBASE); - - /* modify the target for boot code segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; - - /* modify the target for boot data segment */ - dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); - dst8 = (u_int8_t *) (dst16 + 1); - *dst16 = (u_int) boot_address & 0xffff; - *dst8 = ((u_int) boot_address >> 16) & 0xff; -} -#endif - -static void -cpu_mp_trap_init(trap_info_t *trap_ctxt) -{ - trap_info_t *t = trap_table; - - for (t = trap_table; t->address; t++) { - trap_ctxt[t->vector].flags = t->flags; - trap_ctxt[t->vector].cs = t->cs; - trap_ctxt[t->vector].address = t->address; - } -} - -/* - * This function starts the AP (application processor) identified - * by the APIC ID 'physicalCpu'. It does quite a "song and dance" - * to accomplish this. This is necessary because of the nuances - * of the different hardware we might encounter. It isn't pretty, - * but it seems to work. - */ -static int -start_ap(int apic_id) -{ - int vector, ms, i; - int cpus, boot_error; - vcpu_guest_context_t ctxt; - - /* - * This is the FreeBSD equivalent to do_boot_cpu(apicid) in - * smpboot.c. - * its initialization sequence consists of: - * - fork_idle(cpu) to create separate idle context - * - initialization of idle's context to start_secondary - * - initialization of cpu ctxt to start in startup_32_smp - * - then we call HYPERVISOR_boot_vcpu with the cpu index and - * a pointer to the context. 
- * - on boot success we: - * - set ourselves in the callout_map - * - wait up to 5 seconds for us to be set in the callin map - * - set x86_cpu_to_apicid[cpu] = apicid; - * - */ - - POSTCODE(START_AP_POST); - - /* calculate the vector */ - vector = (boot_address >> 12) & 0xff; - - /* used as a watchpoint to signal AP startup */ - cpus = mp_naps; - - memset(&ctxt, 0, sizeof(ctxt)); - - ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.fs = 0; - ctxt.user_regs.gs = 0; - ctxt.user_regs.ss = __KERNEL_DS; - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.eip = (unsigned long)init_secondary; - ctxt.user_regs.esp = (unsigned long)bootSTK; -#ifdef notyet - ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); -#else - ctxt.user_regs.eflags = (1<<9) | (1<<2); -#endif - /* FPU is set up to default initial state. */ - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - /* Virtual IDT is empty at start-of-day. */ - for ( i = 0; i < 256; i++ ) - { - ctxt.trap_ctxt[i].vector = i; - ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; - } - cpu_mp_trap_init(ctxt.trap_ctxt); - - /* No LDT. */ - ctxt.ldt_ents = 0; - - /* Ring 1 stack is the initial stack. */ - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_sp = (unsigned long)bootSTK; - - /* Callback handlers. */ - ctxt.event_callback_cs = __KERNEL_CS; - ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; - ctxt.failsafe_callback_cs = __KERNEL_CS; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - - ctxt.ctrlreg[3] = (vm_paddr_t)IdlePTD; - - boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt); - - - if (boot_error) - printk("Houston we have a problem\n"); - else - printk("boot_vcpu succeeded\n"); -#if 0 - /* - * first we do an INIT/RESET IPI this INIT IPI might be run, reseting - * and running the target CPU. OR this INIT IPI might be latched (P5 - * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be - * ignored. 
- */ - - /* do an INIT IPI: assert RESET */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); - - /* wait for pending status end */ - lapic_ipi_wait(-1); - - /* do an INIT IPI: deassert RESET */ - lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); - - /* wait for pending status end */ - DELAY(10000); /* wait ~10mS */ - lapic_ipi_wait(-1); - - /* - * next we do a STARTUP IPI: the previous INIT IPI might still be - * latched, (P5 bug) this 1st STARTUP would then terminate - * immediately, and the previously started INIT IPI would continue. OR - * the previous INIT IPI has already run. and this STARTUP IPI will - * run. OR the previous INIT IPI was ignored. and this STARTUP IPI - * will run. - */ - - /* do a STARTUP IPI */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | - vector, apic_id); - lapic_ipi_wait(-1); - DELAY(200); /* wait ~200uS */ - - /* - * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF - * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR - * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is - * recognized after hardware RESET or INIT IPI. - */ - - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | - vector, apic_id); - lapic_ipi_wait(-1); -#endif - DELAY(200); /* wait ~200uS */ - - /* Wait up to 5 seconds for it to start. 
*/ - for (ms = 0; ms < 5000; ms++) { - if (mp_naps > cpus) - return 1; /* return SUCCESS */ - DELAY(1000); - } - return 0; /* return FAILURE */ -} - -#ifdef COUNT_XINVLTLB_HITS -u_int xhits_gbl[MAXCPU]; -u_int xhits_pg[MAXCPU]; -u_int xhits_rng[MAXCPU]; -SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, - sizeof(xhits_gbl), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, - sizeof(xhits_pg), "IU", ""); -SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, - sizeof(xhits_rng), "IU", ""); - -u_int ipi_global; -u_int ipi_page; -u_int ipi_range; -u_int ipi_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, - 0, ""); - -u_int ipi_masked_global; -u_int ipi_masked_page; -u_int ipi_masked_range; -u_int ipi_masked_range_size; -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, - &ipi_masked_global, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, - &ipi_masked_page, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, - &ipi_masked_range, 0, ""); -SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, - &ipi_masked_range_size, 0, ""); -#endif /* COUNT_XINVLTLB_HITS */ - -/* - * Flush the TLB on all other CPU's - */ -static void -smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) -{ - u_int ncpu; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - mtx_assert(&smp_rv_mtx, MA_OWNED); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); - ipi_all_but_self(vector); - while (smp_tlb_wait < ncpu) - 
ia32_pause(); -} - -/* - * This is about as magic as it gets. fortune(1) has got similar code - * for reversing bits in a word. Who thinks up this stuff?? - * - * Yes, it does appear to be consistently faster than: - * while (i = ffs(m)) { - * m >>= i; - * bits++; - * } - * and - * while (lsb = (m & -m)) { // This is magic too - * m &= ~lsb; // or: m ^= lsb - * bits++; - * } - * Both of these latter forms do some very strange things on gcc-3.1 with - * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2. - * There is probably an SSE or MMX popcnt instruction. - * - * I wonder if this should be in libkern? - * - * XXX Stop the presses! Another one: - * static __inline u_int32_t - * popcnt1(u_int32_t v) - * { - * v -= ((v >> 1) & 0x55555555); - * v = (v & 0x33333333) + ((v >> 2) & 0x33333333); - * v = (v + (v >> 4)) & 0x0F0F0F0F; - * return (v * 0x01010101) >> 24; - * } - * The downside is that it has a multiply. With a pentium3 with - * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use - * an imull, and in that case it is faster. In most other cases - * it appears slightly slower. 
- * - * Another variant (also from fortune): - * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255) - * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \ - * - (((x)>>2)&0x33333333) \ - * - (((x)>>3)&0x11111111)) - */ -static __inline u_int32_t -popcnt(u_int32_t m) -{ - - m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1); - m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2); - m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4); - m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8); - m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16); - return m; -} - -static void -smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) -{ - int ncpu, othercpus; - - othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) - return; - } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = popcnt(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) - return; - } - mtx_assert(&smp_rv_mtx, MA_OWNED); - smp_tlb_addr1 = addr1; - smp_tlb_addr2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) - ipi_all_but_self(vector); - else - ipi_selected(mask, vector); - while (smp_tlb_wait < ncpu) - ia32_pause(); -} - -void -smp_invltlb(void) -{ - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif - } -} - -void -smp_invlpg(vm_offset_t addr) -{ - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -} - -void -smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) -{ - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -} - -void -smp_masked_invltlb(u_int 
mask) -{ - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; -#endif - } -} - -void -smp_masked_invlpg(u_int mask, vm_offset_t addr) -{ - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; -#endif - } -} - -void -smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) -{ - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -} - - -/* - * For statclock, we send an IPI to all CPU's to have them call this - * function. - */ -void -forwarded_statclock(struct clockframe frame) -{ - struct thread *td; - - CTR0(KTR_SMP, "forwarded_statclock"); - td = curthread; - td->td_intr_nesting_level++; - if (profprocs != 0) - profclock(&frame); - if (pscnt == psdiv) - statclock(&frame); - td->td_intr_nesting_level--; -} - -void -forward_statclock(void) -{ - int map; - - CTR0(KTR_SMP, "forward_statclock"); - - if (!smp_started || cold || panicstr) - return; - - map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); - if (map != 0) - ipi_selected(map, IPI_STATCLOCK); -} - -/* - * For each hardclock(), we send an IPI to all other CPU's to have them - * execute this function. It would be nice to reduce contention on - * sched_lock if we could simply peek at the CPU to determine the user/kernel - * state and call hardclock_process() on the CPU receiving the clock interrupt - * and then just use a simple IPI to handle any ast's if needed. 
- */ -void -forwarded_hardclock(struct clockframe frame) -{ - struct thread *td; - - CTR0(KTR_SMP, "forwarded_hardclock"); - td = curthread; - td->td_intr_nesting_level++; - hardclock_process(&frame); - td->td_intr_nesting_level--; -} - -void -forward_hardclock(void) -{ - u_int map; - - CTR0(KTR_SMP, "forward_hardclock"); - - if (!smp_started || cold || panicstr) - return; - - map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); - if (map != 0) - ipi_selected(map, IPI_HARDCLOCK); -} - -/* - * send an IPI to a set of cpus. - */ -void -ipi_selected(u_int32_t cpus, u_int ipi) -{ - int cpu; - - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { - cpu--; - KASSERT(cpu_apic_ids[cpu] != -1, - ("IPI to non-existent CPU %d", cpu)); - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); - cpus &= ~(1 << cpu); - } -} - -/* - * send an IPI INTerrupt containing 'vector' to all CPUs, including myself - */ -void -ipi_all(u_int ipi) -{ - - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL); -} - -/* - * send an IPI to all CPUs EXCEPT myself - */ -void -ipi_all_but_self(u_int ipi) -{ - - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); -} - -/* - * send an IPI to myself - */ -void -ipi_self(u_int ipi) -{ - - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF); -} - -/* - * This is called once the rest of the system is up and running and we're - * ready to let the AP's out of the pen. 
- */ -static void -release_aps(void *dummy __unused) -{ - - if (mp_ncpus == 1) - return; - mtx_lock_spin(&sched_lock); - atomic_store_rel_int(&aps_ready, 1); - while (smp_started == 0) - ia32_pause(); - mtx_unlock_spin(&sched_lock); -} -SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); - -static int -sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) -{ - u_int mask; - int error; - - mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); - if (error || !req->newptr) - return (error); - - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); - hlt_cpus_mask = mask; - return (error); -} -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", - "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); - -static int -sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) -{ - int disable, error; - - disable = hlt_logical_cpus; - error = sysctl_handle_int(oidp, &disable, 0, req); - if (error || !req->newptr) - return (error); - - if (disable) - hlt_cpus_mask |= logical_cpus_mask; - else - hlt_cpus_mask &= ~logical_cpus_mask; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hlt_logical_cpus = disable; - return (error); -} - -static void -cpu_hlt_setup(void *dummy __unused) -{ - - if (logical_cpus_mask != 0) { - TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", - &hlt_logical_cpus); - sysctl_ctx_init(&logical_cpu_clist); - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, - sysctl_hlt_logical_cpus, "IU", ""); - SYSCTL_ADD_UINT(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, - &logical_cpus_mask, 0, ""); - - if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; - } -} -SYSINIT(cpu_hlt, SI_SUB_SMP, 
SI_ORDER_ANY, cpu_hlt_setup, NULL); - -int -mp_grab_cpu_hlt(void) -{ - u_int mask = PCPU_GET(cpumask); -#ifdef MP_WATCHDOG - u_int cpuid = PCPU_GET(cpuid); -#endif - int retval; - -#ifdef MP_WATCHDOG - ap_watchdog(cpuid); -#endif - - retval = mask & hlt_cpus_mask; - while (mask & hlt_cpus_mask) - __asm __volatile("sti; hlt" : : : "memory"); - return (retval); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mptable.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,974 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin <jhb@xxxxxxxxxxx> - * Copyright (c) 1996, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/mptable.c,v 1.235.2.1 2004/09/28 16:24:09 jhb Exp $"); - -#include "opt_mptable_force_htt.h" -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/malloc.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> - -#include <machine/apicreg.h> -#include <machine/frame.h> -#include <machine/intr_machdep.h> -#include <machine/apicvar.h> -#include <machine/md_var.h> -#include <machine/mptable.h> -#include <machine/specialreg.h> - -#include <dev/pci/pcivar.h> - -/* string defined by the Intel MP Spec as identifying the MP table */ -#define MP_SIG 0x5f504d5f /* _MP_ */ - -#define NAPICID 32 /* Max number of APIC's */ - -#ifdef PC98 -#define BIOS_BASE (0xe8000) -#define BIOS_SIZE (0x18000) -#else -#define BIOS_BASE (0xf0000) -#define BIOS_SIZE (0x10000) -#endif -#define BIOS_COUNT (BIOS_SIZE/4) - -typedef void mptable_entry_handler(u_char *entry, void *arg); - -static basetable_entry basetable_entry_types[] = -{ - {0, 20, "Processor"}, - {1, 8, "Bus"}, - {2, 8, "I/O APIC"}, - {3, 8, "I/O INT"}, - {4, 8, "Local INT"} -}; - -typedef struct BUSDATA { - u_char bus_id; - enum busTypes bus_type; -} bus_datum; - -typedef struct INTDATA { - u_char int_type; - u_short int_flags; - u_char src_bus_id; - u_char src_bus_irq; - u_char dst_apic_id; - u_char dst_apic_int; - u_char int_vector; -} io_int, local_int; - -typedef struct BUSTYPENAME { - u_char type; - char name[7]; -} bus_type_name; - -/* From MP spec v1.4, table 4-8. 
*/ -static bus_type_name bus_type_table[] = -{ - {UNKNOWN_BUSTYPE, "CBUS "}, - {UNKNOWN_BUSTYPE, "CBUSII"}, - {EISA, "EISA "}, - {UNKNOWN_BUSTYPE, "FUTURE"}, - {UNKNOWN_BUSTYPE, "INTERN"}, - {ISA, "ISA "}, - {UNKNOWN_BUSTYPE, "MBI "}, - {UNKNOWN_BUSTYPE, "MBII "}, - {MCA, "MCA "}, - {UNKNOWN_BUSTYPE, "MPI "}, - {UNKNOWN_BUSTYPE, "MPSA "}, - {UNKNOWN_BUSTYPE, "NUBUS "}, - {PCI, "PCI "}, - {UNKNOWN_BUSTYPE, "PCMCIA"}, - {UNKNOWN_BUSTYPE, "TC "}, - {UNKNOWN_BUSTYPE, "VL "}, - {UNKNOWN_BUSTYPE, "VME "}, - {UNKNOWN_BUSTYPE, "XPRESS"} -}; - -/* From MP spec v1.4, table 5-1. */ -static int default_data[7][5] = -{ -/* nbus, id0, type0, id1, type1 */ - {1, 0, ISA, 255, NOBUS}, - {1, 0, EISA, 255, NOBUS}, - {1, 0, EISA, 255, NOBUS}, - {1, 0, MCA, 255, NOBUS}, - {2, 0, ISA, 1, PCI}, - {2, 0, EISA, 1, PCI}, - {2, 0, MCA, 1, PCI} -}; - -struct pci_probe_table_args { - u_char bus; - u_char found; -}; - -struct pci_route_interrupt_args { - u_char bus; /* Source bus. */ - u_char irq; /* Source slot:pin. */ - int vector; /* Return value. 
*/ -}; - -static mpfps_t mpfps; -static mpcth_t mpct; -static void *ioapics[NAPICID]; -static bus_datum *busses; -static int mptable_nioapics, mptable_nbusses, mptable_maxbusid; -static int pci0 = -1; - -MALLOC_DEFINE(M_MPTABLE, "MP Table", "MP Table Items"); - -static enum intr_polarity conforming_polarity(u_char src_bus, - u_char src_bus_irq); -static enum intr_trigger conforming_trigger(u_char src_bus, u_char src_bus_irq); -static enum intr_polarity intentry_polarity(int_entry_ptr intr); -static enum intr_trigger intentry_trigger(int_entry_ptr intr); -static int lookup_bus_type(char *name); -static void mptable_count_items(void); -static void mptable_count_items_handler(u_char *entry, void *arg); -#ifdef MPTABLE_FORCE_HTT -static void mptable_hyperthread_fixup(u_int id_mask); -#endif -static void mptable_parse_apics_and_busses(void); -static void mptable_parse_apics_and_busses_handler(u_char *entry, - void *arg); -static void mptable_parse_ints(void); -static void mptable_parse_ints_handler(u_char *entry, void *arg); -static void mptable_parse_io_int(int_entry_ptr intr); -static void mptable_parse_local_int(int_entry_ptr intr); -static void mptable_pci_probe_table_handler(u_char *entry, void *arg); -static void mptable_pci_route_interrupt_handler(u_char *entry, void *arg); -static void mptable_pci_setup(void); -static int mptable_probe(void); -static int mptable_probe_cpus(void); -static void mptable_probe_cpus_handler(u_char *entry, void *arg __unused); -static void mptable_register(void *dummy); -static int mptable_setup_local(void); -static int mptable_setup_io(void); -static void mptable_walk_table(mptable_entry_handler *handler, void *arg); -static int search_for_sig(u_int32_t target, int count); - -static struct apic_enumerator mptable_enumerator = { - "MPTable", - mptable_probe, - mptable_probe_cpus, - mptable_setup_local, - mptable_setup_io -}; - -/* - * look for the MP spec signature - */ - -static int -search_for_sig(u_int32_t target, int count) -{ - 
int x; - u_int32_t *addr = (u_int32_t *) (KERNBASE + target); - - for (x = 0; x < count; x += 4) - if (addr[x] == MP_SIG) - /* make array index a byte index */ - return (target + (x * sizeof(u_int32_t))); - return (-1); -} - -static int -lookup_bus_type(char *name) -{ - int x; - - for (x = 0; x < MAX_BUSTYPE; ++x) - if (strncmp(bus_type_table[x].name, name, 6) == 0) - return (bus_type_table[x].type); - - return (UNKNOWN_BUSTYPE); -} - -/* - * Look for an Intel MP spec table (ie, SMP capable hardware). - */ -static int -mptable_probe(void) -{ - int x; - u_long segment; - u_int32_t target; - - /* see if EBDA exists */ - if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { - /* search first 1K of EBDA */ - target = (u_int32_t) (segment << 4); - if ((x = search_for_sig(target, 1024 / 4)) >= 0) - goto found; - } else { - /* last 1K of base memory, effective 'top of base' passed in */ - target = (u_int32_t) ((basemem * 1024) - 0x400); - if ((x = search_for_sig(target, 1024 / 4)) >= 0) - goto found; - } - - /* search the BIOS */ - target = (u_int32_t) BIOS_BASE; - if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) - goto found; - - /* nothing found */ - return (ENXIO); - -found: - mpfps = (mpfps_t)(KERNBASE + x); - - /* Map in the configuration table if it exists. 
*/ - if (mpfps->config_type != 0) - mpct = NULL; - else { - if ((uintptr_t)mpfps->pap >= 1024 * 1024) { - printf("%s: Unable to map MP Configuration Table\n", - __func__); - return (ENXIO); - } - mpct = (mpcth_t)(KERNBASE + (uintptr_t)mpfps->pap); - if (mpct->base_table_length + (uintptr_t)mpfps->pap >= - 1024 * 1024) { - printf("%s: Unable to map end of MP Config Table\n", - __func__); - return (ENXIO); - } - if (mpct->signature[0] != 'P' || mpct->signature[1] != 'C' || - mpct->signature[2] != 'M' || mpct->signature[3] != 'P') { - printf("%s: MP Config Table has bad signature: %c%c%c%c\n", - __func__, mpct->signature[0], mpct->signature[1], - mpct->signature[2], mpct->signature[3]); - return (ENXIO); - } - if (bootverbose) - printf( - "MP Configuration Table version 1.%d found at %p\n", - mpct->spec_rev, mpct); - } - - return (-100); -} - -/* - * Run through the MP table enumerating CPUs. - */ -static int -mptable_probe_cpus(void) -{ - u_int cpu_mask; - - /* Is this a pre-defined config? */ - if (mpfps->config_type != 0) { - lapic_create(0, 1); - lapic_create(1, 0); - } else { - cpu_mask = 0; - mptable_walk_table(mptable_probe_cpus_handler, &cpu_mask); -#ifdef MPTABLE_FORCE_HTT - mptable_hyperthread_fixup(cpu_mask); -#endif - } - return (0); -} - -/* - * Initialize the local APIC on the BSP. - */ -static int -mptable_setup_local(void) -{ - - /* Is this a pre-defined config? */ - printf("MPTable: <"); - if (mpfps->config_type != 0) { - lapic_init(DEFAULT_APIC_BASE); - printf("Preset Config %d", mpfps->config_type); - } else { - lapic_init((uintptr_t)mpct->apic_address); - printf("%.*s %.*s", (int)sizeof(mpct->oem_id), mpct->oem_id, - (int)sizeof(mpct->product_id), mpct->product_id); - } - printf(">\n"); - return (0); -} - -/* - * Run through the MP table enumerating I/O APICs. - */ -static int -mptable_setup_io(void) -{ - int i; - u_char byte; - - /* First, we count individual items and allocate arrays. 
*/ - mptable_count_items(); - busses = malloc((mptable_maxbusid + 1) * sizeof(bus_datum), M_MPTABLE, - M_WAITOK); - for (i = 0; i <= mptable_maxbusid; i++) - busses[i].bus_type = NOBUS; - - /* Second, we run through adding I/O APIC's and busses. */ - ioapic_enable_mixed_mode(); - mptable_parse_apics_and_busses(); - - /* Third, we run through the table tweaking interrupt sources. */ - mptable_parse_ints(); - - /* Fourth, we register all the I/O APIC's. */ - for (i = 0; i < NAPICID; i++) - if (ioapics[i] != NULL) - ioapic_register(ioapics[i]); - - /* Fifth, we setup data structures to handle PCI interrupt routing. */ - mptable_pci_setup(); - - /* Finally, we throw the switch to enable the I/O APIC's. */ - if (mpfps->mpfb2 & MPFB2_IMCR_PRESENT) { - outb(0x22, 0x70); /* select IMCR */ - byte = inb(0x23); /* current contents */ - byte |= 0x01; /* mask external INTR */ - outb(0x23, byte); /* disconnect 8259s/NMI */ - } - - return (0); -} - -static void -mptable_register(void *dummy __unused) -{ - - apic_register_enumerator(&mptable_enumerator); -} -SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register, - NULL) - -/* - * Call the handler routine for each entry in the MP config table. 
- */ -static void -mptable_walk_table(mptable_entry_handler *handler, void *arg) -{ - u_int i; - u_char *entry; - - entry = (u_char *)(mpct + 1); - for (i = 0; i < mpct->entry_count; i++) { - switch (*entry) { - case MPCT_ENTRY_PROCESSOR: - case MPCT_ENTRY_IOAPIC: - case MPCT_ENTRY_BUS: - case MPCT_ENTRY_INT: - case MPCT_ENTRY_LOCAL_INT: - break; - default: - panic("%s: Unknown MP Config Entry %d\n", __func__, - (int)*entry); - } - handler(entry, arg); - entry += basetable_entry_types[*entry].length; - } -} - -static void -mptable_probe_cpus_handler(u_char *entry, void *arg) -{ - proc_entry_ptr proc; - u_int *cpu_mask; - - switch (*entry) { - case MPCT_ENTRY_PROCESSOR: - proc = (proc_entry_ptr)entry; - if (proc->cpu_flags & PROCENTRY_FLAG_EN) { - lapic_create(proc->apic_id, proc->cpu_flags & - PROCENTRY_FLAG_BP); - cpu_mask = (u_int *)arg; - *cpu_mask |= (1 << proc->apic_id); - } - break; - } -} - -static void -mptable_count_items_handler(u_char *entry, void *arg __unused) -{ - io_apic_entry_ptr apic; - bus_entry_ptr bus; - - switch (*entry) { - case MPCT_ENTRY_BUS: - bus = (bus_entry_ptr)entry; - mptable_nbusses++; - if (bus->bus_id > mptable_maxbusid) - mptable_maxbusid = bus->bus_id; - break; - case MPCT_ENTRY_IOAPIC: - apic = (io_apic_entry_ptr)entry; - if (apic->apic_flags & IOAPICENTRY_FLAG_EN) - mptable_nioapics++; - break; - } -} - -/* - * Count items in the table. - */ -static void -mptable_count_items(void) -{ - - /* Is this a pre-defined config? */ - if (mpfps->config_type != 0) { - mptable_nioapics = 1; - switch (mpfps->config_type) { - case 1: - case 2: - case 3: - case 4: - mptable_nbusses = 1; - break; - case 5: - case 6: - case 7: - mptable_nbusses = 2; - break; - default: - panic("Unknown pre-defined MP Table config type %d", - mpfps->config_type); - } - mptable_maxbusid = mptable_nbusses - 1; - } else - mptable_walk_table(mptable_count_items_handler, NULL); -} - -/* - * Add a bus or I/O APIC from an entry in the table. 
- */ -static void -mptable_parse_apics_and_busses_handler(u_char *entry, void *arg __unused) -{ - io_apic_entry_ptr apic; - bus_entry_ptr bus; - enum busTypes bus_type; - int i; - - - switch (*entry) { - case MPCT_ENTRY_BUS: - bus = (bus_entry_ptr)entry; - bus_type = lookup_bus_type(bus->bus_type); - if (bus_type == UNKNOWN_BUSTYPE) { - printf("MPTable: Unknown bus %d type \"", bus->bus_id); - for (i = 0; i < 6; i++) - printf("%c", bus->bus_type[i]); - printf("\"\n"); - } - busses[bus->bus_id].bus_id = bus->bus_id; - busses[bus->bus_id].bus_type = bus_type; - break; - case MPCT_ENTRY_IOAPIC: - apic = (io_apic_entry_ptr)entry; - if (!(apic->apic_flags & IOAPICENTRY_FLAG_EN)) - break; - if (apic->apic_id >= NAPICID) - panic("%s: I/O APIC ID %d too high", __func__, - apic->apic_id); - if (ioapics[apic->apic_id] != NULL) - panic("%s: Double APIC ID %d", __func__, - apic->apic_id); - ioapics[apic->apic_id] = ioapic_create( - (uintptr_t)apic->apic_address, apic->apic_id, -1); - break; - default: - break; - } -} - -/* - * Enumerate I/O APIC's and busses. - */ -static void -mptable_parse_apics_and_busses(void) -{ - - /* Is this a pre-defined config? */ - if (mpfps->config_type != 0) { - ioapics[0] = ioapic_create(DEFAULT_IO_APIC_BASE, 2, 0); - busses[0].bus_id = 0; - busses[0].bus_type = default_data[mpfps->config_type][2]; - if (mptable_nbusses > 1) { - busses[1].bus_id = 1; - busses[1].bus_type = - default_data[mpfps->config_type][4]; - } - } else - mptable_walk_table(mptable_parse_apics_and_busses_handler, - NULL); -} - -/* - * Determine conforming polarity for a given bus type. 
- */ -static enum intr_polarity -conforming_polarity(u_char src_bus, u_char src_bus_irq) -{ - - KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); - switch (busses[src_bus].bus_type) { - case ISA: - case EISA: - return (INTR_POLARITY_HIGH); - case PCI: - return (INTR_POLARITY_LOW); - default: - panic("%s: unknown bus type %d", __func__, - busses[src_bus].bus_type); - } -} - -/* - * Determine conforming trigger for a given bus type. - */ -static enum intr_trigger -conforming_trigger(u_char src_bus, u_char src_bus_irq) -{ - - KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus)); - switch (busses[src_bus].bus_type) { - case ISA: - return (INTR_TRIGGER_EDGE); - case PCI: - return (INTR_TRIGGER_LEVEL); -#if !defined(PC98) && !defined(XEN) - case EISA: - KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq)); - return (elcr_read_trigger(src_bus_irq)); -#endif - default: - panic("%s: unknown bus type %d", __func__, - busses[src_bus].bus_type); - } -} - -static enum intr_polarity -intentry_polarity(int_entry_ptr intr) -{ - - switch (intr->int_flags & INTENTRY_FLAGS_POLARITY) { - case INTENTRY_FLAGS_POLARITY_CONFORM: - return (conforming_polarity(intr->src_bus_id, - intr->src_bus_irq)); - case INTENTRY_FLAGS_POLARITY_ACTIVEHI: - return (INTR_POLARITY_HIGH); - case INTENTRY_FLAGS_POLARITY_ACTIVELO: - return (INTR_POLARITY_LOW); - default: - panic("Bogus interrupt flags"); - } -} - -static enum intr_trigger -intentry_trigger(int_entry_ptr intr) -{ - - switch (intr->int_flags & INTENTRY_FLAGS_TRIGGER) { - case INTENTRY_FLAGS_TRIGGER_CONFORM: - return (conforming_trigger(intr->src_bus_id, - intr->src_bus_irq)); - case INTENTRY_FLAGS_TRIGGER_EDGE: - return (INTR_TRIGGER_EDGE); - case INTENTRY_FLAGS_TRIGGER_LEVEL: - return (INTR_TRIGGER_LEVEL); - default: - panic("Bogus interrupt flags"); - } -} - -/* - * Parse an interrupt entry for an I/O interrupt routed to a pin on an I/O APIC. 
- */ -static void -mptable_parse_io_int(int_entry_ptr intr) -{ - void *ioapic; - u_int pin; - - if (intr->dst_apic_id == 0xff) { - printf("MPTable: Ignoring global interrupt entry for pin %d\n", - intr->dst_apic_int); - return; - } - if (intr->dst_apic_id >= NAPICID) { - printf("MPTable: Ignoring interrupt entry for ioapic%d\n", - intr->dst_apic_id); - return; - } - ioapic = ioapics[intr->dst_apic_id]; - if (ioapic == NULL) { - printf( - "MPTable: Ignoring interrupt entry for missing ioapic%d\n", - intr->dst_apic_id); - return; - } - pin = intr->dst_apic_int; - switch (intr->int_type) { - case INTENTRY_TYPE_INT: - switch (busses[intr->src_bus_id].bus_type) { - case NOBUS: - panic("interrupt from missing bus"); - case ISA: - case EISA: - if (busses[intr->src_bus_id].bus_type == ISA) - ioapic_set_bus(ioapic, pin, APIC_BUS_ISA); - else - ioapic_set_bus(ioapic, pin, APIC_BUS_EISA); - if (intr->src_bus_irq == pin) - break; - ioapic_remap_vector(ioapic, pin, intr->src_bus_irq); - if (ioapic_get_vector(ioapic, intr->src_bus_irq) == - intr->src_bus_irq) - ioapic_disable_pin(ioapic, intr->src_bus_irq); - break; - case PCI: - ioapic_set_bus(ioapic, pin, APIC_BUS_PCI); - break; - default: - ioapic_set_bus(ioapic, pin, APIC_BUS_UNKNOWN); - break; - } - break; - case INTENTRY_TYPE_NMI: - ioapic_set_nmi(ioapic, pin); - break; - case INTENTRY_TYPE_SMI: - ioapic_set_smi(ioapic, pin); - break; - case INTENTRY_TYPE_EXTINT: - ioapic_set_extint(ioapic, pin); - break; - default: - panic("%s: invalid interrupt entry type %d\n", __func__, - intr->int_type); - } - if (intr->int_type == INTENTRY_TYPE_INT || - (intr->int_flags & INTENTRY_FLAGS_TRIGGER) != - INTENTRY_FLAGS_TRIGGER_CONFORM) - ioapic_set_triggermode(ioapic, pin, intentry_trigger(intr)); - if (intr->int_type == INTENTRY_TYPE_INT || - (intr->int_flags & INTENTRY_FLAGS_POLARITY) != - INTENTRY_FLAGS_POLARITY_CONFORM) - ioapic_set_polarity(ioapic, pin, intentry_polarity(intr)); -} - -/* - * Parse an interrupt entry for a local APIC 
LVT pin. - */ -static void -mptable_parse_local_int(int_entry_ptr intr) -{ - u_int apic_id, pin; - - if (intr->dst_apic_id == 0xff) - apic_id = APIC_ID_ALL; - else - apic_id = intr->dst_apic_id; - if (intr->dst_apic_int == 0) - pin = LVT_LINT0; - else - pin = LVT_LINT1; - switch (intr->int_type) { - case INTENTRY_TYPE_INT: -#if 1 - printf( - "MPTable: Ignoring vectored local interrupt for LINTIN%d vector %d\n", - intr->dst_apic_int, intr->src_bus_irq); - return; -#else - lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_FIXED); - break; -#endif - case INTENTRY_TYPE_NMI: - lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI); - break; - case INTENTRY_TYPE_SMI: - lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_SMI); - break; - case INTENTRY_TYPE_EXTINT: - lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_EXTINT); - break; - default: - panic("%s: invalid interrupt entry type %d\n", __func__, - intr->int_type); - } - if ((intr->int_flags & INTENTRY_FLAGS_TRIGGER) != - INTENTRY_FLAGS_TRIGGER_CONFORM) - lapic_set_lvt_triggermode(apic_id, pin, - intentry_trigger(intr)); - if ((intr->int_flags & INTENTRY_FLAGS_POLARITY) != - INTENTRY_FLAGS_POLARITY_CONFORM) - lapic_set_lvt_polarity(apic_id, pin, intentry_polarity(intr)); -} - -/* - * Parse interrupt entries. - */ -static void -mptable_parse_ints_handler(u_char *entry, void *arg __unused) -{ - int_entry_ptr intr; - - intr = (int_entry_ptr)entry; - switch (*entry) { - case MPCT_ENTRY_INT: - mptable_parse_io_int(intr); - break; - case MPCT_ENTRY_LOCAL_INT: - mptable_parse_local_int(intr); - break; - } -} - -/* - * Configure the interrupt pins - */ -static void -mptable_parse_ints(void) -{ - - /* Is this a pre-defined config? */ - if (mpfps->config_type != 0) { - /* Configure LINT pins. */ - lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT0, APIC_LVT_DM_EXTINT); - lapic_set_lvt_mode(APIC_ID_ALL, LVT_LINT1, APIC_LVT_DM_NMI); - - /* Configure I/O APIC pins. 
*/ - if (mpfps->config_type != 7) - ioapic_set_extint(ioapics[0], 0); - else - ioapic_disable_pin(ioapics[0], 0); - if (mpfps->config_type != 2) - ioapic_remap_vector(ioapics[0], 2, 0); - else - ioapic_disable_pin(ioapics[0], 2); - if (mpfps->config_type == 2) - ioapic_disable_pin(ioapics[0], 13); - } else - mptable_walk_table(mptable_parse_ints_handler, NULL); -} - -#ifdef MPTABLE_FORCE_HTT -/* - * Perform a hyperthreading "fix-up" to enumerate any logical CPU's - * that aren't already listed in the table. - * - * XXX: We assume that all of the physical CPUs in the - * system have the same number of logical CPUs. - * - * XXX: We assume that APIC ID's are allocated such that - * the APIC ID's for a physical processor are aligned - * with the number of logical CPU's in the processor. - */ -static void -mptable_hyperthread_fixup(u_int id_mask) -{ - u_int i, id, logical_cpus; - - /* Nothing to do if there is no HTT support. */ - if ((cpu_feature & CPUID_HTT) == 0) - return; - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (logical_cpus <= 1) - return; - - /* - * For each APIC ID of a CPU that is set in the mask, - * scan the other candidate APIC ID's for this - * physical processor. If any of those ID's are - * already in the table, then kill the fixup. - */ - for (id = 0; id < NAPICID; id++) { - if ((id_mask & 1 << id) == 0) - continue; - /* First, make sure we are on a logical_cpus boundary. */ - if (id % logical_cpus != 0) - return; - for (i = id + 1; i < id + logical_cpus; i++) - if ((id_mask & 1 << i) != 0) - return; - } - - /* - * Ok, the ID's checked out, so perform the fixup by - * adding the logical CPUs. 
- */ - while ((id = ffs(id_mask)) != 0) { - id--; - for (i = id + 1; i < id + logical_cpus; i++) { - if (bootverbose) - printf( - "MPTable: Adding logical CPU %d from main CPU %d\n", - i, id); - lapic_create(i, 0); - } - id_mask &= ~(1 << id); - } -} -#endif /* MPTABLE_FORCE_HTT */ - -/* - * Support code for routing PCI interrupts using the MP Table. - */ -static void -mptable_pci_setup(void) -{ - int i; - - /* - * Find the first pci bus and call it 0. Panic if pci0 is not - * bus zero and there are multiple PCI busses. - */ - for (i = 0; i <= mptable_maxbusid; i++) - if (busses[i].bus_type == PCI) { - if (pci0 == -1) - pci0 = i; - else if (pci0 != 0) - panic( - "MPTable contains multiple PCI busses but no PCI bus 0"); - } -} - -static void -mptable_pci_probe_table_handler(u_char *entry, void *arg) -{ - struct pci_probe_table_args *args; - int_entry_ptr intr; - - if (*entry != MPCT_ENTRY_INT) - return; - intr = (int_entry_ptr)entry; - args = (struct pci_probe_table_args *)arg; - KASSERT(args->bus <= mptable_maxbusid, - ("bus %d is too big", args->bus)); - KASSERT(busses[args->bus].bus_type == PCI, ("probing for non-PCI bus")); - if (intr->src_bus_id == args->bus) - args->found = 1; -} - -int -mptable_pci_probe_table(int bus) -{ - struct pci_probe_table_args args; - - if (bus < 0) - return (EINVAL); - if (pci0 == -1 || pci0 + bus > mptable_maxbusid) - return (ENXIO); - if (busses[pci0 + bus].bus_type != PCI) - return (ENXIO); - args.bus = pci0 + bus; - args.found = 0; - mptable_walk_table(mptable_pci_probe_table_handler, &args); - if (args.found == 0) - return (ENXIO); - return (0); -} - -static void -mptable_pci_route_interrupt_handler(u_char *entry, void *arg) -{ - struct pci_route_interrupt_args *args; - int_entry_ptr intr; - int vector; - - if (*entry != MPCT_ENTRY_INT) - return; - intr = (int_entry_ptr)entry; - args = (struct pci_route_interrupt_args *)arg; - if (intr->src_bus_id != args->bus || intr->src_bus_irq != args->irq) - return; - - /* Make sure the 
APIC maps to a known APIC. */ - KASSERT(ioapics[intr->dst_apic_id] != NULL, - ("No I/O APIC %d to route interrupt to", intr->dst_apic_id)); - - /* - * Look up the vector for this APIC / pin combination. If we - * have previously matched an entry for this PCI IRQ but it - * has the same vector as this entry, just return. Otherwise, - * we use the vector for this APIC / pin combination. - */ - vector = ioapic_get_vector(ioapics[intr->dst_apic_id], - intr->dst_apic_int); - if (args->vector == vector) - return; - KASSERT(args->vector == -1, - ("Multiple IRQs for PCI interrupt %d.%d.INT%c: %d and %d\n", - args->bus, args->irq >> 2, 'A' + (args->irq & 0x3), args->vector, - vector)); - args->vector = vector; -} - -int -mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) -{ - struct pci_route_interrupt_args args; - int slot; - - /* Like ACPI, pin numbers are 0-3, not 1-4. */ - pin--; - KASSERT(pci0 != -1, ("do not know how to route PCI interrupts")); - args.bus = pci_get_bus(dev) + pci0; - slot = pci_get_slot(dev); - - /* - * PCI interrupt entries in the MP Table encode both the slot and - * pin into the IRQ with the pin being the two least significant - * bits, the slot being the next five bits, and the most significant - * bit being reserved. - */ - args.irq = slot << 2 | pin; - args.vector = -1; - mptable_walk_table(mptable_pci_route_interrupt_handler, &args); - if (args.vector < 0) { - device_printf(pcib, "unable to route slot %d INT%c\n", slot, - 'A' + pin); - return (PCI_INVALID_IRQ); - } - if (bootverbose) - device_printf(pcib, "slot %d INT%c routed to irq %d\n", slot, - 'A' + pin, args.vector); - return (args.vector); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,3474 +0,0 @@ -/*- - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 1994 John S. Dyson - * All rights reserved. - * Copyright (c) 1994 David Greenman - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - */ -/*- - * Copyright (c) 2003 Networks Associates Technology, Inc. - * All rights reserved. - * - * This software was developed for the FreeBSD Project by Jake Burkholder, - * Safeport Network Services, and Network Associates Laboratories, the - * Security Research Division of Network Associates, Inc. under - * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA - * CHATS research program. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/pmap.c,v 1.494.2.6 2004/10/10 19:08:00 alc Exp $"); - -/* - * Manages physical address maps. - * XEN NOTES: page table entries (pt_entry_t) and - * page directory entries (pd_entry_t) contain machine - * addresses and not physical addresses. Use PT_GET() before - * dereferencing these structures to convert them into a - * physical address. Use the PT_SET_VA operations to commit - * page changes back to XEN. PT_SET_VA_MA should be used with - * great care! - * - * - * In addition to hardware address maps, this - * module is called upon to provide software-use-only - * maps which may or may not be stored in the same - * form as hardware maps. These pseudo-maps are - * used to store intermediate results from copy - * operations to and from address spaces. - * - * Since the information managed by this module is - * also stored by the logical address mapping module, - * this module may throw away valid virtual-to-physical - * mappings at almost any time. However, invalidations - * of virtual-to-physical mappings must be done as - * requested. - * - * In order to cope with hardware architectures which - * make virtual-to-physical map invalidates expensive, - * this module may delay invalidate or reduced protection - * operations until such time as they are actually - * necessary. 
This module is given full information as - * to which processors are currently using which maps, - * and to when physical maps must be made correct. - */ - -#include "opt_cpu.h" -#include "opt_pmap.h" -#include "opt_msgbuf.h" -#include "opt_kstack_pages.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mman.h> -#include <sys/msgbuf.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/sx.h> -#include <sys/user.h> -#include <sys/vmmeter.h> -#include <sys/sched.h> -#include <sys/sysctl.h> -#ifdef SMP -#include <sys/smp.h> -#endif - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_kern.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> -#include <vm/vm_object.h> -#include <vm/vm_extern.h> -#include <vm/vm_pageout.h> -#include <vm/vm_pager.h> -#include <vm/uma.h> - -#include <machine/cpu.h> -#include <machine/cputypes.h> -#include <machine/md_var.h> -#include <machine/specialreg.h> -#ifdef SMP -#include <machine/smp.h> -#endif - -#include <machine/xenfunc.h> - -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - -#if defined(DIAGNOSTIC) -#define PMAP_DIAGNOSTIC -#endif - -#define MINPV 2048 - -#if !defined(PMAP_DIAGNOSTIC) -#define PMAP_INLINE __inline -#else -#define PMAP_INLINE -#endif - -/* - * Get PDEs and PTEs for user/kernel address space - */ -#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) -#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) - -#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) -#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) -#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) -#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) - -#if 0 -#define pmap_pte_set_w(pte, v) ((v) ? 
atomic_set_int((u_int *)(pte), PG_W) : \ - atomic_clear_int((u_int *)(pte), PG_W)) -#else -#define pmap_pte_set_w(pte, v) { \ - if (v) \ - PT_SET_VA_MA(pte, *pte | PG_W, TRUE); \ - else \ - PT_SET_VA_MA(pte, *pte & ~PG_W, TRUE); \ -} -#endif - -struct pmap kernel_pmap_store; -LIST_HEAD(pmaplist, pmap); -static struct pmaplist allpmaps; -static struct mtx allpmaps_lock; - -vm_paddr_t avail_end; /* PA of last available physical page */ -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ -static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ -int pgeflag = 0; /* PG_G or-in */ -int pseflag = 0; /* PG_PS or-in */ - -static int nkpt; -vm_offset_t kernel_vm_end; -extern u_int32_t KERNend; - -#ifdef PAE -static uma_zone_t pdptzone; -#endif - -/* - * Data for the pv entry allocation mechanism - */ -static uma_zone_t pvzone; -static struct vm_object pvzone_obj; -static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; -int pmap_pagedaemon_waken; - -/* - * All those kernel PT submaps that BSD is so fond of - */ -pt_entry_t *CMAP1 = 0; -static pt_entry_t *CMAP2, *CMAP3; -caddr_t CADDR1 = 0, ptvmmap = 0; -static caddr_t CADDR2, CADDR3; -static struct mtx CMAPCADDR12_lock; -struct msgbuf *msgbufp = 0; - -/* - * Crashdump maps. 
- */ -static caddr_t crashdumpmap; - -#ifdef SMP -extern pt_entry_t *SMPpt; -#endif -static pt_entry_t *PMAP1 = 0, *PMAP2; -static pt_entry_t *PADDR1 = 0, *PADDR2; -#ifdef SMP -static int PMAP1cpu; -static int PMAP1changedcpu; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, - &PMAP1changedcpu, 0, - "Number of times pmap_pte_quick changed CPU with same PMAP1"); -#endif -static int PMAP1changed; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, - &PMAP1changed, 0, - "Number of times pmap_pte_quick changed PMAP1"); -static int PMAP1unchanged; -SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, - &PMAP1unchanged, 0, - "Number of times pmap_pte_quick didn't change PMAP1"); -static struct mtx PMAP2mutex; - -static PMAP_INLINE void free_pv_entry(pv_entry_t pv); -static pv_entry_t get_pv_entry(void); -static void pmap_clear_ptes(vm_page_t m, int bit); - -static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); -static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); -static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, - vm_offset_t va); -static void pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst); -static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); - -static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); - -static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); -static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m); -static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); -static void pmap_pte_release(pt_entry_t *pte); -static int pmap_unuse_pt(pmap_t, vm_offset_t); -static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -#ifdef PAE -static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); -#endif - -CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); -CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); - -#ifndef DEBUG -#define DEBUG -#endif -#ifdef PMAP_DEBUG -static void pmap_dec_ref(unsigned long ma); -static void 
pmap_mark_privileged(unsigned long pa); -static void pmap_mark_unprivileged(unsigned long pa); -static void pmap_dec_ref_page(vm_page_t m); -int pmap_pid_dump(int pid); -#endif - -void -pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type) -{ - vm_paddr_t shadow_pdir_ma = pmap->pm_pdir[PTDPTDI] & ~0xFFF; - vm_paddr_t shadow_offset = (vm_paddr_t)(ptr - pmap->pm_pdir)*sizeof(vm_paddr_t); - - switch (type) { - case SH_PD_SET_VA: - xen_queue_pt_update(shadow_pdir_ma + shadow_offset, - xpmap_ptom(val & ~(PG_RW|PG_M))); - xen_queue_pt_update(vtomach(ptr), - xpmap_ptom(val)); - break; - case SH_PD_SET_VA_MA: - xen_queue_pt_update(shadow_pdir_ma + shadow_offset, - val & ~(PG_RW|PG_M)); - xen_queue_pt_update(vtomach(ptr), val); - break; - case SH_PD_SET_VA_CLEAR: - xen_queue_pt_update(shadow_pdir_ma + shadow_offset, 0); - xen_queue_pt_update(vtomach(ptr), 0); - break; - } -} - -/* - * Move the kernel virtual free pointer to the next - * 4MB. This is used to help improve performance - * by using a large (4MB) page for much of the kernel - * (.text, .data, .bss) - */ -static vm_offset_t -pmap_kmem_choose(vm_offset_t addr) -{ - vm_offset_t newaddr = addr; - -#ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) - newaddr = (addr + PDRMASK) & ~PDRMASK; -#endif - return newaddr; -} - -/* - * Bootstrap the system enough to run with virtual memory. - * - * On the i386 this is called after mapping has already been enabled - * and just syncs the pmap module with what has already been done. - * [We can't call it easily with mapping off since the kernel is not - * mapped with PA == VA, hence we would have to relocate every address - * from the linked base (virtual) address "KERNBASE" to the actual - * (physical) address starting relative to 0] - */ -void -pmap_bootstrap(firstaddr, loadaddr) - vm_paddr_t firstaddr; - vm_paddr_t loadaddr; -{ - vm_offset_t va; - pt_entry_t *pte, *unused; - - /* - * XXX The calculation of virtual_avail is wrong. 
It's NKPT*PAGE_SIZE too - * large. It should instead be correctly calculated in locore.s and - * not based on 'first' (which is a physical address, not a virtual - * address, for the start of unused physical memory). The kernel - * page tables are NOT double mapped and thus should not be included - * in this calculation. - */ - virtual_avail = (vm_offset_t) KERNBASE + firstaddr; - virtual_avail = pmap_kmem_choose(virtual_avail); - - virtual_end = VM_MAX_KERNEL_ADDRESS; - - /* - * Initialize the kernel pmap (which is statically allocated). - */ - PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_pdir = (pd_entry_t *) xen_start_info->pt_base; -#ifdef PAE - kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); -#endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ - TAILQ_INIT(&kernel_pmap->pm_pvlist); - LIST_INIT(&allpmaps); - mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); - nkpt = NKPT; - - /* - * Reserve some special page table entries/VA space for temporary - * mapping of pages. - */ -#define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); - - va = virtual_avail; - pte = vtopte(va); - - /* - * CMAP1/CMAP2 are used for zeroing and copying pages. - * CMAP3 is used for the idle process page zeroing. - */ - SYSMAP(caddr_t, CMAP1, CADDR1, 1); - SYSMAP(caddr_t, CMAP2, CADDR2, 1); - SYSMAP(caddr_t, CMAP3, CADDR3, 1); - - PT_CLEAR_VA(CMAP3, TRUE); - - mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); - - /* - * Crashdump maps. - */ - SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) - - /* - * ptvmmap is used for reading arbitrary physical pages via /dev/mem. - */ - SYSMAP(caddr_t, unused, ptvmmap, 1) - - /* - * msgbufp is used to map the system message buffer. 
- */ - SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) - - /* - * ptemap is used for pmap_pte_quick - */ - SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); - SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); - - mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); - - virtual_avail = va; - PT_CLEAR_VA(CMAP1, FALSE); - PT_CLEAR_VA(CMAP2, FALSE); - - PT_UPDATES_FLUSH(); -#ifdef XEN_UNNEEDED - /* Turn on PG_G on kernel page(s) */ - pmap_set_pg(); -#endif -} - -/* - * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. - */ -void -pmap_set_pg(void) -{ - pd_entry_t pdir; - pt_entry_t *pte; - vm_offset_t va, endva; - int i; - - if (pgeflag == 0) - return; - panic("this won't work"); - i = KERNLOAD/NBPDR; - endva = KERNBASE + KERNend; - - if (pseflag) { - va = KERNBASE + KERNLOAD; - while (va < endva) { - pdir = kernel_pmap->pm_pdir[KPTDI+i]; - pdir |= pgeflag; - kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir; - invltlb(); /* Play it safe, invltlb() every time */ - i++; - va += NBPDR; - } - } else { - va = (vm_offset_t)btext; - while (va < endva) { - pte = vtopte(va); - if (*pte) - *pte |= pgeflag; - invltlb(); /* Play it safe, invltlb() every time */ - va += PAGE_SIZE; - } - } -} - -#ifdef PAE - -static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt"); - -static void * -pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) -{ - *flags = UMA_SLAB_PRIV; - return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL, - 1, 0)); -} -#endif - -/* - * Initialize the pmap module. - * Called by vm_init, to initialize any structures that the pmap - * system needs to map virtual memory. - * pmap_init has been enhanced to support in a fairly consistant - * way, discontiguous physical memory. - */ -void -pmap_init(void) -{ - int i; - - /* - * Allocate memory for random pmap data structures. Includes the - * pv_head_table. 
- */ - - for(i = 0; i < vm_page_array_size; i++) { - vm_page_t m; - - m = &vm_page_array[i]; - TAILQ_INIT(&m->md.pv_list); - m->md.pv_list_count = 0; - } - - /* - * init the pv free list - */ - pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); - uma_prealloc(pvzone, MINPV); - -#ifdef PAE - pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, - NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, - UMA_ZONE_VM | UMA_ZONE_NOFREE); - uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); -#endif - - /* - * Now it is safe to enable pv_table recording. - */ - pmap_initialized = TRUE; -} - -/* - * Initialize the address space (zone) for the pv_entries. Set a - * high water mark so that the system can recover from excessive - * numbers of pv entries. - */ -void -pmap_init2() -{ - int shpgperproc = PMAP_SHPGPERPROC; - - TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); - pv_entry_max = shpgperproc * maxproc + vm_page_array_size; - TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); - pv_entry_high_water = 9 * (pv_entry_max / 10); - uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); -} - - -/*************************************************** - * Low level helper routines..... - ***************************************************/ - -#if defined(PMAP_DIAGNOSTIC) - -/* - * This code checks for non-writeable/modified pages. - * This should be an invalid condition. - */ -static int -pmap_nw_modified(pt_entry_t ptea) -{ - int pte; - - pte = (int) ptea; - - if ((pte & (PG_M|PG_RW)) == PG_M) - return 1; - else - return 0; -} -#endif - - -/* - * this routine defines the region(s) of memory that should - * not be tested for the modified bit. 
- */ -static PMAP_INLINE int -pmap_track_modified(vm_offset_t va) -{ - if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) - return 1; - else - return 0; -} - -#ifdef I386_CPU -/* - * i386 only has "invalidate everything" and no SMP to worry about. - */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} - -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} - -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} -#else /* !I386_CPU */ -#ifdef SMP -/* - * For SMP, these functions have to use the IPI mechanism for coherence. - */ -void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - u_int cpumask; - u_int other_cpus; - - if (smp_started) { - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_rv_mtx); - } else - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. 
- * XXX we may need to hold schedlock to get a coherent pm_active - * XXX critical sections disable interrupts again - */ - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { - invlpg(va); - smp_invlpg(va); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); - } - if (smp_started) - mtx_unlock_spin(&smp_rv_mtx); - else - critical_exit(); - PT_UPDATES_FLUSH(); -} - -void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - u_int cpumask; - u_int other_cpus; - vm_offset_t addr; - - if (smp_started) { - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_rv_mtx); - } else - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. - * XXX we may need to hold schedlock to get a coherent pm_active - * XXX critical sections disable interrupts again - */ - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - smp_invlpg_range(sva, eva); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); - } - if (smp_started) - mtx_unlock_spin(&smp_rv_mtx); - else - critical_exit(); - PT_UPDATES_FLUSH(); -} - -void -pmap_invalidate_all(pmap_t pmap) -{ - u_int cpumask; - u_int other_cpus; - - if (smp_started) { - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_rv_mtx); - } else - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. 
- * XXX we may need to hold schedlock to get a coherent pm_active - * XXX critical sections disable interrupts again - */ - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { - invltlb(); - smp_invltlb(); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); - } - if (smp_started) - mtx_unlock_spin(&smp_rv_mtx); - else - critical_exit(); - PT_UPDATES_FLUSH(); -} -#else /* !SMP */ -/* - * Normal, non-SMP, 486+ invalidation functions. - * We inline these within pmap.c for speed. - */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invlpg(va); - PT_UPDATES_FLUSH(); - -} - -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vm_offset_t addr; - - if (pmap == kernel_pmap || pmap->pm_active) - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - PT_UPDATES_FLUSH(); - -} - -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} -#endif /* !SMP */ -#endif /* !I386_CPU */ - -/* - * Are we current address space or kernel? N.B. We return FALSE when - * a pmap's page table is in use because a kernel thread is borrowing - * it. The borrowed page table can change spontaneously, making any - * dependence on its continued use subject to a race condition. - */ -static __inline int -pmap_is_current(pmap_t pmap) -{ - /* XXX validate */ - return (pmap == kernel_pmap || - (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && - (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); -} - -/* - * If the given pmap is not the current or kernel pmap, the returned pte must - * be released by passing it to pmap_pte_release(). 
- */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t tmppf, newpf; - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (*pde & PG_PS) - return (pde); - if (*pde != 0) { - /* are we current address space or kernel? */ - if (pmap_is_current(pmap)) - return (vtopte(va)); - mtx_lock(&PMAP2mutex); - newpf = PT_GET(pde) & PG_FRAME; - tmppf = PT_GET(PMAP2) & PG_FRAME; - if (tmppf != newpf) { - PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE); - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); - } - return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); - } - return (0); -} - -/* - * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte - * being NULL. - */ -static __inline void -pmap_pte_release(pt_entry_t *pte) -{ - - if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) - mtx_unlock(&PMAP2mutex); -} - -static __inline void -invlcaddr(void *caddr) -{ -#ifdef I386_CPU - invltlb(); -#else - invlpg((u_int)caddr); -#endif - PT_UPDATES_FLUSH(); -} - -/* - * Super fast pmap_pte routine best used when scanning - * the pv lists. This eliminates many coarse-grained - * invltlb calls. Note that many of the pv list - * scans are across different pmaps. It is very wasteful - * to do an entire invltlb for checking a single mapping. - * - * If the given pmap is not the current pmap, vm_page_queue_mtx - * must be held and curthread pinned to a CPU. - */ -static pt_entry_t * -pmap_pte_quick(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t tmppf, newpf; - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (*pde & PG_PS) - return (pde); - if (*pde != 0) { - /* are we current address space or kernel? 
*/ - if (pmap_is_current(pmap)) - return (vtopte(va)); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - newpf = PT_GET(pde) & PG_FRAME; - tmppf = PT_GET(PMAP1) & PG_FRAME; - if (tmppf != newpf) { - PT_SET_VA(PMAP1, newpf | PG_V | PG_A, TRUE); -#ifdef SMP - PMAP1cpu = PCPU_GET(cpuid); -#endif - invlcaddr(PADDR1); - PMAP1changed++; - } else -#ifdef SMP - if (PMAP1cpu != PCPU_GET(cpuid)) { - PMAP1cpu = PCPU_GET(cpuid); - invlcaddr(PADDR1); - PMAP1changedcpu++; - } else -#endif - PMAP1unchanged++; - return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); - } - return (0); -} - -/* - * Routine: pmap_extract - * Function: - * Extract the physical page address associated - * with the given map/virtual_address pair. - */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) -{ - vm_paddr_t rtval; - pt_entry_t *pte; - pd_entry_t pde; - - rtval = 0; - PMAP_LOCK(pmap); - pde = PT_GET(&pmap->pm_pdir[va >> PDRSHIFT]); - if (pde != 0) { - if ((pde & PG_PS) != 0) { - rtval = (pde & ~PDRMASK) | (va & PDRMASK); - PMAP_UNLOCK(pmap); - return rtval; - } - pte = pmap_pte(pmap, va); - rtval = (PT_GET(pte) & PG_FRAME) | (va & PAGE_MASK); - pmap_pte_release(pte); - } - PMAP_UNLOCK(pmap); - return (rtval); -} - -/* - * Routine: pmap_extract_and_hold - * Function: - * Atomically extract and hold the physical page - * with the given pmap and virtual address pair - * if that mapping permits the given protection. 
- */ -vm_page_t -pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) -{ - pd_entry_t pde; - pt_entry_t pte; - vm_page_t m; - - m = NULL; - vm_page_lock_queues(); - PMAP_LOCK(pmap); - pde = PT_GET(pmap_pde(pmap, va)); - if (pde != 0) { - if (pde & PG_PS) { - panic("4MB pages not currently supported"); - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) | - (va & PDRMASK)); - vm_page_hold(m); - } - } else { - sched_pin(); - pte = PT_GET(pmap_pte_quick(pmap, va)); - if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - m = PHYS_TO_VM_PAGE(pte & PG_FRAME); - vm_page_hold(m); - } - sched_unpin(); - } - } - vm_page_unlock_queues(); - PMAP_UNLOCK(pmap); - return (m); -} - -/*************************************************** - * Low level mapping routines..... - ***************************************************/ - -/* - * Add a wired page to the kva. - * Note: not SMP coherent. - */ -PMAP_INLINE void -pmap_kenter(vm_offset_t va, vm_paddr_t pa) -{ - pt_entry_t *pte; - - pte = vtopte(va); - pte_store(pte, pa | PG_RW | PG_V | pgeflag); -} - -/* - * Add a wired page to the kva. - * Note: not SMP coherent. - */ -PMAP_INLINE void -pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma) -{ - pt_entry_t *pte; - - pte = vtopte(va); - PT_SET_VA_MA(pte, ma | PG_RW | PG_V | pgeflag, TRUE); -} - -/* - * Remove a page from the kernel pagetables. - * Note: not SMP coherent. - */ -PMAP_INLINE void -pmap_kremove(vm_offset_t va) -{ - pt_entry_t *pte; - - pte = vtopte(va); - pte_clear(pte); -} - -/* - * Used to map a range of physical addresses into kernel - * virtual address space. - * - * The value passed in '*virt' is a suggested virtual address for - * the mapping. Architectures which can support a direct-mapped - * physical to virtual region can return the appropriate address - * within that region, leaving '*virt' unchanged. 
Other - * architectures should map the pages starting at '*virt' and - * update '*virt' with the first usable address after the mapped - * region. - */ -vm_offset_t -pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) -{ - vm_offset_t va, sva; - - va = sva = *virt; - while (start < end) { - pmap_kenter(va, start); - va += PAGE_SIZE; - start += PAGE_SIZE; - } - /* invalidate will flush the update queue */ - pmap_invalidate_range(kernel_pmap, sva, va); - *virt = va; - return (sva); -} - - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - * Note: SMP coherent. Uses a ranged shootdown IPI. - */ -void -pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) -{ - vm_offset_t va; - - va = sva; - while (count-- > 0) { - pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); - va += PAGE_SIZE; - m++; - } - /* invalidate will flush the update queue */ - pmap_invalidate_range(kernel_pmap, sva, va); -} - -/* - * This routine tears out page mappings from the - * kernel -- it is meant only for temporary mappings. - * Note: SMP coherent. Uses a ranged shootdown IPI. - */ -void -pmap_qremove(vm_offset_t sva, int count) -{ - vm_offset_t va; - - va = sva; - while (count-- > 0) { - pmap_kremove(va); - va += PAGE_SIZE; - } - /* invalidate will flush the update queue */ - pmap_invalidate_range(kernel_pmap, sva, va); -} - -/*************************************************** - * Page table page management routines..... - ***************************************************/ - -/* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. 
- */ -static PMAP_INLINE int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) -{ - - --m->wire_count; - if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, m); - else - return 0; -} - -static int -_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) -{ - vm_offset_t pteva; - /* - * unmap the page table page - */ - xen_pt_unpin(pmap->pm_pdir[m->pindex]); - PD_CLEAR_VA(pmap, &pmap->pm_pdir[m->pindex], TRUE); - --pmap->pm_stats.resident_count; - - /* - * Do an invltlb to make the invalidated mapping - * take effect immediately. - */ - pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); - pmap_invalidate_page(pmap, pteva); - - vm_page_free_zero(m); - atomic_subtract_int(&cnt.v_wire_count, 1); - return 1; -} - -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ -static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t ptepde; - vm_page_t mpte; - - if (va >= VM_MAXUSER_ADDRESS) - return 0; - ptepde = PT_GET(pmap_pde(pmap, va)); - mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, mpte); -} - -void -pmap_pinit0(pmap) - struct pmap *pmap; -{ - - PMAP_LOCK_INIT(pmap); - pmap->pm_pdir = (pd_entry_t *)(xen_start_info->pt_base); -#ifdef PAE - pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); -#endif - pmap->pm_active = 0; - PCPU_SET(curpmap, pmap); - TAILQ_INIT(&pmap->pm_pvlist); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); -} - -/* - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. - */ -void -pmap_pinit(struct pmap *pmap) -{ - vm_page_t m, ptdpg[NPGPTD*2]; - vm_paddr_t ma, ma_shadow; - static int color; - int i; - - PMAP_LOCK_INIT(pmap); - - /* - * No need to allocate page table space yet but we do need a valid - * page directory table. 
- */ - if (pmap->pm_pdir == NULL) { - pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, - NBPTD); -#ifdef PAE - pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); - KASSERT(((vm_offset_t)pmap->pm_pdpt & - ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, - ("pmap_pinit: pdpt misaligned")); - KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), - ("pmap_pinit: pdpt above 4g")); -#endif - } - - /* - * allocate the page directory page(s) - */ - for (i = 0; i < NPGPTD*2;) { - m = vm_page_alloc(NULL, color++, - VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (m == NULL) - VM_WAIT; - else { - pmap_zero_page(m); - ptdpg[i++] = m; - } - } -#ifdef PAE - #error "missing shadow handling for PAE" -#endif - - pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); - - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); - /* Wire in kernel global address entries. */ - /* XXX copies current process, does not fill in MPPTDI */ - bcopy(kernel_pmap->pm_pdir + KPTDI, pmap->pm_pdir + KPTDI, - nkpt * sizeof(pd_entry_t)); - /* XXX need to copy global address entries to page directory's L1 shadow */ - ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD])); - /* L1 pin shadow page director{y,ies} */ - for (i = 0; i < NPGPTD; i++) { - ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD + i])); - pmap_copy_ma(kernel_pmap->pm_pdir[PTDPTDI + i] & ~(PG_RW|PG_M), ma); - xen_pt_pin(ma); - } - -#ifdef SMP - pmap->pm_pdir[MPPTDI] = kernel_pmap->pm_pdir[MPPTDI]; -#endif - - /* pin and install L1 shadow */ - for (i = 0; i < NPGPTD; i++) { - ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i])); - ma_shadow = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD+i])); - /* re-map page directory read-only and pin */ - PT_SET_MA(pmap->pm_pdir + i*PAGE_SIZE, ma | PG_V | PG_A); - xen_pgd_pin(ma); - /* add L1 shadow of L2 */ - xen_queue_pt_update(vtomach(&pmap->pm_pdir[PTDPTDI + i]), - ma_shadow | PG_V | PG_A); - 
xen_queue_pt_update(ma_shadow + PTDPTDI*sizeof(vm_paddr_t), - vtomach(pmap->pm_pdir) | PG_V | PG_A); - -#ifdef PAE - #error "unsupported currently" - pmap->pm_pdpt[i] = ma | PG_V; -#endif - } - xen_flush_queue(); - - pmap->pm_active = 0; - TAILQ_INIT(&pmap->pm_pvlist); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); -} - -/* - * this routine is called if the page table page is not - * mapped correctly. - */ -static vm_page_t -_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) -{ - vm_paddr_t ptepa; - vm_page_t m; - - KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || - (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, - ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); - - /* - * Allocate a page table page. - */ - if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { - if (flags & M_WAITOK) { - PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); - VM_WAIT; - vm_page_lock_queues(); - PMAP_LOCK(pmap); - } - - /* - * Indicate the need to retry. While waiting, the page table - * page may have been allocated. - */ - return (NULL); - } - if ((m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - - /* - * Map the pagetable page into the process address space, if - * it isn't already there. 
- */ - - pmap->pm_stats.resident_count++; - - ptepa = VM_PAGE_TO_PHYS(m); - xen_pt_pin(xpmap_ptom(ptepa)); - PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex], - (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE); - - return m; -} - -static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) -{ - unsigned ptepindex; - pd_entry_t ptepa; - vm_page_t m; - - KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || - (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, - ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); - - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; -retry: - /* - * Get the page directory entry - */ - ptepa = PT_GET(&pmap->pm_pdir[ptepindex]); - - /* - * This supports switching from a 4MB page to a - * normal 4K page. - */ - if (ptepa & PG_PS) { - pmap->pm_pdir[ptepindex] = 0; - ptepa = 0; - pmap_invalidate_all(kernel_pmap); - } - - /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. - */ - if (ptepa) { - m = PHYS_TO_VM_PAGE(ptepa); - m->wire_count++; - } else { - /* - * Here if the pte page isn't mapped, or if it has - * been deallocated. - */ - m = _pmap_allocpte(pmap, ptepindex, flags); - if (m == NULL && (flags & M_WAITOK)) - goto retry; - } - return (m); -} - - -/*************************************************** -* Pmap allocation/deallocation routines. - ***************************************************/ - -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. 
- */ -static u_int *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - - -void -pmap_lazyfix_action(void) -{ - u_int mymask = PCPU_GET(cpumask); - - if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(u_int mymask) -{ - - if (PCPU_GET(curpcb)->pcb_cr3 == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int mymask = PCPU_GET(cpumask); - u_int mask; - register u_int spins; - - while ((mask = pmap->pm_active) != 0) { - spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ - mtx_lock_spin(&smp_rv_mtx); -#ifdef PAE - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - if (mask == mymask) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(mymask); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } - } - mtx_unlock_spin(&smp_rv_mtx); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. - */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == PCPU_GET(curpcb)->pcb_cr3) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); - } -} -#endif /* SMP */ - -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. 
- * Should only be called if the map contains no valid mappings. - */ -void -pmap_release(pmap_t pmap) -{ - vm_page_t m, ptdpg[NPGPTD + 1]; - vm_paddr_t ma; - int i; - - KASSERT(pmap->pm_stats.resident_count == 0, - ("pmap_release: pmap resident count %ld != 0", - pmap->pm_stats.resident_count)); - - pmap_lazyfix(pmap); - mtx_lock_spin(&allpmaps_lock); - LIST_REMOVE(pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); - - for (i = 0; i < NPGPTD; i++) { - ptdpg[i] = PHYS_TO_VM_PAGE(PT_GET(&pmap->pm_pdir[PTDPTDI + i])); - } - ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir)); - for (i = 0; i < nkpt + NPGPTD; i++) - PD_CLEAR_VA(pmap, &pmap->pm_pdir[PTDPTDI + i], FALSE); - - bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * - sizeof(*pmap->pm_pdir)); -#ifdef SMP - PD_CLEAR_VA(pmap, &pmap->pm_pdir[MPPTDI], FALSE); -#endif - PT_UPDATES_FLUSH(); - pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); - - vm_page_lock_queues(); - for (i = 0; i < NPGPTD + 1; i++) { - m = ptdpg[i]; - - ma = xpmap_ptom(VM_PAGE_TO_PHYS(m)); - /* unpinning L1 and L2 treated the same */ - xen_pgd_unpin(ma); -#ifdef PAE - KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), - ("pmap_release: got wrong ptd page")); -#endif - m->wire_count--; - atomic_subtract_int(&cnt.v_wire_count, 1); - - vm_page_free_zero(m); - } - vm_page_unlock_queues(); - PMAP_LOCK_DESTROY(pmap); -} - -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return sysctl_handle_long(oidp, &ksize, 0, req); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return sysctl_handle_long(oidp, &kfree, 0, req); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - -/* - * grow the number of kernel page table entries, if needed - */ -void 
-pmap_growkernel(vm_offset_t addr) -{ - struct pmap *pmap; - vm_paddr_t ptppaddr; - vm_page_t nkpg; - pd_entry_t newpdir; - pt_entry_t *pde; - - mtx_assert(&kernel_map->system_mtx, MA_OWNED); - if (kernel_vm_end == 0) { - kernel_vm_end = KERNBASE; - nkpt = 0; - while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - nkpt++; - } - } - addr = roundup2(addr, PAGE_SIZE * NPTEPG); - while (kernel_vm_end < addr) { - if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - continue; - } - - /* - * This index is bogus, but out of the way - */ - nkpg = vm_page_alloc(NULL, nkpt, - VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); - if (!nkpg) - panic("pmap_growkernel: no memory to grow kernel"); - - nkpt++; - - pmap_zero_page(nkpg); - ptppaddr = VM_PAGE_TO_PHYS(nkpg); - newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); - PD_SET_VA(kernel_pmap, &pdir_pde(kernel_pmap->pm_pdir, kernel_vm_end), newpdir, TRUE); - - mtx_lock_spin(&allpmaps_lock); - LIST_FOREACH(pmap, &allpmaps, pm_list) { - pde = pmap_pde(pmap, kernel_vm_end); - PD_SET_VA(pmap, pde, newpdir, FALSE); - } - PT_UPDATES_FLUSH(); - mtx_unlock_spin(&allpmaps_lock); - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - } -} - - -/*************************************************** - * page management routines. - ***************************************************/ - -/* - * free the pv_entry back to the free list - */ -static PMAP_INLINE void -free_pv_entry(pv_entry_t pv) -{ - pv_entry_count--; - uma_zfree(pvzone, pv); -} - -/* - * get a new pv_entry, allocating a block from the system - * when needed. - * the memory allocation is performed bypassing the malloc code - * because of the possibility of allocations at interrupt time. 
- */ -static pv_entry_t -get_pv_entry(void) -{ - pv_entry_count++; - if (pv_entry_high_water && - (pv_entry_count > pv_entry_high_water) && - (pmap_pagedaemon_waken == 0)) { - pmap_pagedaemon_waken = 1; - wakeup (&vm_pages_needed); - } - return uma_zalloc(pvzone, M_NOWAIT); -} - - -static int -pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) -{ - pv_entry_t pv; - int rtval; - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (m->md.pv_list_count < pmap->pm_stats.resident_count) { - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pmap == pv->pv_pmap && va == pv->pv_va) - break; - } - } else { - TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { - if (va == pv->pv_va) - break; - } - } - - rtval = 0; - if (pv) { - rtval = pmap_unuse_pt(pmap, va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count--; - if (TAILQ_FIRST(&m->md.pv_list) == NULL) - vm_page_flag_clear(m, PG_WRITEABLE); - - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - free_pv_entry(pv); - } - - return rtval; -} - -/* - * Create a pv entry for page at pa for - * (pmap, va). - */ -static void -pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pv_entry_t pv; - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count++; -} - -/* - * pmap_remove_pte: do the things to unmap a page in a process - */ -static int -pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) -{ - pt_entry_t oldpte; - vm_page_t m; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - oldpte = pte_load_clear(ptq); - if (oldpte & PG_W) - pmap->pm_stats.wired_count -= 1; - /* - * Machines that don't support invlpg, also don't support - * PG_G. 
- */ - if (oldpte & PG_G) - pmap_invalidate_page(kernel_pmap, va); - pmap->pm_stats.resident_count -= 1; - if (oldpte & PG_MANAGED) { - m = PHYS_TO_VM_PAGE(oldpte); - if (oldpte & PG_M) { -#if defined(PMAP_DIAGNOSTIC) - if (pmap_nw_modified((pt_entry_t) oldpte)) { - printf( - "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", - va, oldpte); - } -#endif - if (pmap_track_modified(va)) - vm_page_dirty(m); - } - if (oldpte & PG_A) - vm_page_flag_set(m, PG_REFERENCED); - return pmap_remove_entry(pmap, m, va); - } else { - return pmap_unuse_pt(pmap, va); - } -} - -/* - * Remove a single page from a process address space - */ -static void -pmap_remove_page(pmap_t pmap, vm_offset_t va) -{ - pt_entry_t *pte; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) - return; - pmap_remove_pte(pmap, pte, va); - pmap_invalidate_page(pmap, va); -} - -/* - * Remove the given range of addresses from the specified map. - * - * It is assumed that the start and end are properly - * rounded to the page size. - */ -void -pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - pt_entry_t *pte; - int anyvalid; - - /* - * Perform an unsynchronized read. This is, however, safe. - */ - if (pmap->pm_stats.resident_count == 0) - return; - - anyvalid = 0; - - vm_page_lock_queues(); - sched_pin(); - PMAP_LOCK(pmap); - - /* - * special handling of removing one page. a very - * common operation and easy to short circuit some - * code. - */ - if ((sva + PAGE_SIZE == eva) && - ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { - pmap_remove_page(pmap, sva); - goto out; - } - - for (; sva < eva; sva = pdnxt) { - unsigned pdirindex; - - /* - * Calculate index for next page table. 
- */ - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pmap->pm_stats.resident_count == 0) - break; - - pdirindex = sva >> PDRSHIFT; - ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]); - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - /* - * Check for large page. - */ - if ((ptpaddr & PG_PS) != 0) { - PD_CLEAR_VA(pmap, &pmap->pm_pdir[pdirindex], TRUE); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid = 1; - continue; - } - - /* - * Limit our scan to either the end of the va represented - * by the current page table page, or to the end of the - * range being removed. - */ - if (pdnxt > eva) - pdnxt = eva; - - for (; sva != pdnxt; sva += PAGE_SIZE) { - if ((pte = pmap_pte_quick(pmap, sva)) == NULL || - *pte == 0) - continue; - anyvalid = 1; - if (pmap_remove_pte(pmap, pte, sva)) - break; - } - } -out: - sched_unpin(); - vm_page_unlock_queues(); - if (anyvalid) - pmap_invalidate_all(pmap); - PMAP_UNLOCK(pmap); -} - -/* - * Routine: pmap_remove_all - * Function: - * Removes this physical page from - * all physical maps in which it resides. - * Reflects back modify bits to the pager. - * - * Notes: - * Original versions of this routine were very - * inefficient because they iteratively called - * pmap_remove (slow...) - */ - -void -pmap_remove_all(vm_page_t m) -{ - pv_entry_t pv; - pt_entry_t *pte, tpte; - -#if defined(PMAP_DIAGNOSTIC) - /* - * XXX This makes pmap_remove_all() illegal for non-managed pages! 
- */ - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { - panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", - VM_PAGE_TO_PHYS(m)); - } -#endif - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - sched_pin(); - while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - PMAP_LOCK(pv->pv_pmap); - pv->pv_pmap->pm_stats.resident_count--; - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = pte_load_clear(pte); - if (tpte & PG_W) - pv->pv_pmap->pm_stats.wired_count--; - if (tpte & PG_A) - vm_page_flag_set(m, PG_REFERENCED); - - /* - * Update the vm_page_t clean and reference bits. - */ - if (tpte & PG_M) { -#if defined(PMAP_DIAGNOSTIC) - if (pmap_nw_modified((pt_entry_t) tpte)) { - printf( - "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", - pv->pv_va, tpte); - } -#endif - if (pmap_track_modified(pv->pv_va)) - vm_page_dirty(m); - } - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count--; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va); - PMAP_UNLOCK(pv->pv_pmap); - free_pv_entry(pv); - } - vm_page_flag_clear(m, PG_WRITEABLE); - sched_unpin(); -} - -/* - * Set the physical protection on the - * specified range of this map as requested. - */ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) -{ - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - int anychanged; - - if ((prot & VM_PROT_READ) == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } - - if (prot & VM_PROT_WRITE) - return; - - anychanged = 0; - - vm_page_lock_queues(); - sched_pin(); - PMAP_LOCK(pmap); - for (; sva < eva; sva = pdnxt) { - unsigned obits, pbits, pdirindex; - - pdnxt = (sva + NBPDR) & ~PDRMASK; - - pdirindex = sva >> PDRSHIFT; - ptpaddr = PT_GET(&pmap->pm_pdir[pdirindex]); - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. 
- */ - if (ptpaddr == 0) - continue; - - /* - * Check for large page. - */ - if ((ptpaddr & PG_PS) != 0) { - pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anychanged = 1; - continue; - } - - if (pdnxt > eva) - pdnxt = eva; - - for (; sva != pdnxt; sva += PAGE_SIZE) { - pt_entry_t *pte; - vm_page_t m; - - if ((pte = pmap_pte_quick(pmap, sva)) == NULL) - continue; -#ifdef notyet -retry: -#endif - /* - * Regardless of whether a pte is 32 or 64 bits in - * size, PG_RW, PG_A, and PG_M are among the least - * significant 32 bits. - */ - obits = pbits = PT_GET(pte); - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { - m = PHYS_TO_VM_PAGE(pbits); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & PG_M) != 0 && - pmap_track_modified(sva)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits); - vm_page_dirty(m); - } - } - - pbits &= ~(PG_RW | PG_M); - - if (pbits != obits) { -#ifdef notyet - if (!atomic_cmpset_int((u_int *)pte, obits, - pbits)) - goto retry; -#endif - PT_SET_VA(pte, pbits, FALSE); - anychanged = 1; - } - } - } - sched_unpin(); - vm_page_unlock_queues(); - if (anychanged) - pmap_invalidate_all(pmap); - PMAP_UNLOCK(pmap); -} - -/* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte can not be reclaimed. - * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. 
- */ -void -pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - boolean_t wired) -{ - vm_paddr_t pa; - register pt_entry_t *pte; - vm_paddr_t opa; - pt_entry_t origpte, newpte; - vm_page_t mpte, om; - - va &= PG_FRAME; -#ifdef PMAP_DIAGNOSTIC - if (va > VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: toobig"); - if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) - panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); -#endif - - mpte = NULL; - - vm_page_lock_queues(); - PMAP_LOCK(pmap); - sched_pin(); - - /* - * In the case that a page table page is not - * resident, we are creating it here. - */ - if (va < VM_MAXUSER_ADDRESS) { - mpte = pmap_allocpte(pmap, va, M_WAITOK); - } -#if 0 && defined(PMAP_DIAGNOSTIC) - else { - pd_entry_t *pdeaddr = pmap_pde(pmap, va); - origpte = PT_GET(pdeaddr); - if ((origpte & PG_V) == 0) { - panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", - pmap->pm_pdir[PTDPTDI], origpte, va); - } - } -#endif - - pte = pmap_pte_quick(pmap, va); - - /* - * Page Directory table entry not valid, we need a new PT page - */ - if (pte == NULL) { - panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", - (uintmax_t)pmap->pm_pdir[PTDPTDI], va); - } - - pa = VM_PAGE_TO_PHYS(m); - om = NULL; - origpte = PT_GET(pte); - opa = origpte & PG_FRAME; - - if (origpte & PG_PS) { - /* - * Yes, I know this will truncate upper address bits for PAE, - * but I'm actually more interested in the lower bits - */ - printf("pmap_enter: va %p, pte %p, origpte %p\n", - (void *)va, (void *)pte, (void *)(uintptr_t)origpte); - panic("pmap_enter: attempted pmap_enter on 4MB page"); - } - - /* - * Mapping has not changed, must be protection or wiring change. - */ - if (origpte && (opa == pa)) { - /* - * Wiring change, just update stats. We don't worry about - * wiring PT pages as they remain resident as long as there - * are valid mappings in them. 
Hence, if a user page is wired, - * the PT page will be also. - */ - if (wired && ((origpte & PG_W) == 0)) - pmap->pm_stats.wired_count++; - else if (!wired && (origpte & PG_W)) - pmap->pm_stats.wired_count--; - -#if defined(PMAP_DIAGNOSTIC) - if (pmap_nw_modified((pt_entry_t) origpte)) { - printf( - "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", - va, origpte); - } -#endif - - /* - * Remove extra pte reference - */ - if (mpte) - mpte->wire_count--; - - /* - * We might be turning off write access to the page, - * so we go ahead and sense modify status. - */ - if (origpte & PG_MANAGED) { - om = m; - pa |= PG_MANAGED; - } - goto validate; - } - /* - * Mapping has changed, invalidate old range and fall through to - * handle validating new mapping. - */ - if (opa) { - int err; - if (origpte & PG_W) - pmap->pm_stats.wired_count--; - if (origpte & PG_MANAGED) { - om = PHYS_TO_VM_PAGE(opa); - err = pmap_remove_entry(pmap, om, va); - } else - err = pmap_unuse_pt(pmap, va); - if (err) - panic("pmap_enter: pte vanished, va: 0x%x", va); - } else - pmap->pm_stats.resident_count++; - - /* - * Enter on the PV list if part of our managed memory. Note that we - * raise IPL while manipulating pv_table since pmap_enter can be - * called at interrupt time. - */ - if (pmap_initialized && - (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { - pmap_insert_entry(pmap, va, m); - pa |= PG_MANAGED; - } - - /* - * Increment counters - */ - if (wired) - pmap->pm_stats.wired_count++; - -validate: - /* - * Now validate mapping with desired protection/wiring. - */ - newpte = (pt_entry_t)(pa | PG_V); - if ((prot & VM_PROT_WRITE) != 0) - newpte |= PG_RW; - if (wired) - newpte |= PG_W; - if (va < VM_MAXUSER_ADDRESS) - newpte |= PG_U; - if (pmap == kernel_pmap) - newpte |= pgeflag; - - /* - * if the mapping or permission bits are different, we need - * to update the pte. 
- */ - if ((origpte & ~(PG_M|PG_A)) != newpte) { - if (origpte & PG_MANAGED) { - origpte = PT_GET(pte); - PT_SET_VA(pte, newpte | PG_A, TRUE); - if ((origpte & PG_M) && pmap_track_modified(va)) - vm_page_dirty(om); - if (origpte & PG_A) - vm_page_flag_set(om, PG_REFERENCED); - } else - PT_SET_VA(pte, newpte | PG_A, TRUE); - if (origpte) { - pmap_invalidate_page(pmap, va); - } - } - sched_unpin(); - vm_page_unlock_queues(); - PMAP_UNLOCK(pmap); -} - -/* - * this code makes some *MAJOR* assumptions: - * 1. Current pmap & pmap exists. - * 2. Not wired. - * 3. Read access. - * 4. No page table pages. - * 5. Tlbflush is deferred to calling procedure. - * 6. Page IS managed. - * but is *MUCH* faster than pmap_enter... - */ - -vm_page_t -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) -{ - pt_entry_t *pte; - vm_paddr_t pa; - - vm_page_lock_queues(); - PMAP_LOCK(pmap); - - /* - * In the case that a page table page is not - * resident, we are creating it here. - */ - if (va < VM_MAXUSER_ADDRESS) { - unsigned ptepindex; - pd_entry_t ptepa; - - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; - if (mpte && (mpte->pindex == ptepindex)) { - mpte->wire_count++; - } else { -retry: - /* - * Get the page directory entry - */ - ptepa = PT_GET(&pmap->pm_pdir[ptepindex]); - - /* - * If the page table page is mapped, we just increment - * the hold count, and activate it. - */ - if (ptepa) { - if (ptepa & PG_PS) - panic("pmap_enter_quick: unexpected mapping into 4MB page"); - mpte = PHYS_TO_VM_PAGE(ptepa); - mpte->wire_count++; - } else { - mpte = _pmap_allocpte(pmap, ptepindex, - M_WAITOK); - if (mpte == NULL) - goto retry; - } - } - } else { - mpte = NULL; - } - - /* - * This call to vtopte makes the assumption that we are - * entering the page into the current pmap. In order to support - * quick entry into any pmap, one would likely use pmap_pte_quick. - * But that isn't as quick as vtopte. 
- */ - pte = vtopte(va); - if (PT_GET(pte)) { - if (mpte != NULL) { - pmap_unwire_pte_hold(pmap, mpte); - mpte = NULL; - } - goto out; - } - - /* - * Enter on the PV list if part of our managed memory. Note that we - * raise IPL while manipulating pv_table since pmap_enter can be - * called at interrupt time. - */ - if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) - pmap_insert_entry(pmap, va, m); - - /* - * Increment counters - */ - pmap->pm_stats.resident_count++; - - pa = VM_PAGE_TO_PHYS(m); - - /* - * Now validate mapping with RO protection - */ - if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) - pte_store(pte, pa | PG_V | PG_U); - else - pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); -out: - vm_page_unlock_queues(); - PMAP_UNLOCK(pmap); - return mpte; -} - -/* - * Make a temporary mapping for a physical address. This is only intended - * to be used for panic dumps. - */ -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) -{ - vm_offset_t va; - - va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); - pmap_kenter(va, pa); -#ifndef I386_CPU - invlpg(va); -#else - invltlb(); -#endif - return ((void *)crashdumpmap); -} - -/* - * This code maps large physical mmap regions into the - * processor address space. Note that some shortcuts - * are taken, but the code works. 
- */ -void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, - vm_object_t object, vm_pindex_t pindex, - vm_size_t size) -{ - vm_page_t p; - - VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); - KASSERT(object->type == OBJT_DEVICE, - ("pmap_object_init_pt: non-device object")); - if (pseflag && - ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { - int i; - vm_page_t m[1]; - unsigned int ptepindex; - int npdes; - pd_entry_t ptepa; - - PMAP_LOCK(pmap); - if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) - goto out; - PMAP_UNLOCK(pmap); -retry: - p = vm_page_lookup(object, pindex); - if (p != NULL) { - vm_page_lock_queues(); - if (vm_page_sleep_if_busy(p, FALSE, "init4p")) - goto retry; - } else { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_lock_queues(); - vm_page_wakeup(p); - } - vm_page_unlock_queues(); - - ptepa = VM_PAGE_TO_PHYS(p); - if (ptepa & (NBPDR - 1)) - return; - - p->valid = VM_PAGE_BITS_ALL; - - PMAP_LOCK(pmap); - pmap->pm_stats.resident_count += size >> PAGE_SHIFT; - npdes = size >> PDRSHIFT; - for(i = 0; i < npdes; i++) { - PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex], - ptepa | PG_U | PG_RW | PG_V | PG_PS, FALSE); - ptepa += NBPDR; - ptepindex += 1; - } - pmap_invalidate_all(pmap); -out: - PMAP_UNLOCK(pmap); - } -} - -void -pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) -{ - int i, npages = round_page(len) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pt_entry_t *pte; - pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); - pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); - PMAP_MARK_PRIV(xpmap_mtop(*pte)); - pmap_pte_release(pte); - } - PT_UPDATES_FLUSH(); -} - -void -pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) -{ - int i, npages = round_page(len) >> PAGE_SHIFT; - for (i = 0; 
i < npages; i++) { - pt_entry_t *pte; - pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); - PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); - pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); - pmap_pte_release(pte); - } - PT_UPDATES_FLUSH(); -} - -/* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. - */ -void -pmap_change_wiring(pmap, va, wired) - register pmap_t pmap; - vm_offset_t va; - boolean_t wired; -{ - register pt_entry_t *pte; - - PMAP_LOCK(pmap); - pte = pmap_pte(pmap, va); - - if (wired && !pmap_pte_w(pte)) - pmap->pm_stats.wired_count++; - else if (!wired && pmap_pte_w(pte)) - pmap->pm_stats.wired_count--; - - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. - */ - pmap_pte_set_w(pte, wired); - pmap_pte_release(pte); - PMAP_UNLOCK(pmap); -} - - - -/* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. 
- */ - -void -pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, - vm_offset_t src_addr) -{ - vm_offset_t addr; - vm_offset_t end_addr = src_addr + len; - vm_offset_t pdnxt; - vm_page_t m; - - if (dst_addr != src_addr) - return; - - if (!pmap_is_current(src_pmap)) - return; - - vm_page_lock_queues(); - if (dst_pmap < src_pmap) { - PMAP_LOCK(dst_pmap); - PMAP_LOCK(src_pmap); - } else { - PMAP_LOCK(src_pmap); - PMAP_LOCK(dst_pmap); - } - sched_pin(); - for (addr = src_addr; addr < end_addr; addr = pdnxt) { - pt_entry_t *src_pte, *dst_pte; - vm_page_t dstmpte, srcmpte; - pd_entry_t srcptepaddr; - unsigned ptepindex; - - if (addr >= UPT_MIN_ADDRESS) - panic("pmap_copy: invalid to pmap_copy page tables"); - - /* - * Don't let optional prefaulting of pages make us go - * way below the low water mark of free pages or way - * above high water mark of used pv entries. - */ - if (cnt.v_free_count < cnt.v_free_reserved || - pv_entry_count > pv_entry_high_water) - break; - - pdnxt = (addr + NBPDR) & ~PDRMASK; - ptepindex = addr >> PDRSHIFT; - - srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); - if (srcptepaddr == 0) - continue; - - if (srcptepaddr & PG_PS) { - if (dst_pmap->pm_pdir[ptepindex] == 0) { - PD_SET_VA(dst_pmap, &dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE); - dst_pmap->pm_stats.resident_count += - NBPDR / PAGE_SIZE; - } - continue; - } - - srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); - if (srcmpte->wire_count == 0) - panic("pmap_copy: source page table page is unused"); - - if (pdnxt > end_addr) - pdnxt = end_addr; - - src_pte = vtopte(addr); - while (addr < pdnxt) { - pt_entry_t ptetemp; - ptetemp = PT_GET(src_pte); - /* - * we only virtual copy managed pages - */ - if ((ptetemp & PG_MANAGED) != 0) { - /* - * We have to check after allocpte for the - * pte still being around... allocpte can - * block. 
- */ - dstmpte = pmap_allocpte(dst_pmap, addr, - M_NOWAIT); - if (dstmpte == NULL) - break; - dst_pte = pmap_pte_quick(dst_pmap, addr); - if (*dst_pte == 0) { - /* - * Clear the modified and - * accessed (referenced) bits - * during the copy. - */ - m = PHYS_TO_VM_PAGE(ptetemp); - PT_SET_VA(dst_pte, ptetemp & ~(PG_M | PG_A), FALSE); - dst_pmap->pm_stats.resident_count++; - pmap_insert_entry(dst_pmap, addr, m); - } else - pmap_unwire_pte_hold(dst_pmap, dstmpte); - if (dstmpte->wire_count >= srcmpte->wire_count) - break; - } - addr += PAGE_SIZE; - src_pte++; - } - } - PT_UPDATES_FLUSH(); - sched_unpin(); - vm_page_unlock_queues(); - PMAP_UNLOCK(src_pmap); - PMAP_UNLOCK(dst_pmap); -} - -static __inline void -pagezero(void *page) -{ -#if defined(I686_CPU) - if (cpu_class == CPUCLASS_686) { -#if defined(CPU_ENABLE_SSE) - if (cpu_feature & CPUID_SSE2) - sse2_pagezero(page); - else -#endif - i686_pagezero(page); - } else -#endif - bzero(page, PAGE_SIZE); -} - -/* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - */ -void -pmap_zero_page(vm_page_t m) -{ - - mtx_lock(&CMAPCADDR12_lock); - if (*CMAP2) - panic("pmap_zero_page: CMAP2 busy"); - sched_pin(); - PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); - invlcaddr(CADDR2); - pagezero(CADDR2); - PT_CLEAR_VA(CMAP2, TRUE); - sched_unpin(); - mtx_unlock(&CMAPCADDR12_lock); -} - -/* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. 
- */ -void -pmap_zero_page_area(vm_page_t m, int off, int size) -{ - - mtx_lock(&CMAPCADDR12_lock); - if (*CMAP2) - panic("pmap_zero_page: CMAP2 busy"); - sched_pin(); - PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); - invlcaddr(CADDR2); - if (off == 0 && size == PAGE_SIZE) - pagezero(CADDR2); - else - bzero((char *)CADDR2 + off, size); - PT_CLEAR_VA(CMAP2, TRUE); - sched_unpin(); - mtx_unlock(&CMAPCADDR12_lock); -} - -/* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. - */ -void -pmap_zero_page_idle(vm_page_t m) -{ - - if (*CMAP3) - panic("pmap_zero_page: CMAP3 busy"); - sched_pin(); - PT_SET_VA(CMAP3, PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, TRUE); - invlcaddr(CADDR3); - pagezero(CADDR3); - PT_CLEAR_VA(CMAP3, TRUE); - sched_unpin(); -} - -/* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. 
- */ -void -pmap_copy_page(vm_page_t src, vm_page_t dst) -{ - - mtx_lock(&CMAPCADDR12_lock); - if (*CMAP1) - panic("pmap_copy_page: CMAP1 busy"); - if (*CMAP2) - panic("pmap_copy_page: CMAP2 busy"); - sched_pin(); -#ifdef I386_CPU - invltlb(); -#else - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -#endif - PT_SET_VA(CMAP1, PG_V | VM_PAGE_TO_PHYS(src) | PG_A, FALSE); - PT_SET_VA(CMAP2, PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M, TRUE); - - bcopy(CADDR1, CADDR2, PAGE_SIZE); - PT_CLEAR_VA(CMAP1, FALSE); - PT_CLEAR_VA(CMAP2, TRUE); - sched_unpin(); - mtx_unlock(&CMAPCADDR12_lock); -} - -void -pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst) -{ - - mtx_lock(&CMAPCADDR12_lock); - if (*CMAP1) - panic("pmap_copy_ma: CMAP1 busy"); - if (*CMAP2) - panic("pmap_copy_ma: CMAP2 busy"); - sched_pin(); -#ifdef I386_CPU - invltlb(); -#else - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -#endif - PT_SET_VA_MA(CMAP1, PG_V | src | PG_A, FALSE); - PT_SET_VA_MA(CMAP2, PG_V | PG_RW | dst | PG_A | PG_M, TRUE); - - bcopy(CADDR1, CADDR2, PAGE_SIZE); - PT_CLEAR_VA(CMAP1, FALSE); - PT_CLEAR_VA(CMAP2, TRUE); - sched_unpin(); - mtx_unlock(&CMAPCADDR12_lock); -} - -/* - * Returns true if the pmap's pv is one of the first - * 16 pvs linked to from this page. This count may - * be changed upwards or downwards in the future; it - * is only necessary that true be returned for a small - * subset of pmaps for proper page aging. - */ -boolean_t -pmap_page_exists_quick(pmap, m) - pmap_t pmap; - vm_page_t m; -{ - pv_entry_t pv; - int loops = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pv->pv_pmap == pmap) { - return TRUE; - } - loops++; - if (loops >= 16) - break; - } - return (FALSE); -} - -#define PMAP_REMOVE_PAGES_CURPROC_ONLY -/* - * Remove all pages from specified address space - * this aids process exit speeds. 
Also, this code - * is special cased for current process only, but - * can have the more generic (and slightly slower) - * mode enabled. This is much faster than pmap_remove - * in the case of running down an entire address space. - */ -void -pmap_remove_pages(pmap, sva, eva) - pmap_t pmap; - vm_offset_t sva, eva; -{ - pt_entry_t *pte, tpte; - vm_page_t m; - pv_entry_t pv, npv; - -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } -#endif - vm_page_lock_queues(); - PMAP_LOCK(pmap); - sched_pin(); - - for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { - if (pv->pv_va >= eva || pv->pv_va < sva) { - npv = TAILQ_NEXT(pv, pv_plist); - continue; - } - -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - pte = vtopte(pv->pv_va); -#else - pte = pmap_pte_quick(pmap, pv->pv_va); -#endif - tpte = PT_GET(pte); - - if (tpte == 0) { - printf("TPTE at %p IS ZERO @ VA %08x\n", - pte, pv->pv_va); - panic("bad pte"); - } - -/* - * We cannot remove wired pages from a process' mapping at this time - */ - if (tpte & PG_W) { - npv = TAILQ_NEXT(pv, pv_plist); - continue; - } - - m = PHYS_TO_VM_PAGE(tpte); - KASSERT(m->phys_addr == (tpte & PG_FRAME), - ("vm_page_t %p phys_addr mismatch %016jx %016jx", - m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); - - KASSERT(m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); - - pmap->pm_stats.resident_count--; - - pte_clear(pte); - - /* - * Update the vm_page_t clean and reference bits. 
- */ - if (tpte & PG_M) { - vm_page_dirty(m); - } - - npv = TAILQ_NEXT(pv, pv_plist); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - - m->md.pv_list_count--; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_flag_clear(m, PG_WRITEABLE); - - pmap_unuse_pt(pmap, pv->pv_va); - free_pv_entry(pv); - } - sched_unpin(); - pmap_invalidate_all(pmap); - PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); -} - -/* - * pmap_is_modified: - * - * Return whether or not the specified physical page was modified - * in any physical maps. - */ -boolean_t -pmap_is_modified(vm_page_t m) -{ - pv_entry_t pv; - pt_entry_t *pte; - boolean_t rv; - - rv = FALSE; - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return (rv); - - sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (!pmap_track_modified(pv->pv_va)) - continue; -#if defined(PMAP_DIAGNOSTIC) - if (!pv->pv_pmap) { - printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); - continue; - } -#endif - PMAP_LOCK(pv->pv_pmap); - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - rv = (*pte & PG_M) != 0; - PMAP_UNLOCK(pv->pv_pmap); - if (rv) - break; - } - sched_unpin(); - return (rv); -} - -/* - * pmap_is_prefaultable: - * - * Return whether or not the specified virtual address is elgible - * for prefault. - */ -boolean_t -pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) -{ - pt_entry_t *pte; - boolean_t rv; - - rv = FALSE; - /* XXX - * in order for writable pagetables to help, - * this has to work - check if we aren't doing - * an invlpg on the page tables linear mappings - */ - return (rv); - PMAP_LOCK(pmap); - if (pmap_pde(pmap, addr)) { - pte = vtopte(addr); - rv = *pte == 0; - } - PMAP_UNLOCK(pmap); - return (rv); -} - -/* - * Clear the given bit in each of the given page's ptes. The bit is - * expressed as a 32-bit mask. 
Consequently, if the pte is 64 bits in - * size, only a bit within the least significant 32 can be cleared. - */ -static __inline void -pmap_clear_ptes(vm_page_t m, int bit) -{ - register pv_entry_t pv; - pt_entry_t pbits, *pte; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || - (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) - return; - - sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - /* - * Loop over all current mappings setting/clearing as appropos If - * setting RO do we need to clear the VAC? - */ - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - /* - * don't write protect pager mappings - */ - if (bit == PG_RW) { - if (!pmap_track_modified(pv->pv_va)) - continue; - } - -#if defined(PMAP_DIAGNOSTIC) - if (!pv->pv_pmap) { - printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); - continue; - } -#endif - - PMAP_LOCK(pv->pv_pmap); - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); -#ifdef notyet -retry: -#endif - pbits = PT_GET(pte); - if (pbits & bit) { - if (bit == PG_RW) { - /* - * Regardless of whether a pte is 32 or 64 bits - * in size, PG_RW and PG_M are among the least - * significant 32 bits. - */ -#ifdef notyet - if (!atomic_cmpset_int((u_int *)pte, pbits, - pbits & ~(PG_RW | PG_M))) - goto retry; -#endif - PT_SET_VA(pte, pbits & ~(PG_M|PG_RW), TRUE); - - - if (pbits & PG_M) { - vm_page_dirty(m); - } - } else { -#ifdef notyet - atomic_clear_int((u_int *)pte, bit); -#endif - /* XXX */ - PT_SET_VA(pte, pbits & ~bit, TRUE); - } - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); - } - PMAP_UNLOCK(pv->pv_pmap); - } - if (bit == PG_RW) - vm_page_flag_clear(m, PG_WRITEABLE); - sched_unpin(); -} - -/* - * pmap_page_protect: - * - * Lower the permission for all mappings to a given page. 
- */ -void -pmap_page_protect(vm_page_t m, vm_prot_t prot) -{ - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - pmap_clear_ptes(m, PG_RW); - } else { - pmap_remove_all(m); - } - } -} - -/* - * pmap_ts_referenced: - * - * Return a count of reference bits for a page, clearing those bits. - * It is not necessary for every reference bit to be cleared, but it - * is necessary that 0 only be returned when there are truly no - * reference bits set. - * - * XXX: The exact number of bits to check and clear is a matter that - * should be tested and standardized at some point in the future for - * optimal aging of shared pages. - */ -int -pmap_ts_referenced(vm_page_t m) -{ - register pv_entry_t pv, pvf, pvn; - pt_entry_t *pte; - pt_entry_t v; - int rtval = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return (rtval); - - sched_pin(); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - - pvf = pv; - - do { - pvn = TAILQ_NEXT(pv, pv_list); - - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - - if (!pmap_track_modified(pv->pv_va)) - continue; - - PMAP_LOCK(pv->pv_pmap); - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - - if (pte && ((v = PT_GET(pte)) & PG_A) != 0) { -#ifdef notyet - atomic_clear_int((u_int *)pte, PG_A); -#endif - PT_SET_VA(pte, v & ~PG_A, FALSE); - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); - - rtval++; - if (rtval > 4) { - PMAP_UNLOCK(pv->pv_pmap); - break; - } - } - PMAP_UNLOCK(pv->pv_pmap); - } while ((pv = pvn) != NULL && pv != pvf); - } - sched_unpin(); - - return (rtval); -} - -/* - * Clear the modify bits on the specified physical page. - */ -void -pmap_clear_modify(vm_page_t m) -{ - pmap_clear_ptes(m, PG_M); -} - -/* - * pmap_clear_reference: - * - * Clear the reference bit on the specified physical page. 
- */ -void -pmap_clear_reference(vm_page_t m) -{ - pmap_clear_ptes(m, PG_A); -} - -/* - * Miscellaneous support routines follow - */ - -/* - * Map a set of physical memory pages into the kernel virtual - * address space. Return a pointer to where it is mapped. This - * routine is intended to be used for mapping device memory, - * NOT real memory. - */ -void * -pmap_mapdev(pa, size) - vm_paddr_t pa; - vm_size_t size; -{ - vm_offset_t va, tmpva, offset; - - offset = pa & PAGE_MASK; - size = roundup(offset + size, PAGE_SIZE); - pa = pa & PG_FRAME; - - if (pa < KERNLOAD && pa + size <= KERNLOAD) - va = KERNBASE + pa; - else - va = kmem_alloc_nofault(kernel_map, size); - if (!va) - panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); - - for (tmpva = va; size > 0; ) { - pmap_kenter(tmpva, pa); - size -= PAGE_SIZE; - tmpva += PAGE_SIZE; - pa += PAGE_SIZE; - } - pmap_invalidate_range(kernel_pmap, va, tmpva); - return ((void *)(va + offset)); -} - -void -pmap_unmapdev(va, size) - vm_offset_t va; - vm_size_t size; -{ - vm_offset_t base, offset, tmpva; - panic("unused"); - if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) - return; - base = va & PG_FRAME; - offset = va & PAGE_MASK; - size = roundup(offset + size, PAGE_SIZE); - for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) - pmap_kremove(tmpva); - pmap_invalidate_range(kernel_pmap, va, tmpva); - kmem_free(kernel_map, base, size); -} - -/* - * perform the pmap work for mincore - */ -int -pmap_mincore(pmap, addr) - pmap_t pmap; - vm_offset_t addr; -{ - pt_entry_t *ptep, pte; - vm_page_t m; - int val = 0; - - PMAP_LOCK(pmap); - ptep = pmap_pte(pmap, addr); - pte = (ptep != NULL) ? 
PT_GET(ptep) : 0; - pmap_pte_release(ptep); - PMAP_UNLOCK(pmap); - - if (pte != 0) { - vm_paddr_t pa; - - val = MINCORE_INCORE; - if ((pte & PG_MANAGED) == 0) - return val; - - pa = pte & PG_FRAME; - - m = PHYS_TO_VM_PAGE(pa); - - /* - * Modified by us - */ - if (pte & PG_M) - val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; - else { - /* - * Modified by someone else - */ - vm_page_lock_queues(); - if (m->dirty || pmap_is_modified(m)) - val |= MINCORE_MODIFIED_OTHER; - vm_page_unlock_queues(); - } - /* - * Referenced by us - */ - if (pte & PG_A) - val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; - else { - /* - * Referenced by someone else - */ - vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } - vm_page_unlock_queues(); - } - } - return val; -} - -void -pmap_activate(struct thread *td) -{ - struct proc *p = td->td_proc; - pmap_t pmap, oldpmap; - u_int32_t cr3; - - critical_enter(); - pmap = vmspace_pmap(td->td_proc->p_vmspace); - oldpmap = PCPU_GET(curpmap); -#if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); -#else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; -#endif -#ifdef PAE - cr3 = vtophys(pmap->pm_pdpt); -#else - cr3 = vtophys(pmap->pm_pdir); -#endif - /* XXXKSE this is wrong. - * pmap_activate is for the current thread on the current cpu - */ - if (p->p_flag & P_SA) { - /* Make sure all other cr3 entries are updated. */ - /* what if they are running? 
XXXKSE (maybe abort them) */ - FOREACH_THREAD_IN_PROC(p, td) { - td->td_pcb->pcb_cr3 = cr3; - } - } else { - td->td_pcb->pcb_cr3 = cr3; - } - load_cr3(cr3); - PCPU_SET(curpmap, pmap); - critical_exit(); -} - -vm_offset_t -pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) -{ - - if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { - return addr; - } - - addr = (addr + PDRMASK) & ~PDRMASK; - return addr; -} - - -#if defined(PMAP_DEBUG) -extern int init_first; -void -pmap_ref(pt_entry_t *pte, unsigned long ma) -{ - int ind, i, count; - unsigned long ebp_prev, eip_prev, oma = 0; - unsigned long pa = xpmap_mtop(ma); - - /* are we to the point where mappings are set up? */ - if (!init_first) - return; - - ind = pa >> PAGE_SHIFT; - /* privileged? */ - if ((pa & PG_RW) && pteinfo_list[ind].pt_ref & (1 << 31)) - BKPT; - - /* is MA already mapped ? */ - oma = *pte; - - /* old reference being lost */ - if (oma && (oma & PG_RW) && ((oma & PG_FRAME) != (ma & PG_FRAME))) - pmap_dec_ref(oma); - - /* ignore RO mappings - unless were downgrading */ - if (!(ma & PG_RW)) { - /* downgrading mapping - lose reference */ - if (((oma & PG_FRAME) == (ma & PG_FRAME)) && - (oma & PG_RW)) - pmap_dec_ref(ma); - return; - } - - if (pteinfo_list[ind].pt_ref < 0) - BKPT; - - - /* same address and not upgrading the mapping */ - if (((oma & PG_FRAME) == (ma & PG_FRAME)) && - (oma & PG_RW)) - return; - - count = pteinfo_list[ind].pt_ref; - __asm__("movl %%ebp, %0" : "=r" (ebp_prev)); - for (i = 0; i < XPQ_CALL_DEPTH && ebp_prev > KERNBASE; i++) { - __asm__("movl 4(%1), %0" : "=r" (eip_prev) : "r" (ebp_prev)); - pteinfo_list[ind].pt_eip[count%XPQ_CALL_COUNT][i] = eip_prev; - __asm__("movl (%1), %0" : "=r" (ebp_prev) : "r" (ebp_prev)); - } - - pteinfo_list[ind].pt_ref++; - -} - -void -pmap_dec_ref(unsigned long ma) -{ - unsigned long pa; - int ind, count; - - if (!ma) BKPT; - - pa = xpmap_mtop(ma); - - ind = pa >> PAGE_SHIFT; - if (pteinfo_list[ind].pt_ref & (1 << 31)) 
BKPT; - - count = pteinfo_list[ind].pt_ref & ~(1 << 31); - if (count < 1) { - printk("ma: %lx has ref count of 0\n", ma); - BKPT; - } - pteinfo_list[ind].pt_ref = (--count | (pteinfo_list[ind].pt_ref & (1 << 31))); - -} - -void -pmap_dec_ref_page(vm_page_t m) -{ - unsigned long *pt; - int i; - mtx_lock(&CMAPCADDR12_lock); - if (*CMAP2) - panic("pmap_zero_page: CMAP2 busy"); - sched_pin(); - PT_SET_VA(CMAP2, PG_V | VM_PAGE_TO_PHYS(m) | PG_A | PG_M, FALSE); - invlcaddr(CADDR2); - pt = (unsigned long *)CADDR2; - for (i = 0; i < 1024; i++) - if (pt[i] & PG_RW) - pmap_dec_ref(xpmap_ptom(pt[i])); - PT_CLEAR_VA(CMAP2, TRUE); - sched_unpin(); - mtx_unlock(&CMAPCADDR12_lock); -} - -void -pmap_mark_privileged(unsigned long pa) -{ - int ind = pa >> PAGE_SHIFT; - - if (pteinfo_list[ind].pt_ref & (1 << 31)) BKPT; - if ((pteinfo_list[ind].pt_ref & ~(1 << 31)) > 0) BKPT; - - pteinfo_list[ind].pt_ref |= (1 << 31); - -} - -void -pmap_mark_unprivileged(unsigned long pa) -{ - int ind = pa >> PAGE_SHIFT; - - if (pteinfo_list[ind].pt_ref != (1 << 31)) BKPT; - - pteinfo_list[ind].pt_ref &= ~(1 << 31); - -} - - -int -pmap_pid_dump(int pid) -{ - pmap_t pmap; - struct proc *p; - int npte = 0; - int index; - - sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { - if (p->p_pid != pid) - continue; - - if (p->p_vmspace) { - int i,j; - index = 0; - pmap = vmspace_pmap(p->p_vmspace); - for (i = 0; i < NPDEPTD; i++) { - pd_entry_t *pde; - pt_entry_t *pte; - vm_offset_t base = i << PDRSHIFT; - - pde = &pmap->pm_pdir[i]; - if (pde && pmap_pde_v(pde)) { - for (j = 0; j < NPTEPG; j++) { - vm_offset_t va = base + (j << PAGE_SHIFT); - if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { - if (index) { - index = 0; - printf("\n"); - } - sx_sunlock(&allproc_lock); - return npte; - } - pte = pmap_pte(pmap, va); - if (pte && pmap_pte_v(pte)) { - pt_entry_t pa; - vm_page_t m; - pa = PT_GET(pte); - m = PHYS_TO_VM_PAGE(pa); - printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", - va, pa, m->hold_count, 
m->wire_count, m->flags); - npte++; - index++; - if (index >= 2) { - index = 0; - printf("\n"); - } else { - printf(" "); - } - } - } - } - } - } - } - sx_sunlock(&allproc_lock); - return npte; -} -#endif /* PMAP_DEBUG */ - -#if defined(DEBUG) - -static void pads(pmap_t pm); -void pmap_pvdump(vm_offset_t pa); - -/* print address space of pmap*/ -static void -pads(pm) - pmap_t pm; -{ - int i, j; - vm_paddr_t va; - pt_entry_t *ptep; - - if (pm == kernel_pmap) - return; - for (i = 0; i < NPDEPTD; i++) - if (pm->pm_pdir[i]) - for (j = 0; j < NPTEPG; j++) { - va = (i << PDRSHIFT) + (j << PAGE_SHIFT); - if (pm == kernel_pmap && va < KERNBASE) - continue; - if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) - continue; - ptep = pmap_pte(pm, va); - if (pmap_pte_v(ptep)) - printf("%x:%x ", va, *ptep); - }; - -} - -void -pmap_pvdump(pa) - vm_paddr_t pa; -{ - pv_entry_t pv; - vm_page_t m; - - printf("pa %x", pa); - m = PHYS_TO_VM_PAGE(pa); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); - pads(pv->pv_pmap); - } - printf(" "); -} -#endif - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/support.s Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1553 +0,0 @@ -/*- - * Copyright (c) 1993 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD: src/sys/i386/i386/support.s,v 1.100 2003/11/03 21:28:54 jhb Exp $ - */ - -#include "opt_npx.h" - -#include <machine/asmacros.h> -#include <machine/cputypes.h> -#include <machine/intr_machdep.h> -#include <machine/pmap.h> -#include <machine/specialreg.h> - -#include "assym.s" - -#define IDXSHIFT 10 - - .data - .globl bcopy_vector -bcopy_vector: - .long generic_bcopy - .globl bzero_vector -bzero_vector: - .long generic_bzero - .globl copyin_vector -copyin_vector: - .long generic_copyin - .globl copyout_vector -copyout_vector: - .long generic_copyout -#if defined(I586_CPU) && defined(DEV_NPX) -kernel_fpu_lock: - .byte 0xfe - .space 3 -#endif - ALIGN_DATA - .globl intrcnt, eintrcnt -intrcnt: - .space INTRCNT_COUNT * 4 -eintrcnt: - - .globl intrnames, eintrnames -intrnames: - .space INTRCNT_COUNT * (MAXCOMLEN + 1) -eintrnames: - - .text - -/* - * bcopy family - * void bzero(void *buf, u_int len) - */ - -ENTRY(bzero) - MEXITCOUNT - jmp *bzero_vector - -ENTRY(generic_bzero) - pushl %edi - movl 8(%esp),%edi - movl 12(%esp),%ecx - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - rep - stosb - popl %edi - ret - -#ifdef I486_CPU -ENTRY(i486_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - xorl %eax,%eax -/* - * do 64 byte chunks first - * - * XXX this is probably over-unrolled at least for DX2's - */ -2: - cmpl $64,%ecx - jb 3f - movl %eax,(%edx) - movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - movl %eax,16(%edx) - movl %eax,20(%edx) - movl %eax,24(%edx) - movl %eax,28(%edx) - movl %eax,32(%edx) - movl %eax,36(%edx) - movl %eax,40(%edx) - movl %eax,44(%edx) - movl %eax,48(%edx) - movl %eax,52(%edx) - movl %eax,56(%edx) - movl %eax,60(%edx) - addl $64,%edx - subl $64,%ecx - jnz 2b - ret - -/* - * do 16 byte chunks - */ - SUPERALIGN_TEXT -3: - cmpl $16,%ecx - jb 4f - movl %eax,(%edx) - movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - addl $16,%edx - subl $16,%ecx - jnz 3b - ret - -/* - * do 4 
byte chunks - */ - SUPERALIGN_TEXT -4: - cmpl $4,%ecx - jb 5f - movl %eax,(%edx) - addl $4,%edx - subl $4,%ecx - jnz 4b - ret - -/* - * do 1 byte chunks - * a jump table seems to be faster than a loop or more range reductions - * - * XXX need a const section for non-text - */ - .data -jtab: - .long do0 - .long do1 - .long do2 - .long do3 - - .text - SUPERALIGN_TEXT -5: - jmp *jtab(,%ecx,4) - - SUPERALIGN_TEXT -do3: - movw %ax,(%edx) - movb %al,2(%edx) - ret - - SUPERALIGN_TEXT -do2: - movw %ax,(%edx) - ret - - SUPERALIGN_TEXT -do1: - movb %al,(%edx) - ret - - SUPERALIGN_TEXT -do0: - ret -#endif - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - - /* - * The FPU register method is twice as fast as the integer register - * method unless the target is in the L1 cache and we pre-allocate a - * cache line for it (then the integer register method is 4-5 times - * faster). However, we never pre-allocate cache lines, since that - * would make the integer method 25% or more slower for the common - * case when the target isn't in either the L1 cache or the L2 cache. - * Thus we normally use the FPU register method unless the overhead - * would be too large. - */ - cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ - jb intreg_i586_bzero - - /* - * The FPU registers may belong to an application or to fastmove() - * or to another invocation of bcopy() or ourself in a higher level - * interrupt or trap handler. Preserving the registers is - * complicated since we avoid it if possible at all levels. We - * want to localize the complications even when that increases them. - * Here the extra work involves preserving CR0_TS in TS. - * `fpcurthread != NULL' is supposed to be the condition that all the - * FPU resources belong to an application, but fpcurthread and CR0_TS - * aren't set atomically enough for this condition to work in - * interrupt handlers. 
- * - * Case 1: FPU registers belong to the application: we must preserve - * the registers if we use them, so we only use the FPU register - * method if the target size is large enough to amortize the extra - * overhead for preserving them. CR0_TS must be preserved although - * it is very likely to end up as set. - * - * Case 2: FPU registers belong to fastmove(): fastmove() currently - * makes the registers look like they belong to an application so - * that cpu_switch() and savectx() don't have to know about it, so - * this case reduces to case 1. - * - * Case 3: FPU registers belong to the kernel: don't use the FPU - * register method. This case is unlikely, and supporting it would - * be more complicated and might take too much stack. - * - * Case 4: FPU registers don't belong to anyone: the FPU registers - * don't need to be preserved, so we always use the FPU register - * method. CR0_TS must be preserved although it is very likely to - * always end up as clear. - */ - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz1 - - /* - * XXX don't use the FPU for cases 1 and 2, since preemptive - * scheduling of ithreads broke these cases. Note that we can - * no longer get here from an interrupt handler, since the - * context sitch to the interrupt handler will have saved the - * FPU state. - */ - jmp intreg_i586_bzero - - cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ - jb intreg_i586_bzero - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - subl $108,%esp - fnsave 0(%esp) - jmp i586_bz2 - -i586_bz1: - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - fninit /* XXX should avoid needing this */ -i586_bz2: - fldz - - /* - * Align to an 8 byte boundary (misalignment in the main loop would - * cost a factor of >= 2). Avoid jumps (at little cost if it is - * already aligned) by always zeroing 8 bytes and using the part up - * to the _next_ alignment position. 
- */ - fstl 0(%edx) - addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ - addl $8,%edx - andl $~7,%edx - subl %edx,%ecx - - /* - * Similarly align `len' to a multiple of 8. - */ - fstl -8(%edx,%ecx) - decl %ecx - andl $~7,%ecx - - /* - * This wouldn't be any faster if it were unrolled, since the loop - * control instructions are much faster than the fstl and/or done - * in parallel with it so their overhead is insignificant. - */ -fpureg_i586_bzero_loop: - fstl 0(%edx) - addl $8,%edx - subl $8,%ecx - cmpl $8,%ecx - jae fpureg_i586_bzero_loop - - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz3 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bzero_oops: - int $3 - jmp i586_bzero_oops - - frstor 0(%esp) - addl $108,%esp - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -i586_bz3: - fstp %st(0) - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -intreg_i586_bzero: - /* - * `rep stos' seems to be the best method in practice for small - * counts. Fancy methods usually take too long to start up due - * to cache and BTB misses. 
- */ - pushl %edi - movl %edx,%edi - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - jne 1f - popl %edi - ret - -1: - rep - stosb - popl %edi - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -ENTRY(sse2_pagezero) - pushl %ebx - movl 8(%esp),%ecx - movl %ecx,%eax - addl $4096,%eax - xor %ebx,%ebx -1: - movnti %ebx,(%ecx) - addl $4,%ecx - cmpl %ecx,%eax - jne 1b - sfence - popl %ebx - ret - -ENTRY(i686_pagezero) - pushl %edi - pushl %ebx - - movl 12(%esp), %edi - movl $1024, %ecx - cld - - ALIGN_TEXT -1: - xorl %eax, %eax - repe - scasl - jnz 2f - - popl %ebx - popl %edi - ret - - ALIGN_TEXT - -2: - incl %ecx - subl $4, %edi - - movl %ecx, %edx - cmpl $16, %ecx - - jge 3f - - movl %edi, %ebx - andl $0x3f, %ebx - shrl %ebx - shrl %ebx - movl $16, %ecx - subl %ebx, %ecx - -3: - subl %ecx, %edx - rep - stosl - - movl %edx, %ecx - testl %edx, %edx - jnz 1b - - popl %ebx - popl %edi - ret - -/* fillw(pat, base, cnt) */ -ENTRY(fillw) - pushl %edi - movl 8(%esp),%eax - movl 12(%esp),%edi - movl 16(%esp),%ecx - cld - rep - stosw - popl %edi - ret - -ENTRY(bcopyb) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - cld /* nope, copy forwards */ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards. */ - addl %ecx,%esi - decl %edi - decl %esi - std - rep - movsb - popl %edi - popl %esi - cld - ret - -ENTRY(bcopy) - MEXITCOUNT - jmp *bcopy_vector - -/* - * generic_bcopy(src, dst, cnt) - * ws@xxxxxxxx (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 - */ -ENTRY(generic_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? 
*/ - jb 1f - - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? */ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards */ - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx /* any fractional bytes? */ - std - rep - movsb - movl 20(%esp),%ecx /* copy remainder by 32-bit words */ - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - - cmpl $1024,%ecx - jb small_i586_bcopy - - sarb $1,kernel_fpu_lock - jc small_i586_bcopy - cmpl $0,PCPU(FPCURTHREAD) - je i586_bc1 - - /* XXX turn off handling of cases 1-2, as above. */ - movb $0xfe,kernel_fpu_lock - jmp small_i586_bcopy - - smsw %dx - clts - subl $108,%esp - fnsave 0(%esp) - jmp 4f - -i586_bc1: - smsw %dx - clts - fninit /* XXX should avoid needing this */ - - ALIGN_TEXT -4: - pushl %ecx -#define DCACHE_SIZE 8192 - cmpl $(DCACHE_SIZE-512)/2,%ecx - jbe 2f - movl $(DCACHE_SIZE-512)/2,%ecx -2: - subl %ecx,0(%esp) - cmpl $256,%ecx - jb 5f /* XXX should prefetch if %ecx >= 32 */ - pushl %esi - pushl %ecx - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - popl %ecx - popl %esi -5: - ALIGN_TEXT -large_i586_bcopy_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $64,%esi - 
addl $64,%edi - subl $64,%ecx - cmpl $64,%ecx - jae large_i586_bcopy_loop - popl %eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - cmpl $0,PCPU(FPCURTHREAD) - je i586_bc2 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bcopy_oops: - int $3 - jmp i586_bcopy_oops - - frstor 0(%esp) - addl $108,%esp -i586_bc2: - lmsw %dx - movb $0xfe,kernel_fpu_lock - -/* - * This is a duplicate of the main part of generic_bcopy. See the comments - * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and - * would mess up high resolution profiling. - */ - ALIGN_TEXT -small_i586_bcopy: - shrl $2,%ecx - cld - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx - std - rep - movsb - movl 20(%esp),%ecx - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -/* - * Note: memcpy does not support overlapping copies - */ -ENTRY(memcpy) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%ecx - movl %edi,%eax - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? */ - rep - movsb - popl %esi - popl %edi - ret - - -/*****************************************************************************/ -/* copyout and fubyte family */ -/*****************************************************************************/ -/* - * Access user memory from inside the kernel. These routines and possibly - * the math- and DOS emulators should be the only places that do this. - * - * We have to access the memory with user's permissions, so use a segment - * selector with RPL 3. 
For writes to user space we have to additionally - * check the PTE for write permission, because the 386 does not check - * write permissions when we are executing with EPL 0. The 486 does check - * this if the WP bit is set in CR0, so we can use a simpler version here. - * - * These routines set curpcb->onfault for the time they execute. When a - * protection violation occurs inside the functions, the trap handler - * returns to *curpcb->onfault instead of the function. - */ - -/* - * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU) - */ -ENTRY(copyout) - MEXITCOUNT - jmp *copyout_vector - -ENTRY(generic_copyout) - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ - jz done_copyout - - /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. - */ - - /* - * First, prevent address wrapping. - */ - movl %edi,%eax - addl %ebx,%eax - jc copyout_fault -/* - * XXX STOP USING VM_MAXUSER_ADDRESS. - * It is an end address, not a max, so every time it is used correctly it - * looks like there is an off by one error, and of course it caused an off - * by one error in several places. 
- */ - cmpl $VM_MAXUSER_ADDRESS,%eax - ja copyout_fault - - /* bcopy(%esi, %edi, %ebx) */ - movl %ebx,%ecx - -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyout: -#endif - shrl $2,%ecx - cld - rep - movsl - movb %bl,%cl - andb $3,%cl - rep - movsb - -done_copyout: - popl %ebx - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) - ret - - ALIGN_TEXT -copyout_fault: - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax - ret - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyout) - /* - * Duplicated from generic_copyout. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ - jz done_copyout - - /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. - */ - - /* - * First, prevent address wrapping. - */ - movl %edi,%eax - addl %ebx,%eax - jc copyout_fault -/* - * XXX STOP USING VM_MAXUSER_ADDRESS. - * It is an end address, not a max, so every time it is used correctly it - * looks like there is an off by one error, and of course it caused an off - * by one error in several places. - */ - cmpl $VM_MAXUSER_ADDRESS,%eax - ja copyout_fault - - /* bcopy(%esi, %edi, %ebx) */ -3: - movl %ebx,%ecx - /* - * End of duplicated code. 
- */ - - cmpl $1024,%ecx - jb slow_copyout - - pushl %ecx - call fastmove - addl $4,%esp - jmp done_copyout -#endif /* I586_CPU && defined(DEV_NPX) */ - -/* - * copyin(from_user, to_kernel, len) - MP SAFE - */ -ENTRY(copyin) - MEXITCOUNT - jmp *copyin_vector - -ENTRY(generic_copyin) - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ - - /* - * make sure address is valid - */ - movl %esi,%edx - addl %ecx,%edx - jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx - ja copyin_fault - -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyin: -#endif - movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ - cld - rep - movsl - movb %al,%cl - andb $3,%cl /* copy remaining bytes */ - rep - movsb - -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -done_copyin: -#endif - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) - ret - - ALIGN_TEXT -copyin_fault: - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax - ret - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyin) - /* - * Duplicated from generic_copyin. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ - - /* - * make sure address is valid - */ - movl %esi,%edx - addl %ecx,%edx - jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx - ja copyin_fault - /* - * End of duplicated code. 
- */ - - cmpl $1024,%ecx - jb slow_copyin - - pushl %ebx /* XXX prepare for fastmove_fault */ - pushl %ecx - call fastmove - addl $8,%esp - jmp done_copyin -#endif /* I586_CPU && defined(DEV_NPX) */ - -#if defined(I586_CPU) && defined(DEV_NPX) -/* fastmove(src, dst, len) - src in %esi - dst in %edi - len in %ecx XXX changed to on stack for profiling - uses %eax and %edx for tmp. storage - */ -/* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ -ENTRY(fastmove) - pushl %ebp - movl %esp,%ebp - subl $PCB_SAVEFPU_SIZE+3*4,%esp - - movl 8(%ebp),%ecx - cmpl $63,%ecx - jbe fastmove_tail - - testl $7,%esi /* check if src addr is multiple of 8 */ - jnz fastmove_tail - - testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz fastmove_tail - - /* XXX grab FPU context atomically. */ - call ni_cli - -/* if (fpcurthread != NULL) { */ - cmpl $0,PCPU(FPCURTHREAD) - je 6f -/* fnsave(&curpcb->pcb_savefpu); */ - movl PCPU(CURPCB),%eax - fnsave PCB_SAVEFPU(%eax) -/* FPCURTHREAD = NULL; */ - movl $0,PCPU(FPCURTHREAD) -/* } */ -6: -/* now we own the FPU. */ - -/* - * The process' FP state is saved in the pcb, but if we get - * switched, the cpu_switch() will store our FP state in the - * pcb. It should be possible to avoid all the copying for - * this, e.g., by setting a flag to tell cpu_switch() to - * save the state somewhere else. - */ -/* tmp = curpcb->pcb_savefpu; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl %esp,%edi - movl PCPU(CURPCB),%esi - addl $PCB_SAVEFPU,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi -/* stop_emulating(); */ - clts -/* fpcurthread = curthread; */ - movl PCPU(CURTHREAD),%eax - movl %eax,PCPU(FPCURTHREAD) - movl PCPU(CURPCB),%eax - - /* XXX end of atomic FPU context grab. 
*/ - call ni_sti - - movl $fastmove_fault,PCB_ONFAULT(%eax) -4: - movl %ecx,-12(%ebp) - cmpl $1792,%ecx - jbe 2f - movl $1792,%ecx -2: - subl %ecx,-12(%ebp) - cmpl $256,%ecx - jb 5f - movl %ecx,-8(%ebp) - movl %esi,-4(%ebp) - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - movl -8(%ebp),%ecx - movl -4(%ebp),%esi -5: - ALIGN_TEXT -fastmove_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $-64,%ecx - addl $64,%esi - addl $64,%edi - cmpl $63,%ecx - ja fastmove_loop - movl -12(%ebp),%eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - /* XXX ungrab FPU context atomically. */ - call ni_cli - -/* curpcb->pcb_savefpu = tmp; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi - -/* start_emulating(); */ - smsw %ax - orb $CR0_TS,%al - lmsw %ax -/* fpcurthread = NULL; */ - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - call ni_sti - - ALIGN_TEXT -fastmove_tail: - movl PCPU(CURPCB),%eax - movl $fastmove_tail_fault,PCB_ONFAULT(%eax) - - movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ - cld - rep - movsl - movb %al,%cl - andb $3,%cl /* copy remaining bytes */ - rep - movsb - - movl %ebp,%esp - popl %ebp - ret - - ALIGN_TEXT -fastmove_fault: - /* XXX ungrab FPU context atomically. 
*/ - call ni_cli - - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - - smsw %ax - orb $CR0_TS,%al - lmsw %ax - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - call ni_sti - -fastmove_tail_fault: - movl %ebp,%esp - popl %ebp - addl $8,%esp - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -/* - * casuptr. Compare and set user pointer. Returns -1 or the current value. - */ -ENTRY(casuptr) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* dst */ - movl 8(%esp),%eax /* old */ - movl 12(%esp),%ecx /* new */ - - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ - ja fusufault - -#ifdef SMP - lock -#endif - cmpxchgl %ecx, (%edx) /* Compare and set. */ - - /* - * The old value is in %eax. If the store succeeded it will be the - * value we expected (old) from before the store, otherwise it will - * be the current value. - */ - - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * fu{byte,sword,word} - MP SAFE - * - * Fetch a byte (sword, word) from user memory - */ -ENTRY(fuword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* from */ - - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ - ja fusufault - - movl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -ENTRY(fuword32) - jmp fuword - -/* - * These two routines are called from the profiling code, potentially - * at interrupt time. If they fail, that's okay, good things will - * happen later. Fail all the time for now - until the trap code is - * able to deal with this. 
- */ -ALTENTRY(suswintr) -ENTRY(fuswintr) - movl $-1,%eax - ret - -/* - * fuword16 - MP SAFE - */ -ENTRY(fuword16) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja fusufault - - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * fubyte - MP SAFE - */ -ENTRY(fubyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - - cmpl $VM_MAXUSER_ADDRESS-1,%edx - ja fusufault - - movzbl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - - ALIGN_TEXT -fusufault: - movl PCPU(CURPCB),%ecx - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - decl %eax - ret - -/* - * su{byte,sword,word} - MP SAFE (if not I386_CPU) - * - * Write a byte (word, longword) to user memory - */ -ENTRY(suword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ - ja fusufault - - movl 8(%esp),%eax - movl %eax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx - movl %eax,PCB_ONFAULT(%ecx) - ret - -ENTRY(suword32) - jmp suword - -/* - * suword16 - MP SAFE (if not I386_CPU) - */ -ENTRY(suword16) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - - cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ - ja fusufault - - movw 8(%esp),%ax - movw %ax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * subyte - MP SAFE (if not I386_CPU) - */ -ENTRY(subyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - - cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ - ja fusufault - - movb 8(%esp),%al - movb %al,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE - * - * copy a string from from to to, stop when a 0 character is reached. 
- * return ENAMETOOLONG if string is longer than maxlen, and - * EFAULT on protection violations. If lencopied is non-zero, - * return the actual length in *lencopied. - */ -ENTRY(copyinstr) - pushl %esi - pushl %edi - movl PCPU(CURPCB),%ecx - movl $cpystrflt,PCB_ONFAULT(%ecx) - - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ - - movl $VM_MAXUSER_ADDRESS,%eax - - /* make sure 'from' is within bounds */ - subl %esi,%eax - jbe cpystrflt - - /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20(%esp) -1: - incl %edx - cld - -2: - decl %edx - jz 3f - - lodsb - stosb - orb %al,%al - jnz 2b - - /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax - jmp cpystrflt_x -3: - /* edx is zero - return ENAMETOOLONG or EFAULT */ - cmpl $VM_MAXUSER_ADDRESS,%esi - jae cpystrflt -4: - movl $ENAMETOOLONG,%eax - jmp cpystrflt_x - -cpystrflt: - movl $EFAULT,%eax - -cpystrflt_x: - /* set *lencopied and return %eax */ - movl PCPU(CURPCB),%ecx - movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 1f - movl %ecx,(%edx) -1: - popl %edi - popl %esi - ret - - -/* - * copystr(from, to, maxlen, int *lencopied) - MP SAFE - */ -ENTRY(copystr) - pushl %esi - pushl %edi - - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ - incl %edx - cld -1: - decl %edx - jz 4f - lodsb - stosb - orb %al,%al - jnz 1b - - /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax - jmp 6f -4: - /* edx is zero -- return ENAMETOOLONG */ - movl $ENAMETOOLONG,%eax - -6: - /* set *lencopied and return %eax */ - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 7f - movl %ecx,(%edx) -7: - popl %edi - popl %esi - ret - -ENTRY(bcmp) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%edx - xorl %eax,%eax - - 
movl %edx,%ecx - shrl $2,%ecx - cld /* compare forwards */ - repe - cmpsl - jne 1f - - movl %edx,%ecx - andl $3,%ecx - repe - cmpsb - je 2f -1: - incl %eax -2: - popl %esi - popl %edi - ret - - -/* - * Handling of special 386 registers and descriptor tables etc - */ -/* void lgdt(struct region_descriptor *rdp); */ -ENTRY(lgdt_finish) -#if 0 - /* reload the descriptor table */ - movl 4(%esp),%eax - lgdt (%eax) -#endif - /* flush the prefetch q */ - jmp 1f - nop -1: - /* reload "stale" selectors */ - movl $KDSEL,%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%gs - movl %eax,%ss - movl $KPSEL,%eax - movl %eax,%fs - - /* reload code selector by turning return into intersegmental return */ - movl (%esp),%eax - pushl %eax - movl $KCSEL,4(%esp) - lret - -/* ssdtosd(*ssdp,*sdp) */ -ENTRY(ssdtosd) - pushl %ebx - movl 8(%esp),%ecx - movl 8(%ecx),%ebx - shll $16,%ebx - movl (%ecx),%edx - roll $16,%edx - movb %dh,%bl - movb %dl,%bh - rorl $8,%ebx - movl 4(%ecx),%eax - movw %ax,%dx - andl $0xf0000,%eax - orl %eax,%ebx - movl 12(%esp),%ecx - movl %edx,(%ecx) - movl %ebx,4(%ecx) - popl %ebx - ret - -/* void reset_dbregs() */ -ENTRY(reset_dbregs) - movl $0,%eax - movl %eax,%dr7 /* disable all breapoints first */ - movl %eax,%dr0 - movl %eax,%dr1 - movl %eax,%dr2 - movl %eax,%dr3 - movl %eax,%dr6 - ret - -/*****************************************************************************/ -/* setjump, longjump */ -/*****************************************************************************/ - -ENTRY(setjmp) - movl 4(%esp),%eax - movl %ebx,(%eax) /* save ebx */ - movl %esp,4(%eax) /* save esp */ - movl %ebp,8(%eax) /* save ebp */ - movl %esi,12(%eax) /* save esi */ - movl %edi,16(%eax) /* save edi */ - movl (%esp),%edx /* get rta */ - movl %edx,20(%eax) /* save eip */ - xorl %eax,%eax /* return(0); */ - ret - -ENTRY(longjmp) - movl 4(%esp),%eax - movl (%eax),%ebx /* restore ebx */ - movl 4(%eax),%esp /* restore esp */ - movl 8(%eax),%ebp /* restore ebp */ - movl 12(%eax),%esi /* 
restore esi */ - movl 16(%eax),%edi /* restore edi */ - movl 20(%eax),%edx /* get rta */ - movl %edx,(%esp) /* put in return frame */ - xorl %eax,%eax /* return(1); */ - incl %eax - ret - -/* - * Support for BB-profiling (gcc -a). The kernbb program will extract - * the data from the kernel. - */ - - .data - ALIGN_DATA - .globl bbhead -bbhead: - .long 0 - - .text -NON_GPROF_ENTRY(__bb_init_func) - movl 4(%esp),%eax - movl $1,(%eax) - movl bbhead,%edx - movl %edx,16(%eax) - movl %eax,bbhead - NON_GPROF_RET diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/swtch.s Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,445 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/i386/swtch.s,v 1.143 2003/09/30 08:11:35 jeff Exp $ - */ - -#include "opt_npx.h" - -#include <machine/asmacros.h> - -#include "assym.s" - - -/*****************************************************************************/ -/* Scheduling */ -/*****************************************************************************/ - - .text - -/* - * cpu_throw() - * - * This is the second half of cpu_swtch(). It is used when the current - * thread is either a dummy or slated to die, and we no longer care - * about its state. This is only a slight optimization and is probably - * not worth it anymore. Note that we need to clear the pm_active bits so - * we do need the old proc if it still exists. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd - */ -ENTRY(cpu_throw) - movl PCPU(CPUID), %esi - movl 4(%esp),%ecx /* Old thread */ - testl %ecx,%ecx /* no thread? 
*/ - jz 1f - /* release bit from old pm_active */ - movl PCPU(CURPMAP), %ebx -#ifdef SMP - lock -#endif - btrl %esi, PM_ACTIVE(%ebx) /* clear old */ -1: - movl 8(%esp),%ecx /* New thread */ - movl TD_PCB(%ecx),%edx - movl PCB_CR3(%edx),%eax - - movl %eax,PCPU(CR3) /* new address space */ - - pushl %ecx - pushl %edx - pushl %esi - pushl %eax - call load_cr3 - addl $4,%esp - popl %esi - popl %edx - popl %ecx - - /* set bit in new pm_active */ - movl TD_PROC(%ecx),%eax - movl P_VMSPACE(%eax), %ebx - addl $VM_PMAP, %ebx - movl %ebx, PCPU(CURPMAP) -#ifdef SMP - lock -#endif - btsl %esi, PM_ACTIVE(%ebx) /* set new */ - jmp sw1 - -/* - * cpu_switch(old, new) - * - * Save the current thread state, then select the next thread to run - * and load its state. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd - */ -ENTRY(cpu_switch) - - /* Switch to new thread. First, save context. */ - movl 4(%esp),%ecx - -#ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ - jz badsw2 /* no, panic */ -#endif - - movl TD_PCB(%ecx),%edx - - movl (%esp),%eax /* Hardware registers */ - movl %eax,PCB_EIP(%edx) - movl %ebx,PCB_EBX(%edx) - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - movl %esi,PCB_ESI(%edx) - movl %edi,PCB_EDI(%edx) - movl %gs,PCB_GS(%edx) -#if 0 - pushfl /* PSL */ - popl PCB_PSL(%edx) -#endif - /* Check to see if we need to call a switchout function. */ - movl PCB_SWITCHOUT(%edx),%eax - cmpl $0, %eax - je 1f - call *%eax -1: - /* Test if debug registers should be saved. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f /* no, skip over */ - movl %dr7,%eax /* yes, do the save */ - movl %eax,PCB_DR7(%edx) - andl $0x0000fc00, %eax /* disable all watchpoints */ - movl %eax,%dr7 - movl %dr6,%eax - movl %eax,PCB_DR6(%edx) - movl %dr3,%eax - movl %eax,PCB_DR3(%edx) - movl %dr2,%eax - movl %eax,PCB_DR2(%edx) - movl %dr1,%eax - movl %eax,PCB_DR1(%edx) - movl %dr0,%eax - movl %eax,PCB_DR0(%edx) -1: - -#ifdef DEV_NPX - /* have we used fp, and need a save? 
*/ - cmpl %ecx,PCPU(FPCURTHREAD) - jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx - call npxsave /* do it in a big C function */ - popl %eax -1: -#endif - - - /* Save is done. Now fire up new thread. Leave old vmspace. */ - movl %ecx,%edi - movl 8(%esp),%ecx /* New thread */ -#ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ - jz badsw3 /* no, panic */ -#endif - movl TD_PCB(%ecx),%edx - movl PCPU(CPUID), %esi - - /* switch address space */ - movl PCB_CR3(%edx),%eax - - cmpl %eax,IdlePTD /* Kernel address space? */ - - je sw1 - /* XXX optimize later KMM */ -#if 0 - movl %cr3,%ebx /* The same address space? */ -#else - movl PCPU(CR3),%ebx -#endif - cmpl %ebx,%eax - je sw1 - - movl %eax,PCPU(CR3) /* new address space */ - - pushl %edx - pushl %ecx - pushl %esi - pushl %eax - call load_cr3 /* inform xen of the switch */ - addl $4,%esp - popl %esi - popl %ecx - popl %edx - - /* Release bit from old pmap->pm_active */ - movl PCPU(CURPMAP), %ebx - -#ifdef SMP - lock -#endif - btrl %esi, PM_ACTIVE(%ebx) /* clear old */ - /* Set bit in new pmap->pm_active */ - movl TD_PROC(%ecx),%eax /* newproc */ - movl P_VMSPACE(%eax), %ebx - addl $VM_PMAP, %ebx - movl %ebx, PCPU(CURPMAP) -#ifdef SMP - lock -#endif - btsl %esi, PM_ACTIVE(%ebx) /* set new */ -sw1: - -#if 0 - - /* only one task selector under Xen */ - /* - * At this point, we've switched address spaces and are ready - * to load up the rest of the next context. - */ - cmpl $0, PCB_EXT(%edx) /* has pcb extension? */ - je 1f /* If not, use the default */ - btsl %esi, private_tss /* mark use of private tss */ - movl PCB_EXT(%edx), %edi /* new tss descriptor */ - jmp 2f /* Load it up */ - -1: /* - * Use the common default TSS instead of our own. - * Set our stack pointer into the TSS, it's set to just - * below the PCB. 
In C, common_tss.tss_esp0 = &pcb - 16; - */ - leal -16(%edx), %ebx /* leave space for vm86 */ - movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 - - /* - * Test this CPU's bit in the bitmap to see if this - * CPU was using a private TSS. - */ - btrl %esi, private_tss /* Already using the common? */ - jae 3f /* if so, skip reloading */ - PCPU_ADDR(COMMON_TSSD, %edi) -2: - /* Move correct tss descriptor into GDT slot, then reload tr. */ - movl PCPU(TSS_GDT), %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -#endif /* !XEN */ -3: - /* notify Xen of task switch */ - pushl %edx /* &pcb is the new stack base */ - pushl $KDSEL - pushl $HYPERVISOR_STACK_SWITCH - call ni_queue_multicall2 - addl $12,%esp - /* XXX handle DOM0 IOPL case here (KMM) */ - /* we currently don't support running FreeBSD */ - /* in DOM0 so we can skip for now */ - - call ni_execute_multicall_list - - /* Restore context. */ - movl PCB_EBX(%edx),%ebx - movl PCB_ESP(%edx),%esp - movl PCB_EBP(%edx),%ebp - movl PCB_ESI(%edx),%esi - movl PCB_EDI(%edx),%edi - movl PCB_EIP(%edx),%eax - movl %eax,(%esp) -#if 0 - pushl PCB_PSL(%edx) - popfl -#endif - movl %edx, PCPU(CURPCB) - movl %ecx, PCPU(CURTHREAD) /* into next thread */ - - /* - * Determine the LDT to use and load it if is the default one and - * that is not the current one. - */ - movl TD_PROC(%ecx),%eax - cmpl $0,P_MD+MD_LDT(%eax) - jnz 1f - movl _default_ldt,%eax - cmpl PCPU(CURRENTLDT),%eax - je 2f - pushl %edx - pushl %eax - xorl %eax,%eax - movl %eax,%gs - call i386_reset_ldt - popl %eax - popl %edx - - movl %eax,PCPU(CURRENTLDT) - jmp 2f -1: - /* Load the LDT when it is not the default one. */ - pushl %edx /* Preserve pointer to pcb. */ - addl $P_MD,%eax /* Pointer to mdproc is arg. */ - pushl %eax - call set_user_ldt - addl $4,%esp - popl %edx -2: - /* This must be done after loading the user LDT. 
*/ - .globl cpu_switch_load_gs -cpu_switch_load_gs: - movl PCB_GS(%edx),%gs - - /* XXX evidently setting debug registers needs to be - * routed through Xen - this appears to work - so I - * am leaving it as it is for now - (KMM) - */ - - /* Test if debug registers should be restored. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f - - /* - * Restore debug registers. The special code for dr7 is to - * preserve the current values of its reserved bits. - */ - movl PCB_DR6(%edx),%eax - movl %eax,%dr6 - movl PCB_DR3(%edx),%eax - movl %eax,%dr3 - movl PCB_DR2(%edx),%eax - movl %eax,%dr2 - movl PCB_DR1(%edx),%eax - movl %eax,%dr1 - movl PCB_DR0(%edx),%eax - movl %eax,%dr0 - movl %dr7,%eax - andl $0x0000fc00,%eax - movl PCB_DR7(%edx),%ecx - andl $~0x0000fc00,%ecx - orl %ecx,%eax - movl %eax,%dr7 -1: - ret - -#ifdef INVARIANTS -badsw1: - pushal - pushl $sw0_1 - call panic -sw0_1: .asciz "cpu_throw: no newthread supplied" - -badsw2: - pushal - pushl $sw0_2 - call panic -sw0_2: .asciz "cpu_switch: no curthread supplied" - -badsw3: - pushal - pushl $sw0_3 - call panic -sw0_3: .asciz "cpu_switch: no newthread supplied" -#endif - -/* - * savectx(pcb) - * Update pcb, saving current processor state. - */ -ENTRY(savectx) - /* Fetch PCB. */ - movl 4(%esp),%ecx - - /* Save caller's return address. Child won't execute this routine. */ - movl (%esp),%eax - movl %eax,PCB_EIP(%ecx) - -#if 0 - movl %cr3,%eax -#else - movl PCPU(CR3),%eax -#endif - movl %eax,PCB_CR3(%ecx) - - movl %ebx,PCB_EBX(%ecx) - movl %esp,PCB_ESP(%ecx) - movl %ebp,PCB_EBP(%ecx) - movl %esi,PCB_ESI(%ecx) - movl %edi,PCB_EDI(%ecx) - movl %gs,PCB_GS(%ecx) -#if 0 - pushfl - popl PCB_PSL(%ecx) -#endif -#ifdef DEV_NPX - /* - * If fpcurthread == NULL, then the npx h/w state is irrelevant and the - * state had better already be in the pcb. This is true for forks - * but not for dumps (the old book-keeping with FP flags in the pcb - * always lost for dumps because the dump pcb has 0 flags). 
- * - * If fpcurthread != NULL, then we have to save the npx h/w state to - * fpcurthread's pcb and copy it to the requested pcb, or save to the - * requested pcb and reload. Copying is easier because we would - * have to handle h/w bugs for reloading. We used to lose the - * parent's npx state for forks by forgetting to reload. - */ - pushfl - call ni_cli - movl PCPU(FPCURTHREAD),%eax - testl %eax,%eax - je 1f - - pushl %ecx - movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax - pushl %eax - pushl %eax - call npxsave - addl $4,%esp - popl %eax - popl %ecx - - pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx - pushl %ecx - pushl %eax - call bcopy - addl $12,%esp -1: - popfl -#endif /* DEV_NPX */ - - ret diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/symbols.raw Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,75 +0,0 @@ -# @(#)symbols.raw 7.6 (Berkeley) 5/8/91 -# -# $FreeBSD: src/sys/i386/i386/symbols.raw,v 1.15 1999/08/28 00:43:51 peter Exp $ -# - - -#gdb - _IdlePTD - _PTD - _panicstr - _atdevbase -# _version -#dmesg - _msgbufp -# _msgbuf -#iostat - _tk_nin - _tk_nout - _cp_time -# _io_info -#ps - _nswap - _maxslp - _ccpu - _fscale - _avail_start - _avail_end -#pstat -# _cons - _nswap - _swapblist -# _swaplist -#vmstat - _cp_time -# _rate -# _total -# _sum -# _rectime -# _pgintime - _boottime -#w - _swapdev - _nswap - _averunnable - _boottime -#netstat - _mbstat - _ipstat - _tcb - _tcpstat - _udb - _udpstat -# _rawcb - _ifnet -# _rthost -# _rtnet - _icmpstat - _filehead - _nfiles -# _rthashsize -# _radix_node_head -#routed - _ifnet -#rwho - _boottime -#savecore - _dumpdev - _dumplo - _time_second - _version - _dumpsize - _panicstr - _dumpmag -#deprecated -# _avenrun diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/sys_machdep.c Sun Dec 4 17:24:24 2005 +++ 
/dev/null Sun Dec 4 19:12:00 2005 @@ -1,703 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/sys_machdep.c,v 1.91 2003/09/07 05:23:28 davidxu Exp $"); - -#include "opt_kstack_pages.h" -#include "opt_mac.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/lock.h> -#include <sys/mac.h> -#include <sys/malloc.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/smp.h> -#include <sys/sysproto.h> -#include <sys/user.h> - -#include <vm/vm.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_extern.h> - -#include <machine/cpu.h> -#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ -#include <machine/proc.h> -#include <machine/sysarch.h> -#include <machine/xenfunc.h> - -#include <vm/vm_kern.h> /* for kernel_map */ - -#define MAX_LD 8192 -#define LD_PER_PAGE 512 -#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) -#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) - -void i386_reset_ldt(struct proc_ldt *pldt); - -static int i386_get_ldt(struct thread *, char *); -static int i386_set_ldt(struct thread *, char *); -static int i386_set_ldt_data(struct thread *, int start, int num, - union descriptor *descs); -static int i386_ldt_grow(struct thread *td, int len); -static int i386_get_ioperm(struct thread *, char *); -static int i386_set_ioperm(struct thread *, char *); -#ifdef SMP -static void set_user_ldt_rv(struct thread *); -#endif - -#ifndef _SYS_SYSPROTO_H_ -struct sysarch_args { - int op; - char *parms; -}; -#endif - -int -sysarch(td, uap) - struct thread *td; - register struct sysarch_args *uap; -{ - int error; - - mtx_lock(&Giant); - switch(uap->op) { - case I386_GET_LDT: - error = i386_get_ldt(td, uap->parms); - break; - - case I386_SET_LDT: - error = i386_set_ldt(td, uap->parms); - break; - case I386_GET_IOPERM: - error = i386_get_ioperm(td, uap->parms); - break; - case I386_SET_IOPERM: - error = i386_set_ioperm(td, uap->parms); - break; -#if 0 - case I386_VM86: - 
error = vm86_sysarch(td, uap->parms); - break; -#endif - default: - error = EINVAL; - break; - } - mtx_unlock(&Giant); - return (error); -} - -int -i386_extend_pcb(struct thread *td) -{ - int i, offset; - u_long *addr; - struct pcb_ext *ext; - struct soft_segment_descriptor ssd = { - 0, /* segment base address (overwritten) */ - ctob(IOPAGES + 1) - 1, /* length */ - SDT_SYS386TSS, /* segment type */ - 0, /* priority level */ - 1, /* descriptor present */ - 0, 0, - 0, /* default 32 size */ - 0 /* granularity */ - }; - - if (td->td_proc->p_flag & P_SA) - return (EINVAL); /* XXXKSE */ -/* XXXKSE All the code below only works in 1:1 needs changing */ - ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); - if (ext == 0) - return (ENOMEM); - bzero(ext, sizeof(struct pcb_ext)); - /* -16 is so we can convert a trapframe into vm86trapframe inplace */ - ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) - - sizeof(struct pcb) - 16; - ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - /* - * The last byte of the i/o map must be followed by an 0xff byte. - * We arbitrarily allocate 16 bytes here, to keep the starting - * address on a doubleword boundary. 
- */ - offset = PAGE_SIZE - 16; - ext->ext_tss.tss_ioopt = - (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; - ext->ext_iomap = (caddr_t)ext + offset; - ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; - - addr = (u_long *)ext->ext_vm86.vm86_intmap; - for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) - *addr++ = ~0; - - ssd.ssd_base = (unsigned)&ext->ext_tss; - ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); - ssdtosd(&ssd, &ext->ext_tssd); - - KASSERT(td->td_proc == curthread->td_proc, ("giving TSS to !curproc")); - KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); - mtx_lock_spin(&sched_lock); - td->td_pcb->pcb_ext = ext; - - /* switch to the new TSS after syscall completes */ - td->td_flags |= TDF_NEEDRESCHED; - mtx_unlock_spin(&sched_lock); - - return 0; -} - -static int -i386_set_ioperm(td, args) - struct thread *td; - char *args; -{ - int i, error; - struct i386_ioperm_args ua; - char *iomap; - - if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) - return (error); - -#ifdef MAC - if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0) - return (error); -#endif - if ((error = suser(td)) != 0) - return (error); - if ((error = securelevel_gt(td->td_ucred, 0)) != 0) - return (error); - /* - * XXX - * While this is restricted to root, we should probably figure out - * whether any other driver is using this i/o address, as so not to - * cause confusion. This probably requires a global 'usage registry'. 
- */ - - if (td->td_pcb->pcb_ext == 0) - if ((error = i386_extend_pcb(td)) != 0) - return (error); - iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; - - if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY) - return (EINVAL); - - for (i = ua.start; i < ua.start + ua.length; i++) { - if (ua.enable) - iomap[i >> 3] &= ~(1 << (i & 7)); - else - iomap[i >> 3] |= (1 << (i & 7)); - } - return (error); -} - -static int -i386_get_ioperm(td, args) - struct thread *td; - char *args; -{ - int i, state, error; - struct i386_ioperm_args ua; - char *iomap; - - if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) - return (error); - if (ua.start >= IOPAGES * PAGE_SIZE * NBBY) - return (EINVAL); - - if (td->td_pcb->pcb_ext == 0) { - ua.length = 0; - goto done; - } - - iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; - - i = ua.start; - state = (iomap[i >> 3] >> (i & 7)) & 1; - ua.enable = !state; - ua.length = 1; - - for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { - if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) - break; - ua.length++; - } - -done: - error = copyout(&ua, args, sizeof(struct i386_ioperm_args)); - return (error); -} - -/* - * Update the GDT entry pointing to the LDT to point to the LDT of the - * current process. - * - * This must be called with sched_lock held. Unfortunately, we can't use a - * mtx_assert() here because cpu_switch() calls this function after changing - * curproc but before sched_lock's owner is updated in mi_switch(). - */ -void -set_user_ldt(struct mdproc *mdp) -{ - struct proc_ldt *pldt; - pldt = mdp->md_ldt; - i386_reset_ldt(pldt); - PCPU_SET(currentldt, (int)pldt); - -} - -#ifdef SMP -static void -set_user_ldt_rv(struct thread *td) -{ - - if (td->td_proc != curthread->td_proc) - return; - - set_user_ldt(&td->td_proc->p_md); -} -#endif - -/* - * Must be called with either sched_lock free or held but not recursed. - * If it does not return NULL, it will return with it owned. 
- */ -struct proc_ldt * -user_ldt_alloc(struct mdproc *mdp, int len) -{ - struct proc_ldt *pldt,*new_ldt; - - - if (mtx_owned(&sched_lock)) - mtx_unlock_spin(&sched_lock); - mtx_assert(&sched_lock, MA_NOTOWNED); - MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), - M_SUBPROC, M_WAITOK); - - new_ldt->ldt_len = len = NEW_MAX_LD(len); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, - round_page(len * sizeof(union descriptor))); - if (new_ldt->ldt_base == NULL) { - FREE(new_ldt, M_SUBPROC); - return NULL; - } - new_ldt->ldt_refcnt = 1; - new_ldt->ldt_active = 0; - - mtx_lock_spin(&sched_lock); - - if ((pldt = mdp->md_ldt)) { - if (len > pldt->ldt_len) - len = pldt->ldt_len; - bcopy(pldt->ldt_base, new_ldt->ldt_base, - len * sizeof(union descriptor)); - } else { - bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); - } - pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, - new_ldt->ldt_len*sizeof(union descriptor)); - return new_ldt; -} - -/* - * Must be called either with sched_lock free or held but not recursed. - * If md_ldt is not NULL, it will return with sched_lock released. 
- */ -void -user_ldt_free(struct thread *td) -{ - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = mdp->md_ldt; - if (pldt == NULL) - return; - - if (!mtx_owned(&sched_lock)) - mtx_lock_spin(&sched_lock); - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); - if (td == PCPU_GET(curthread)) { - PCPU_SET(currentldt, _default_ldt); - i386_reset_ldt((struct proc_ldt *)_default_ldt); - } - - mdp->md_ldt = NULL; - if (--pldt->ldt_refcnt == 0) { - mtx_unlock_spin(&sched_lock); - - pmap_map_readwrite(kernel_pmap,(vm_offset_t) pldt->ldt_base, - pldt->ldt_len*sizeof(union descriptor)); - kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base, - pldt->ldt_len * sizeof(union descriptor)); - FREE(pldt, M_SUBPROC); - } else - mtx_unlock_spin(&sched_lock); -} - -void -i386_reset_ldt(struct proc_ldt *pldt) -{ - xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); -} - -static int -i386_get_ldt(td, args) - struct thread *td; - char *args; -{ - int error = 0; - struct proc_ldt *pldt = td->td_proc->p_md.md_ldt; - int nldt, num; - union descriptor *lp; - struct i386_ldt_args ua, *uap = &ua; - - if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) - return(error); - -#ifdef DEBUG - printf("i386_get_ldt: start=%d num=%d descs=%p\n", - uap->start, uap->num, (void *)uap->descs); -#endif - - /* verify range of LDTs exist */ - if ((uap->start < 0) || (uap->num <= 0)) - return(EINVAL); - - if (pldt) { - nldt = pldt->ldt_len; - num = min(uap->num, nldt); - lp = &((union descriptor *)(pldt->ldt_base))[uap->start]; - } else { - nldt = sizeof(ldt)/sizeof(ldt[0]); - num = min(uap->num, nldt); - lp = &ldt[uap->start]; - } - if (uap->start + num > nldt) - return(EINVAL); - - error = copyout(lp, uap->descs, num * sizeof(union descriptor)); - if (!error) - td->td_retval[0] = num; - - return(error); -} - -static int ldt_warnings; -#define NUM_LDT_WARNINGS 10 - -static int -i386_set_ldt(struct thread *td, char *args) -{ - int error = 0, i; - int largest_ld; - struct 
mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = 0; - struct i386_ldt_args ua, *uap = &ua; - union descriptor *descs, *dp; - int descs_size; - - if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) - return(error); -#ifdef DEBUG - printf("i386_set_ldt: start=%d num=%d descs=%p\n", - uap->start, uap->num, (void *)uap->descs); - -#endif - - if (uap->descs == NULL) { - /* Free descriptors */ - if (uap->start == 0 && uap->num == 0) { - /* - * Treat this as a special case, so userland needn't - * know magic number NLDT. - */ - uap->start = NLDT; - uap->num = MAX_LD - NLDT; - } - if (uap->start <= LUDATA_SEL || uap->num <= 0) - return (EINVAL); - mtx_lock_spin(&sched_lock); - pldt = mdp->md_ldt; - if (pldt == NULL || uap->start >= pldt->ldt_len) { - mtx_unlock_spin(&sched_lock); - return (0); - } - largest_ld = uap->start + uap->num; - if (largest_ld > pldt->ldt_len) - largest_ld = pldt->ldt_len; - i = largest_ld - uap->start; - bzero(&((union descriptor *)(pldt->ldt_base))[uap->start], - sizeof(union descriptor) * i); - mtx_unlock_spin(&sched_lock); - return (0); - } - - if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { - /* complain a for a while if using old methods */ - if (ldt_warnings++ < NUM_LDT_WARNINGS) { - printf("Warning: pid %d used static ldt allocation.\n", - td->td_proc->p_pid); - printf("See the i386_set_ldt man page for more info\n"); - } - /* verify range of descriptors to modify */ - largest_ld = uap->start + uap->num; - if (uap->start >= MAX_LD || - uap->num < 0 || largest_ld > MAX_LD) { - return (EINVAL); - } - } - - descs_size = uap->num * sizeof(union descriptor); - descs = (union descriptor *)kmem_alloc(kernel_map, descs_size); - if (descs == NULL) - return (ENOMEM); - error = copyin(uap->descs, descs, descs_size); - if (error) { - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (error); - } - - /* Check descriptors for access violations */ - for (i = 0; i < uap->num; i++) { - dp = &descs[i]; - - 
switch (dp->sd.sd_type) { - case SDT_SYSNULL: /* system null */ - dp->sd.sd_p = 0; - break; - case SDT_SYS286TSS: /* system 286 TSS available */ - case SDT_SYSLDT: /* system local descriptor table */ - case SDT_SYS286BSY: /* system 286 TSS busy */ - case SDT_SYSTASKGT: /* system task gate */ - case SDT_SYS286IGT: /* system 286 interrupt gate */ - case SDT_SYS286TGT: /* system 286 trap gate */ - case SDT_SYSNULL2: /* undefined by Intel */ - case SDT_SYS386TSS: /* system 386 TSS available */ - case SDT_SYSNULL3: /* undefined by Intel */ - case SDT_SYS386BSY: /* system 386 TSS busy */ - case SDT_SYSNULL4: /* undefined by Intel */ - case SDT_SYS386IGT: /* system 386 interrupt gate */ - case SDT_SYS386TGT: /* system 386 trap gate */ - case SDT_SYS286CGT: /* system 286 call gate */ - case SDT_SYS386CGT: /* system 386 call gate */ - /* I can't think of any reason to allow a user proc - * to create a segment of these types. They are - * for OS use only. - */ - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (EACCES); - /*NOTREACHED*/ - - /* memory segment types */ - case SDT_MEMEC: /* memory execute only conforming */ - case SDT_MEMEAC: /* memory execute only accessed conforming */ - case SDT_MEMERC: /* memory execute read conforming */ - case SDT_MEMERAC: /* memory execute read accessed conforming */ - /* Must be "present" if executable and conforming. 
*/ - if (dp->sd.sd_p == 0) { - kmem_free(kernel_map, (vm_offset_t)descs, - descs_size); - return (EACCES); - } - break; - case SDT_MEMRO: /* memory read only */ - case SDT_MEMROA: /* memory read only accessed */ - case SDT_MEMRW: /* memory read write */ - case SDT_MEMRWA: /* memory read write accessed */ - case SDT_MEMROD: /* memory read only expand dwn limit */ - case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ - case SDT_MEMRWD: /* memory read write expand dwn limit */ - case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ - case SDT_MEME: /* memory execute only */ - case SDT_MEMEA: /* memory execute only accessed */ - case SDT_MEMER: /* memory execute read */ - case SDT_MEMERA: /* memory execute read accessed */ - break; - default: - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return(EINVAL); - /*NOTREACHED*/ - } - - /* Only user (ring-3) descriptors may be present. */ - if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) { - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (EACCES); - } - } - - if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { - /* Allocate a free slot */ - pldt = mdp->md_ldt; - if (pldt == NULL) { - load_gs(0); - error = i386_ldt_grow(td, NLDT+1); - if (error) { - kmem_free(kernel_map, (vm_offset_t)descs, - descs_size); - return (error); - } - pldt = mdp->md_ldt; - } -again: - mtx_lock_spin(&sched_lock); - /* - * start scanning a bit up to leave room for NVidia and - * Wine, which still user the "Blat" method of allocation. 
- */ - dp = &((union descriptor *)(pldt->ldt_base))[NLDT]; - for (i = NLDT; i < pldt->ldt_len; ++i) { - if (dp->sd.sd_type == SDT_SYSNULL) - break; - dp++; - } - if (i >= pldt->ldt_len) { - mtx_unlock_spin(&sched_lock); - error = i386_ldt_grow(td, pldt->ldt_len+1); - if (error) { - kmem_free(kernel_map, (vm_offset_t)descs, - descs_size); - return (error); - } - goto again; - } - uap->start = i; - error = i386_set_ldt_data(td, i, 1, descs); - mtx_unlock_spin(&sched_lock); - } else { - largest_ld = uap->start + uap->num; - error = i386_ldt_grow(td, largest_ld); - if (error == 0) { - mtx_lock_spin(&sched_lock); - error = i386_set_ldt_data(td, uap->start, uap->num, - descs); - mtx_unlock_spin(&sched_lock); - } - } - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - if (error == 0) - td->td_retval[0] = uap->start; - return (error); -} -typedef struct uint64_lohi { - unsigned long lo; - unsigned long hi; -} uint64_lohi; - -static int -i386_set_ldt_data(struct thread *td, int start, int num, - union descriptor *descs) -{ - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = mdp->md_ldt; - int i, error; - - mtx_assert(&sched_lock, MA_OWNED); - - /* Fill in range */ - for (i = 0; i < num; i++) { - error = HYPERVISOR_update_descriptor(vtomach(&((union descriptor *)(pldt->ldt_base))[start + i]), ((uint64_lohi *)descs)[i].lo, ((uint64_lohi *)descs)[i].hi); - if (error) - panic("failed to update ldt: %d", error); - } - return (0); -} - -static int -i386_ldt_grow(struct thread *td, int len) -{ - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt; - caddr_t old_ldt_base; - int old_ldt_len; - - if (len > MAX_LD) - return (ENOMEM); - if (len < NLDT+1) - len = NLDT+1; - pldt = mdp->md_ldt; - /* allocate user ldt */ - if (!pldt || len > pldt->ldt_len) { - struct proc_ldt *new_ldt = user_ldt_alloc(mdp, len); - if (new_ldt == NULL) - return (ENOMEM); - pldt = mdp->md_ldt; - /* sched_lock was held by user_ldt_alloc */ - if (pldt) { - if 
(new_ldt->ldt_len > pldt->ldt_len) { - old_ldt_base = pldt->ldt_base; - old_ldt_len = pldt->ldt_len; - pldt->ldt_sd = new_ldt->ldt_sd; - pldt->ldt_base = new_ldt->ldt_base; - pldt->ldt_len = new_ldt->ldt_len; - mtx_unlock_spin(&sched_lock); - pmap_map_readwrite(kernel_pmap, - (vm_offset_t)old_ldt_base, - old_ldt_len * sizeof(union descriptor)); - kmem_free(kernel_map, (vm_offset_t)old_ldt_base, - old_ldt_len * sizeof(union descriptor)); - FREE(new_ldt, M_SUBPROC); - mtx_lock_spin(&sched_lock); - } else { - /* - * If other threads already did the work, - * do nothing - */ - mtx_unlock_spin(&sched_lock); - pmap_map_readwrite(kernel_pmap, - (vm_offset_t)new_ldt->ldt_base, - new_ldt->ldt_len * sizeof(union descriptor)); - kmem_free(kernel_map, - (vm_offset_t)new_ldt->ldt_base, - new_ldt->ldt_len * sizeof(union descriptor)); - FREE(new_ldt, M_SUBPROC); - return (0); - } - } else { - mdp->md_ldt = pldt = new_ldt; - } -#ifdef SMP - mtx_unlock_spin(&sched_lock); - /* signal other cpus to reload ldt */ - smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, - NULL, td); -#else - set_user_ldt(mdp); - mtx_unlock_spin(&sched_lock); -#endif - } - return (0); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,998 +0,0 @@ -/*- - * Copyright (C) 1994, David Greenman - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the University of Utah, and William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/trap.c,v 1.260 2003/11/03 21:53:37 jhb Exp $"); - -/* - * 386 Trap and System call handling - */ - -#include "opt_clock.h" -#include "opt_cpu.h" -#include "opt_isa.h" -#include "opt_ktrace.h" -#include "opt_npx.h" -#include "opt_trap.h" - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/pioctl.h> -#include <sys/ptrace.h> -#include <sys/kdb.h> -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/resourcevar.h> -#include <sys/signalvar.h> -#include <sys/syscall.h> -#include <sys/sysctl.h> -#include <sys/sysent.h> -#include <sys/uio.h> -#include <sys/vmmeter.h> -#ifdef KTRACE -#include <sys/ktrace.h> -#endif - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <vm/vm_map.h> -#include <vm/vm_page.h> -#include <vm/vm_extern.h> - -#include <machine/cpu.h> -#include <machine/intr_machdep.h> -#include <machine/md_var.h> -#include <machine/pcb.h> -#ifdef SMP -#include <machine/smp.h> -#endif -#include <machine/tss.h> -#ifdef POWERFAIL_NMI -#include <sys/syslog.h> -#include <machine/clock.h> -#endif - - -#include <machine/xenfunc.h> -#include <machine/hypervisor.h> -#include <machine/xenvar.h> -#include <machine/hypervisor-ifs.h> - - -extern void trap(struct trapframe frame); -extern void syscall(struct trapframe frame); - -static int trap_pfault(struct trapframe *, int, vm_offset_t); -static void trap_fatal(struct trapframe *, vm_offset_t); -void dblfault_handler(void); - -extern inthand_t IDTVEC(lcall_syscall); - -#define MAX_TRAP_MSG 28 -static char *trap_msg[] = { - "", /* 0 unused */ - "privileged instruction fault", /* 1 T_PRIVINFLT */ - "", /* 2 unused */ - "breakpoint instruction fault", /* 3 T_BPTFLT */ - "", /* 4 unused */ - "", /* 5 unused */ - "arithmetic trap", /* 6 T_ARITHTRAP */ - 
"", /* 7 unused */ - "", /* 8 unused */ - "general protection fault", /* 9 T_PROTFLT */ - "trace trap", /* 10 T_TRCTRAP */ - "", /* 11 unused */ - "page fault", /* 12 T_PAGEFLT */ - "", /* 13 unused */ - "alignment fault", /* 14 T_ALIGNFLT */ - "", /* 15 unused */ - "", /* 16 unused */ - "hypervisor callback", /* 17 T_HYPCALLBACK */ - "integer divide fault", /* 18 T_DIVIDE */ - "non-maskable interrupt trap", /* 19 T_NMI */ - "overflow trap", /* 20 T_OFLOW */ - "FPU bounds check fault", /* 21 T_BOUND */ - "FPU device not available", /* 22 T_DNA */ - "double fault", /* 23 T_DOUBLEFLT */ - "FPU operand fetch fault", /* 24 T_FPOPFLT */ - "invalid TSS fault", /* 25 T_TSSFLT */ - "segment not present fault", /* 26 T_SEGNPFLT */ - "stack fault", /* 27 T_STKFLT */ - "machine check trap", /* 28 T_MCHK */ -}; - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -extern int has_f00f_bug; -#endif - -#ifdef KDB -static int kdb_on_nmi = 1; -SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW, - &kdb_on_nmi, 0, "Go to KDB on NMI"); -#endif -static int panic_on_nmi = 1; -SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, - &panic_on_nmi, 0, "Panic on NMI"); - -#ifdef WITNESS -extern char *syscallnames[]; -#endif - -#ifdef DEVICE_POLLING -extern u_int32_t poll_in_trap; -extern int ether_poll(int count); -#endif /* DEVICE_POLLING */ - - -/* - * Exception, fault, and trap interface to the FreeBSD kernel. - * This common code is called from assembly language IDT gate entry - * routines that prepare a suitable stack frame, and restore this - * frame after the exception has been processed. 
- */ - -void -trap(struct trapframe frame) -{ - struct thread *td = curthread; - struct proc *p = td->td_proc; - u_int sticks = 0; - int i = 0, ucode = 0, type, code; - vm_offset_t eva; - -#ifdef POWERFAIL_NMI - static int lastalert = 0; -#endif - - atomic_add_int(&cnt.v_trap, 1); - type = frame.tf_trapno; -#ifdef KDB - if (kdb_active) { - kdb_reenter(); - goto out; - } -#endif - - eva = 0; - code = frame.tf_err; - - if (type == T_HYPCALLBACK) { - evtchn_do_upcall((struct intrframe *)&frame); - if (ISPL(frame.tf_cs) == SEL_KPL) - goto out; - goto userout; - } else if (type == 0) - panic("invalid trap type/code %d/%d\n",type, code); - - - if (type == T_PAGEFLT) { - /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. This problem is worked around by using - * an interrupt gate for the pagefault handler. We - * are finally ready to read %cr2 and then must - * reenable interrupts. - * - * If we get a page fault while in a critical section, then - * it is most likely a fatal kernel page fault. The kernel - * is already going to panic trying to get a sleep lock to - * do the VM lookup, so just consider it a fatal trap so the - * kernel can print out a useful trap message and even get - * to the debugger. 
- */ - eva = PCPU_GET(cr2); - - if (td->td_critnest != 0) - trap_fatal(&frame, eva); - } - -#ifdef DEVICE_POLLING - if (poll_in_trap) - ether_poll(poll_in_trap); -#endif /* DEVICE_POLLING */ - - if ((ISPL(frame.tf_cs) == SEL_UPL) - || ((frame.tf_eflags & PSL_VM) && - !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) { - /* user trap */ - - sticks = td->td_sticks; - td->td_frame = &frame; - if (td->td_ucred != p->p_ucred) - cred_update_thread(td); - - switch (type) { - case T_PRIVINFLT: /* privileged instruction fault */ - ucode = type; - i = SIGILL; - break; - - case T_BPTFLT: /* bpt instruction fault */ - case T_TRCTRAP: /* trace trap */ - enable_intr(); - frame.tf_eflags &= ~PSL_T; - i = SIGTRAP; - break; - - case T_ARITHTRAP: /* arithmetic trap */ -#ifdef DEV_NPX - ucode = npxtrap(); - if (ucode == -1) - goto userout; -#else - ucode = code; -#endif - i = SIGFPE; - break; - - case T_PROTFLT: /* general protection fault */ - case T_STKFLT: /* stack fault */ - case T_SEGNPFLT: /* segment not present fault */ - case T_TSSFLT: /* invalid TSS fault */ - case T_DOUBLEFLT: /* double fault */ - default: - ucode = code + BUS_SEGM_FAULT ; - printf("unexpected trap type/code %d/%d\n",type, code); /* XXX temporary */ - - i = SIGBUS; - break; - - case T_PAGEFLT: /* page fault */ - if (td->td_pflags & TDP_SA) - thread_user_enter(td); - - i = trap_pfault(&frame, TRUE, eva); -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (i == -2) { - /* - * The f00f hack workaround has triggered, so - * treat the fault as an illegal instruction - * (T_PRIVINFLT) instead of a page fault. - */ - type = frame.tf_trapno = T_PRIVINFLT; - - /* Proceed as in that case. 
*/ - ucode = type; - i = SIGILL; - break; - } -#endif - if (i == -1) - goto userout; - if (i == 0) - goto user; - - ucode = T_PAGEFLT; - break; - - case T_DIVIDE: /* integer divide fault */ - ucode = FPE_INTDIV; - i = SIGFPE; - break; - -#ifdef DEV_ISA - case T_NMI: -#ifdef POWERFAIL_NMI -#ifndef TIMER_FREQ -# define TIMER_FREQ 1193182 -#endif - mtx_lock(&Giant); - if (time_second - lastalert > 10) { - log(LOG_WARNING, "NMI: power fail\n"); - sysbeep(TIMER_FREQ/880, hz); - lastalert = time_second; - } - mtx_unlock(&Giant); - goto userout; -#else /* !POWERFAIL_NMI */ - /* machine/parity/power fail/"kitchen sink" faults */ - /* XXX Giant */ - if (isa_nmi(code) == 0) { -#ifdef KDB - /* - * NMI can be hooked up to a pushbutton - * for debugging. - */ - if (kdb_on_nmi) { - printf ("NMI ... going to debugger\n"); - kdb_trap (type, 0, &frame); - } -#endif /* KDB */ - goto userout; - } else if (panic_on_nmi) - panic("NMI indicates hardware failure"); - break; -#endif /* POWERFAIL_NMI */ -#endif /* DEV_ISA */ - - case T_OFLOW: /* integer overflow fault */ - ucode = FPE_INTOVF; - i = SIGFPE; - break; - - case T_BOUND: /* bounds check fault */ - ucode = FPE_FLTSUB; - i = SIGFPE; - break; - - case T_DNA: -#ifdef DEV_NPX - /* transparent fault (due to context switch "late") */ - if (npxdna()) - goto userout; -#endif - i = SIGFPE; - ucode = FPE_FPU_NP_TRAP; - break; - - case T_FPOPFLT: /* FPU operand fetch fault */ - ucode = T_FPOPFLT; - i = SIGILL; - break; - - case T_XMMFLT: /* SIMD floating-point exception */ - ucode = 0; /* XXX */ - i = SIGFPE; - break; - } - } else { - /* kernel trap */ - - KASSERT(cold || td->td_ucred != NULL, - ("kernel trap doesn't have ucred")); - switch (type) { - case T_PAGEFLT: /* page fault */ - (void) trap_pfault(&frame, FALSE, eva); - goto out; - - case T_DNA: -#ifdef DEV_NPX - /* - * The kernel is apparently using npx for copying. - * XXX this should be fatal unless the kernel has - * registered such use. 
- */ - if (npxdna()) - goto out; -#endif - break; - - /* - * The following two traps can happen in - * vm86 mode, and, if so, we want to handle - * them specially. - */ - case T_PROTFLT: /* general protection fault */ - case T_STKFLT: /* stack fault */ -#if 0 - if (frame.tf_eflags & PSL_VM) { - i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) - /* - * returns to original process - */ - vm86_trap((struct vm86frame *)&frame); - goto out; - } -#endif - if (type == T_STKFLT) - break; - - /* FALL THROUGH */ - - case T_SEGNPFLT: /* segment not present fault */ - if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL) - break; - - /* - * Invalid %fs's and %gs's can be created using - * procfs or PT_SETREGS or by invalidating the - * underlying LDT entry. This causes a fault - * in kernel mode when the kernel attempts to - * switch contexts. Lose the bad context - * (XXX) so that we can continue, and generate - * a signal. - */ - if (frame.tf_eip == (int)cpu_switch_load_gs) { - PCPU_GET(curpcb)->pcb_gs = 0; -#if 0 - PROC_LOCK(p); - psignal(p, SIGBUS); - PROC_UNLOCK(p); -#endif - goto out; - } - - if (td->td_intr_nesting_level != 0) - break; - - /* - * Invalid segment selectors and out of bounds - * %eip's and %esp's can be set up in user mode. - * This causes a fault in kernel mode when the - * kernel tries to return to user mode. We want - * to get this fault so that we can fix the - * problem here and not have to check all the - * selectors and pointers when the user changes - * them. 
- */ - if (frame.tf_eip == (int)doreti_iret) { - frame.tf_eip = (int)doreti_iret_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_ds) { - frame.tf_eip = (int)doreti_popl_ds_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_es) { - frame.tf_eip = (int)doreti_popl_es_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_fs) { - frame.tf_eip = (int)doreti_popl_fs_fault; - goto out; - } - if (PCPU_GET(curpcb)->pcb_onfault != NULL) { - frame.tf_eip = - (int)PCPU_GET(curpcb)->pcb_onfault; - goto out; - } - break; - - case T_TSSFLT: - /* - * PSL_NT can be set in user mode and isn't cleared - * automatically when the kernel is entered. This - * causes a TSS fault when the kernel attempts to - * `iret' because the TSS link is uninitialized. We - * want to get this fault so that we can fix the - * problem here and not every time the kernel is - * entered. - */ - if (frame.tf_eflags & PSL_NT) { - frame.tf_eflags &= ~PSL_NT; - goto out; - } - break; - - case T_TRCTRAP: /* trace trap */ - if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) { - /* - * We've just entered system mode via the - * syscall lcall. Continue single stepping - * silently until the syscall handler has - * saved the flags. - */ - goto out; - } - if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) { - /* - * The syscall handler has now saved the - * flags. Stop single stepping it. - */ - frame.tf_eflags &= ~PSL_T; - goto out; - } - /* - * Ignore debug register trace traps due to - * accesses in the user's address space, which - * can happen under several conditions such as - * if a user sets a watchpoint on a buffer and - * then passes that buffer to a system call. - * We still want to get TRCTRAPS for addresses - * in kernel space because that is useful when - * debugging the kernel. 
- */ - /* XXX Giant */ - if (user_dbreg_trap() && - !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) { - /* - * Reset breakpoint bits because the - * processor doesn't - */ - load_dr6(rdr6() & 0xfffffff0); - goto out; - } - /* - * FALLTHROUGH (TRCTRAP kernel mode, kernel address) - */ - case T_BPTFLT: - /* - * If KDB is enabled, let it handle the debugger trap. - * Otherwise, debugger traps "can't happen". - */ -#ifdef KDB - /* XXX Giant */ - if (kdb_trap (type, 0, &frame)) - goto out; -#endif - break; - -#ifdef DEV_ISA - case T_NMI: -#ifdef POWERFAIL_NMI - mtx_lock(&Giant); - if (time_second - lastalert > 10) { - log(LOG_WARNING, "NMI: power fail\n"); - sysbeep(TIMER_FREQ/880, hz); - lastalert = time_second; - } - mtx_unlock(&Giant); - goto out; -#else /* !POWERFAIL_NMI */ - /* XXX Giant */ - /* machine/parity/power fail/"kitchen sink" faults */ - if (isa_nmi(code) == 0) { -#ifdef KDB - /* - * NMI can be hooked up to a pushbutton - * for debugging. - */ - if (kdb_on_nmi) { - printf ("NMI ... going to debugger\n"); - kdb_trap (type, 0, &frame); - } -#endif /* KDB */ - goto out; - } else if (panic_on_nmi == 0) - goto out; - /* FALLTHROUGH */ -#endif /* POWERFAIL_NMI */ -#endif /* DEV_ISA */ - } - - trap_fatal(&frame, eva); - goto out; - } - - /* Translate fault for emulators (e.g. 
Linux) */ - if (*p->p_sysent->sv_transtrap) - i = (*p->p_sysent->sv_transtrap)(i, type); - - trapsignal(td, i, ucode); - -#if 1 /* DEBUG */ - if (type <= MAX_TRAP_MSG) { - uprintf("fatal process exception: %s", - trap_msg[type]); - if ((type == T_PAGEFLT) || (type == T_PROTFLT)) - uprintf(", fault VA = 0x%lx", (u_long)eva); - uprintf("\n"); - } -#endif - -user: - userret(td, &frame, sticks); - mtx_assert(&Giant, MA_NOTOWNED); -userout: -out: - return; -} - -static int -trap_pfault(frame, usermode, eva) - struct trapframe *frame; - int usermode; - vm_offset_t eva; -{ - vm_offset_t va; - struct vmspace *vm = NULL; - vm_map_t map = 0; - int rv = 0; - vm_prot_t ftype; - struct thread *td = curthread; - struct proc *p = td->td_proc; - - va = trunc_page(eva); - if (va >= KERNBASE) { - /* - * Don't allow user-mode faults in kernel address space. - * An exception: if the faulting address is the invalid - * instruction entry in the IDT, then the Intel Pentium - * F00F bug workaround was triggered, and we need to - * treat it is as an illegal instruction, and not a page - * fault. - */ -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) - return -2; -#endif - if (usermode) - goto nogo; - - map = kernel_map; - } else { - /* - * This is a fault on non-kernel virtual memory. - * vm is initialized above to NULL. If curproc is NULL - * or curproc->p_vmspace is NULL the fault is fatal. - */ - if (p != NULL) - vm = p->p_vmspace; - - if (vm == NULL) - goto nogo; - - map = &vm->vm_map; - } - - if (frame->tf_err & PGEX_W) - ftype = VM_PROT_WRITE; - else - ftype = VM_PROT_READ; - - if (map != kernel_map) { - /* - * Keep swapout from messing with us during this - * critical time. - */ - PROC_LOCK(p); - ++p->p_lock; - PROC_UNLOCK(p); - - /* Fault in the user page: */ - rv = vm_fault(map, va, ftype, - (ftype & VM_PROT_WRITE) ? 
VM_FAULT_DIRTY - : VM_FAULT_NORMAL); - - PROC_LOCK(p); - --p->p_lock; - PROC_UNLOCK(p); - } else { - /* - * Don't have to worry about process locking or stacks in the - * kernel. - */ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); - } - if (rv == KERN_SUCCESS) - return (0); -nogo: - if (!usermode) { - if (td->td_intr_nesting_level == 0 && - PCPU_GET(curpcb)->pcb_onfault != NULL) { - frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; - return (0); - } - trap_fatal(frame, eva); - return (-1); - } - - /* kludge to pass faulting virtual address to sendsig */ - frame->tf_err = eva; - - return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); -} - -static void -trap_fatal(struct trapframe *frame, vm_offset_t eva) -{ - int code, type, ss, esp; - struct soft_segment_descriptor softseg; - - code = frame->tf_err; - type = frame->tf_trapno; -#if 0 - XENPRINTF("trying to read gdt\n"); - sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); - XENPRINTF("read gdt\n"); -#endif - if (type <= MAX_TRAP_MSG) - printf("\n\nFatal trap %d: %s while in %s mode\n", - type, trap_msg[type], - frame->tf_eflags & PSL_VM ? "vm86" : - ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); -#ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - printf("cpuid = %d; ", PCPU_GET(cpuid)); - printf("apic id = %02x\n", PCPU_GET(apic_id)); -#endif - if (type == T_PAGEFLT) { - printf("fault virtual address = 0x%x\n", eva); - printf("fault code = %s %s, %s\n", - code & PGEX_U ? "user" : "supervisor", - code & PGEX_W ? "write" : "read", - code & PGEX_P ? 
"protection violation" : "page not present"); - } - printf("instruction pointer = 0x%x:0x%x\n", - frame->tf_cs & 0xffff, frame->tf_eip); - if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { - ss = frame->tf_ss & 0xffff; - esp = frame->tf_esp; - } else { - ss = GSEL(GDATA_SEL, SEL_KPL); - esp = (int)&frame->tf_esp; - } - printf("stack pointer = 0x%x:0x%x\n", ss, esp); - printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); - printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", - softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); - printf(" = DPL %d, pres %d, def32 %d, gran %d\n", - softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, - softseg.ssd_gran); - printf("processor eflags = "); - if (frame->tf_eflags & PSL_T) - printf("trace trap, "); - if (frame->tf_eflags & PSL_I) - printf("interrupt enabled, "); - if (frame->tf_eflags & PSL_NT) - printf("nested task, "); - if (frame->tf_eflags & PSL_RF) - printf("resume, "); - if (frame->tf_eflags & PSL_VM) - printf("vm86, "); - printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); - printf("current process = "); - if (curproc) { - printf("%lu (%s)\n", - (u_long)curproc->p_pid, curproc->p_comm ? - curproc->p_comm : ""); - } else { - printf("Idle\n"); - } - /* XXX */ - -#ifdef KDB - if (kdb_trap(type, 0, frame)) - return; -#endif - printf("trap number = %d\n", type); - if (type <= MAX_TRAP_MSG) - panic("%s", trap_msg[type]); - else - panic("unknown/reserved trap"); -} - -/* - * Double fault handler. Called when a fault occurs while writing - * a frame for a trap/exception onto the stack. This usually occurs - * when the stack overflows (such is the case with infinite recursion, - * for example). - * - * XXX Note that the current PTD gets replaced by IdlePTD when the - * task switch occurs. This means that the stack that was active at - * the time of the double fault is not available at <kstack> unless - * the machine was idle when the double fault occurred. 
The downside - * of this is that "trace <ebp>" in ddb won't work. - */ -void -dblfault_handler() -{ - printf("\nFatal double fault:\n"); - printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); - printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); - printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); -#ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - printf("cpuid = %d; ", PCPU_GET(cpuid)); - printf("apic id = %02x\n", PCPU_GET(apic_id)); -#endif - panic("double fault"); -} - -/* - * syscall - system call request C handler - * - * A system call is essentially treated as a trap. - */ -void -syscall(frame) - struct trapframe frame; -{ - caddr_t params; - struct sysent *callp; - struct thread *td = curthread; - struct proc *p = td->td_proc; - register_t orig_tf_eflags; - u_int sticks; - int error; - int narg; - int args[8]; - u_int code; - - /* - * note: PCPU_LAZY_INC() can only be used if we can afford - * occassional inaccuracy in the count. - */ - PCPU_LAZY_INC(cnt.v_syscall); - -#ifdef DIAGNOSTIC - if (ISPL(frame.tf_cs) != SEL_UPL) { - mtx_lock(&Giant); /* try to stabilize the system XXX */ - panic("syscall"); - /* NOT REACHED */ - mtx_unlock(&Giant); - } -#endif - - sticks = td->td_sticks; - td->td_frame = &frame; - if (td->td_ucred != p->p_ucred) - cred_update_thread(td); - if (p->p_flag & P_SA) - thread_user_enter(td); - params = (caddr_t)frame.tf_esp + sizeof(int); - code = frame.tf_eax; - orig_tf_eflags = frame.tf_eflags; - - if (p->p_sysent->sv_prepsyscall) { - /* - * The prep code is MP aware. - */ - (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); - } else { - /* - * Need to check if this is a 32 bit or 64 bit syscall. - * fuword is MP aware. - */ - if (code == SYS_syscall) { - /* - * Code is first argument, followed by actual args. 
- */ - code = fuword(params); - params += sizeof(int); - } else if (code == SYS___syscall) { - /* - * Like syscall, but code is a quad, so as to maintain - * quad alignment for the rest of the arguments. - */ - code = fuword(params); - params += sizeof(quad_t); - } - } - - if (p->p_sysent->sv_mask) - code &= p->p_sysent->sv_mask; - - if (code >= p->p_sysent->sv_size) - callp = &p->p_sysent->sv_table[0]; - else - callp = &p->p_sysent->sv_table[code]; - - narg = callp->sy_narg & SYF_ARGMASK; - - /* - * copyin and the ktrsyscall()/ktrsysret() code is MP-aware - */ - if (params != NULL && narg != 0) - error = copyin(params, (caddr_t)args, - (u_int)(narg * sizeof(int))); - else - error = 0; - -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(code, narg, args); -#endif - CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td, - td->td_proc->p_pid, td->td_proc->p_comm, code); - - /* - * Try to run the syscall without Giant if the syscall - * is MP safe. - */ - if ((callp->sy_narg & SYF_MPSAFE) == 0) - mtx_lock(&Giant); - - if (error == 0) { - td->td_retval[0] = 0; - td->td_retval[1] = frame.tf_edx; - - STOPEVENT(p, S_SCE, narg); - - PTRACESTOP_SC(p, td, S_PT_SCE); - - error = (*callp->sy_call)(td, args); - } - - switch (error) { - case 0: - frame.tf_eax = td->td_retval[0]; - frame.tf_edx = td->td_retval[1]; - frame.tf_eflags &= ~PSL_C; - break; - - case ERESTART: - /* - * Reconstruct pc, assuming lcall $X,y is 7 bytes, - * int 0x80 is 2 bytes. We saved this in tf_err. - */ - frame.tf_eip -= frame.tf_err; - break; - - case EJUSTRETURN: - break; - - default: - if (p->p_sysent->sv_errsize) { - if (error >= p->p_sysent->sv_errsize) - error = -1; /* XXX */ - else - error = p->p_sysent->sv_errtbl[error]; - } - frame.tf_eax = error; - frame.tf_eflags |= PSL_C; - break; - } - - /* - * Release Giant if we previously set it. - */ - if ((callp->sy_narg & SYF_MPSAFE) == 0) - mtx_unlock(&Giant); - - /* - * Traced syscall. 
- */ - if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { - frame.tf_eflags &= ~PSL_T; - trapsignal(td, SIGTRAP, 0); - } - - /* - * Handle reschedule and other end-of-syscall issues - */ - userret(td, &frame, sticks); - -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSRET)) - ktrsysret(code, error, td->td_retval[0]); -#endif - - /* - * This works because errno is findable through the - * register set. If we ever support an emulation where this - * is not the case, this code will need to be revisited. - */ - STOPEVENT(p, S_SCX, code); - - PTRACESTOP_SC(p, td, S_PT_SCX); - - WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", - (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); - mtx_assert(&sched_lock, MA_NOTOWNED); - mtx_assert(&Giant, MA_NOTOWNED); -} - diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,620 +0,0 @@ -/*- - * Copyright (c) 1982, 1986 The Regents of the University of California. - * Copyright (c) 1989, 1990 William Jolitz - * Copyright (c) 1994 John Dyson - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department, and William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 - * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.219 2003/11/17 18:22:24 alc Exp $"); - -#include "opt_npx.h" -#ifdef PC98 -#include "opt_pc98.h" -#endif -#include "opt_reset.h" -#include "opt_cpu.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/kse.h> -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/sf_buf.h> -#include <sys/smp.h> -#include <sys/sysctl.h> -#include <sys/unistd.h> -#include <sys/user.h> -#include <sys/vnode.h> -#include <sys/vmmeter.h> - -#include <machine/cpu.h> -#include <machine/cputypes.h> -#include <machine/md_var.h> -#include <machine/pcb.h> -#include <machine/pcb_ext.h> - -#include <vm/vm.h> -#include <vm/vm_extern.h> -#include <vm/vm_kern.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> -#include <vm/vm_param.h> - -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif - -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif - -#include <machine/xenfunc.h> -#if 0 -#ifdef SMP -static void cpu_reset_proxy(void); -static u_int cpu_reset_proxyid; -static volatile u_int cpu_reset_proxy_active; -#endif -#endif -static void sf_buf_init(void *arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) - -LIST_HEAD(sf_head, sf_buf); - -/* - * A hash table of active sendfile(2) buffers - */ -static TAILQ_HEAD(, sf_buf) sf_buf_freelist; - - -static struct sf_head *sf_buf_active; -static u_long sf_buf_hashmask; - - -#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) - -static u_int sf_buf_alloc_want; - -/* - * A lock used to synchronize access to the hash table and free list - */ -static struct mtx sf_buf_lock; - -extern int _ucodesel, _udatasel; - -/* - * Finish 
a fork operation, with process p2 nearly set up. - * Copy and update the pcb, set up the stack so that the child - * ready to run and return to user mode. - */ -void -cpu_fork(struct thread *td1, - struct proc *p2, - struct thread *td2, - int flags) -{ - register struct proc *p1; - struct pcb *pcb2; - struct mdproc *mdp2; -#ifdef DEV_NPX - register_t savecrit; -#endif - - p1 = td1->td_proc; - if ((flags & RFPROC) == 0) { - if ((flags & RFMEM) == 0) { - /* unshare user LDT */ - struct mdproc *mdp1 = &p1->p_md; - struct proc_ldt *pldt = mdp1->md_ldt; - if (pldt && pldt->ldt_refcnt > 1) { - pldt = user_ldt_alloc(mdp1, pldt->ldt_len); - if (pldt == NULL) - panic("could not copy LDT"); - mdp1->md_ldt = pldt; - set_user_ldt(mdp1); - user_ldt_free(td1); - } - } - return; - } - - /* Ensure that p1's pcb is up to date. */ -#ifdef DEV_NPX - if (td1 == curthread) - td1->td_pcb->pcb_gs = rgs(); - savecrit = intr_disable(); - if (PCPU_GET(fpcurthread) == td1) - npxsave(&td1->td_pcb->pcb_save); - intr_restore(savecrit); -#endif - - /* Point the pcb to the top of the stack */ - pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; - td2->td_pcb = pcb2; - - /* Copy p1's pcb */ - bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - - /* Point mdproc and then copy over td1's contents */ - mdp2 = &p2->p_md; - bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); - - /* - * Create a new fresh stack for the new process. - * Copy the trap frame for the return to user mode as if from a - * syscall. This copies most of the user mode register values. - */ - td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb) - 1; - bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); - - td2->td_frame->tf_eax = 0; /* Child returns zero */ - td2->td_frame->tf_eflags &= ~PSL_C; /* success */ - td2->td_frame->tf_edx = 1; - /* - * Set registers for trampoline to user mode. Leave space for the - * return address on stack. These are the kernel mode register values. 
- */ - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); - pcb2->pcb_edi = 0; - pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ - pcb2->pcb_ebp = 0; - pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); - pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ - pcb2->pcb_eip = (int)fork_trampoline; - pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */ - pcb2->pcb_gs = rgs(); - /*- - * pcb2->pcb_dr*: cloned above. - * pcb2->pcb_savefpu: cloned above. - * pcb2->pcb_flags: cloned above. - * pcb2->pcb_onfault: cloned above (always NULL here?). - * pcb2->pcb_gs: cloned above. - * pcb2->pcb_ext: cleared below. - */ - - /* - * XXX don't copy the i/o pages. this should probably be fixed. - */ - pcb2->pcb_ext = 0; - - /* Copy the LDT, if necessary. */ - mtx_lock_spin(&sched_lock); - - if (mdp2->md_ldt != 0) { - if (flags & RFMEM) { - mdp2->md_ldt->ldt_refcnt++; - } else { - mdp2->md_ldt = user_ldt_alloc(mdp2, - mdp2->md_ldt->ldt_len); - if (mdp2->md_ldt == NULL) - panic("could not copy LDT"); - } - } - mtx_unlock_spin(&sched_lock); - - /* - * Now, cpu_switch() can schedule the new process. - * pcb_esp is loaded pointing to the cpu_switch() stack frame - * containing the return address when exiting cpu_switch. - * This will normally be to fork_trampoline(), which will have - * %ebx loaded with the new proc's pointer. fork_trampoline() - * will set up a stack to call fork_return(p, frame); to complete - * the return to user-mode. - */ -} - -/* - * Intercept the return address from a freshly forked process that has NOT - * been scheduled yet. - * - * This is needed to make kernel threads stay in kernel mode. 
- */ -void -cpu_set_fork_handler(td, func, arg) - struct thread *td; - void (*func)(void *); - void *arg; -{ - /* - * Note that the trap frame follows the args, so the function - * is really called like this: func(arg, frame); - */ - td->td_pcb->pcb_esi = (int) func; /* function */ - td->td_pcb->pcb_ebx = (int) arg; /* first arg */ -} - -void -cpu_exit(struct thread *td) -{ - struct mdproc *mdp; - struct pcb *pcb = td->td_pcb; - - - /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ - mdp = &td->td_proc->p_md; - if (mdp->md_ldt) { - td->td_pcb->pcb_gs = _udatasel; - load_gs(_udatasel); - user_ldt_free(td); - } - if (pcb->pcb_flags & PCB_DBREGS) { - /* disable all hardware breakpoints */ - reset_dbregs(); - pcb->pcb_flags &= ~PCB_DBREGS; - } -} - -void -cpu_thread_exit(struct thread *td) -{ - struct pcb *pcb = td->td_pcb; -#ifdef DEV_NPX - if (td == PCPU_GET(fpcurthread)) - npxdrop(); -#endif - if (pcb->pcb_flags & PCB_DBREGS) { - /* disable all hardware breakpoints */ - reset_dbregs(); - pcb->pcb_flags &= ~PCB_DBREGS; - } -} - -void -cpu_thread_clean(struct thread *td) -{ - struct pcb *pcb; - - pcb = td->td_pcb; - if (pcb->pcb_ext != 0) { - /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ - /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ - /* - * XXX do we need to move the TSS off the allocated pages - * before freeing them? (not done here) - */ - kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, - ctob(IOPAGES + 1)); - pcb->pcb_ext = 0; - } -} - -void -cpu_thread_swapin(struct thread *td) -{ -} - -void -cpu_thread_swapout(struct thread *td) -{ -} - -void -cpu_thread_setup(struct thread *td) -{ - - td->td_pcb = - (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; - td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; - td->td_pcb->pcb_ext = NULL; -} - -/* - * Initialize machine state (pcb and trap frame) for a new thread about to - * upcall. 
Pu t enough state in the new thread's PCB to get it to go back - * userret(), where we can intercept it again to set the return (upcall) - * Address and stack, along with those from upcals that are from other sources - * such as those generated in thread_userret() itself. - */ -void -cpu_set_upcall(struct thread *td, struct thread *td0) -{ - struct pcb *pcb2; - - /* Point the pcb to the top of the stack. */ - pcb2 = td->td_pcb; - - /* - * Copy the upcall pcb. This loads kernel regs. - * Those not loaded individually below get their default - * values here. - * - * XXXKSE It might be a good idea to simply skip this as - * the values of the other registers may be unimportant. - * This would remove any requirement for knowing the KSE - * at this time (see the matching comment below for - * more analysis) (need a good safe default). - */ - bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE); - - /* - * Create a new fresh stack for the new thread. - * Don't forget to set this stack value into whatever supplies - * the address for the fault handlers. - * The contexts are filled in at the time we actually DO the - * upcall as only then do we know which KSE we got. - */ - bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); - - /* - * Set registers for trampoline to user mode. Leave space for the - * return address on stack. These are the kernel mode register values. 
- */ -#ifdef PAE - pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); -#else - pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); -#endif - pcb2->pcb_edi = 0; - pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ - pcb2->pcb_ebp = 0; - pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ - pcb2->pcb_ebx = (int)td; /* trampoline arg */ - pcb2->pcb_eip = (int)fork_trampoline; - pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ - pcb2->pcb_gs = rgs(); - /* - * If we didn't copy the pcb, we'd need to do the following registers: - * pcb2->pcb_dr*: cloned above. - * pcb2->pcb_savefpu: cloned above. - * pcb2->pcb_flags: cloned above. - * pcb2->pcb_onfault: cloned above (always NULL here?). - * pcb2->pcb_gs: cloned above. XXXKSE ??? - * pcb2->pcb_ext: cleared below. - */ - pcb2->pcb_ext = NULL; -} - -/* - * Set that machine state for performing an upcall that has to - * be done in thread_userret() so that those upcalls generated - * in thread_userret() itself can be done as well. - */ -void -cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) -{ - - /* - * Do any extra cleaning that needs to be done. - * The thread may have optional components - * that are not present in a fresh thread. - * This may be a recycled thread so make it look - * as though it's newly allocated. - */ - cpu_thread_clean(td); - - /* - * Set the trap frame to point at the beginning of the uts - * function. - */ - td->td_frame->tf_ebp = 0; - td->td_frame->tf_esp = - (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; - td->td_frame->tf_eip = (int)ku->ku_func; - - /* - * Pass the address of the mailbox for this kse to the uts - * function as a parameter on the stack. 
- */ - suword((void *)(td->td_frame->tf_esp + sizeof(void *)), - (int)ku->ku_mailbox); -} - -/* - * Convert kernel VA to physical address - */ -vm_paddr_t -kvtop(void *addr) -{ - vm_paddr_t pa; - - pa = pmap_kextract((vm_offset_t)addr); - if (pa == 0) - panic("kvtop: zero page frame"); - return (pa); -} - -/* - * Force reset the processor by invalidating the entire address space! - */ - -#if 0 -#ifdef SMP -static void -cpu_reset_proxy() -{ - - cpu_reset_proxy_active = 1; - while (cpu_reset_proxy_active == 1) - ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); - printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); - DELAY(1000000); - cpu_reset(); -} -#endif -#endif -void -cpu_reset() -{ - HYPERVISOR_shutdown(); -} - - -/* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) - */ -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); - TAILQ_INIT(&sf_buf_freelist); - sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); - } - sf_buf_alloc_want = 0; - mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); -} - -/* - * Get an sf_buf from the freelist. Will block if none are available. 
- */ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int pri) -{ - struct sf_head *hash_list; - struct sf_buf *sf; - int error; - - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - sf->ref_count++; - if (sf->ref_count == 1) { - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - } - goto done; - } - } - while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { - sf_buf_alloc_want++; - mbstat.sf_allocwait++; - error = msleep(&sf_buf_freelist, &sf_buf_lock, PVM | pri, - "sfbufa", 0); - sf_buf_alloc_want--; - - /* - * If we got a signal, don't risk going back to sleep. - */ - if (error) - goto done; - } - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - if (sf->m != NULL) - LIST_REMOVE(sf, list_entry); - LIST_INSERT_HEAD(hash_list, sf, list_entry); - sf->ref_count = 1; - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_qenter(sf->kva, &sf->m, 1); -done: - mtx_unlock(&sf_buf_lock); - return (sf); -} - -/* - * Detatch mapped page and release resources back to the system. - */ -void -sf_buf_free(struct sf_buf *sf) -{ - mtx_lock(&sf_buf_lock); - sf->ref_count--; - if (sf->ref_count == 0) { - TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); - nsfbufsused--; - /* XEN only */ - pmap_qremove(sf->kva, 1); - sf->m = NULL; - LIST_REMOVE(sf, list_entry); - /* ----- */ - if (sf_buf_alloc_want > 0) - wakeup_one(&sf_buf_freelist); - } - mtx_unlock(&sf_buf_lock); -} - -/* - * Software interrupt handler for queued VM system processing. - */ -void -swi_vm(void *dummy) -{ - if (busdma_swi_pending != 0) - busdma_swi(); -} - -/* - * Tell whether this address is in some physical memory region. - * Currently used by the kernel coredump code in order to avoid - * dumping the ``ISA memory hole'' which could cause indefinite hangs, - * or other unpredictable behaviour. 
- */ - -int -is_physical_memory(vm_paddr_t addr) -{ - -#ifdef DEV_ISA - /* The ISA ``memory hole''. */ - if (addr >= 0xa0000 && addr < 0x100000) - return 0; -#endif - - /* - * stuff other tests for known memory-mapped devices (PCI?) - * here - */ - - return 1; -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_bus.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,238 +0,0 @@ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/kernel.h> -#include <machine/bus.h> -#include <sys/rman.h> -#include <sys/lock.h> -#include <sys/mutex.h> - -#include <machine/frame.h> -#include <machine/intr_machdep.h> -#include <machine/resource.h> - -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/xen_intr.h> - -static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device"); - -struct xenbus_device { - struct resource_list xen_resources; -}; - -#define DEVTOXEN(dev) ((struct xenbus_device *)device_get_ivars(dev)) - -static void xenbus_identify(driver_t *, device_t); -static int xenbus_probe(device_t); -static int xenbus_attach(device_t); -static int xenbus_print_child(device_t, device_t); -static device_t xenbus_add_child(device_t bus, int order, const char *name, - int unit); -static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *, - u_long, u_long, u_long, u_int); -static int xenbus_release_resource(device_t, device_t, int, int, - struct resource *); -static int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long); -static int xenbus_get_resource(device_t, device_t, int, int, u_long *, u_long *); -static void xenbus_delete_resource(device_t, device_t, int, int); - - -static device_method_t xenbus_methods[] = { - /* Device interface */ - DEVMETHOD(device_identify, xenbus_identify), - DEVMETHOD(device_probe, xenbus_probe), - 
DEVMETHOD(device_attach, xenbus_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - /* Bus interface */ - DEVMETHOD(bus_print_child, xenbus_print_child), - DEVMETHOD(bus_add_child, xenbus_add_child), - DEVMETHOD(bus_read_ivar, bus_generic_read_ivar), - DEVMETHOD(bus_write_ivar, bus_generic_write_ivar), - DEVMETHOD(bus_set_resource, xenbus_set_resource), - DEVMETHOD(bus_get_resource, xenbus_get_resource), - DEVMETHOD(bus_alloc_resource, xenbus_alloc_resource), - DEVMETHOD(bus_release_resource, xenbus_release_resource), - DEVMETHOD(bus_delete_resource, xenbus_delete_resource), - DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), - DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), - DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), - DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), - - { 0, 0 } -}; - - -static driver_t xenbus_driver = { - "xenbus", - xenbus_methods, - 1, /* no softc */ -}; -static devclass_t xenbus_devclass; -static device_t xenbus_dev; -static boolean_t xenbus_probe_delay = TRUE; /* delay child probes */ - -DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); - -static void -xenbus_identify(driver_t *driver, device_t parent) -{ - - /* - * Add child device with order of 0 so it gets probed - * first - */ - xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 0); - if (xenbus_dev == NULL) - panic("xenbus: could not attach"); -} - -static int -xenbus_probe(device_t dev) -{ - device_set_desc(dev, "xen system"); - device_quiet(dev); - return (0); -} - -static int -xenbus_attach(device_t dev) -{ - /* - * First, let our child driver's identify any child devices that - * they can find. Once that is done attach any devices that we - * found. 
- */ - if (!xenbus_probe_delay) { - bus_generic_probe(dev); - bus_generic_attach(dev); - } - - return 0; -} - - -static int -xenbus_print_all_resources(device_t dev) -{ - struct xenbus_device *xdev = device_get_ivars(dev); - struct resource_list *rl = &xdev->xen_resources; - int retval = 0; - - if (SLIST_FIRST(rl)) - retval += printf(" at"); - - retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx"); - retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx"); - retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); - - return retval; -} - - -static int -xenbus_print_child(device_t bus, device_t child) -{ - int retval = 0; - - retval += bus_print_child_header(bus, child); - retval += xenbus_print_all_resources(child); - retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */ - - return (retval); -} - -static device_t -xenbus_add_child(device_t bus, int order, const char *name, int unit) -{ - device_t child; - struct xenbus_device *xendev; - - xendev = malloc(sizeof(struct xenbus_device), M_XENDEV, - M_NOWAIT | M_ZERO); - if (!xendev) - return(0); - resource_list_init(&xendev->xen_resources); - - child = device_add_child_ordered(bus, order, name, unit); - - /* should we free this in xenbus_child_detached? 
*/ - device_set_ivars(child, xendev); - - return(child); -} - -static struct resource * -xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid, - u_long start, u_long end, u_long count, u_int flags) -{ - struct xenbus_device *xendev = DEVTOXEN(child); - struct resource_list *rl = &xendev->xen_resources; - - return (resource_list_alloc(rl, bus, child, type, rid, start, end, - count, flags)); -} - - -static int -xenbus_release_resource(device_t bus, device_t child, int type, int rid, - struct resource *r) -{ - struct xenbus_device *xendev = DEVTOXEN(child); - struct resource_list *rl = &xendev->xen_resources; - - return (resource_list_release(rl, bus, child, type, rid, r)); -} - -static int -xenbus_set_resource(device_t dev, device_t child, int type, int rid, - u_long start, u_long count) -{ - struct xenbus_device *xendev = DEVTOXEN(child); - struct resource_list *rl = &xendev->xen_resources; - - resource_list_add(rl, type, rid, start, start + count - 1, count); - return(0); -} - -static int -xenbus_get_resource(device_t dev, device_t child, int type, int rid, - u_long *startp, u_long *countp) -{ - struct xenbus_device *xendev = DEVTOXEN(child); - struct resource_list *rl = &xendev->xen_resources; - struct resource_list_entry *rle; - - rle = resource_list_find(rl, type, rid); - if (!rle) - return(ENOENT); - if (startp) - *startp = rle->start; - if (countp) - *countp = rle->count; - return(0); -} - -static void -xenbus_delete_resource(device_t dev, device_t child, int type, int rid) -{ - struct xenbus_device *xendev = DEVTOXEN(child); - struct resource_list *rl = &xendev->xen_resources; - - resource_list_delete(rl, type, rid); -} - -static void -xenbus_init(void *unused) -{ - xenbus_probe_delay = FALSE; - xenbus_attach(xenbus_dev); -} -SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c --- 
a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,605 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <sys/cdefs.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/mount.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/reboot.h> - - -#include <vm/vm.h> -#include <vm/pmap.h> -#include <machine/stdarg.h> -#include <machine/xenfunc.h> -#include <machine/xenpmap.h> -#include <machine/vmparam.h> -#include <machine/cpu.h> -#include <machine/xenvar.h> - -#include <sys/socket.h> -#include <sys/sockio.h> -#include <net/if.h> -#include <net/if_dl.h> -#include <net/if_types.h> -#include <net/if_var.h> -#include <net/ethernet.h> -#include <netinet/in.h> -#include <sys/mbuf.h> -#include <nfs/rpcv2.h> -#include <nfsclient/krpc.h> -#include <nfs/nfsproto.h> - - -shared_info_t *HYPERVISOR_shared_info; - -void ni_cli(void); -void ni_sti(void); -#ifdef NFS_ROOT - -static int -xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len) -{ - struct mbuf *m; - int alignedlen; - - m = *mptr; - alignedlen = ( len + 3 ) & ~3; - - if (m->m_len < alignedlen) { - m = m_pullup(m, alignedlen); - if (m == NULL) { - *mptr = NULL; - return EBADRPC; - } - } - bcopy(mtod(m, u_char *), buf, len); - m_adj(m, alignedlen); - *mptr = m; - return 0; -} - - -static int -getdec(char **ptr) -{ - char *p; - int ret; - - p = *ptr; - ret = 0; - if ((*p < '0') || (*p > '9')) - return -1; - while ((*p >= '0') && (*p <= '9')) { - ret = ret * 10 + (*p - '0'); - p++; - } - *ptr = p; - return ret; -} - -int -setinaddr(struct sockaddr_in *addr, char *ipstr) -{ - unsigned int ip; - int val; - - ip = 0; - if (((val = getdec(&ipstr)) < 0) || (val > 255)) - return 1; - ip = val << 24; - if (*ipstr != '.') - return 1; - ipstr++; - if (((val = getdec(&ipstr)) < 0) || (val > 255)) - return 1; - ip |= (val << 16); - if (*ipstr != '.') - return 1; - ipstr++; - if (((val = getdec(&ipstr)) < 0) || (val > 255)) - return 1; - ip |= (val << 8); - if (*ipstr != '.') - return 1; - ipstr++; - if (((val = getdec(&ipstr)) < 0) || (val > 255)) - return 1; - ip |= val; - - 
addr->sin_addr.s_addr = htonl(ip); - addr->sin_len = sizeof(struct sockaddr_in); - addr->sin_family = AF_INET; - - return 0; -} - -static int -hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa) -{ - char *cp; - u_int32_t a[6]; - int count; - - bzero(sa, sizeof(*sa)); - sa->sdl_len = sizeof(*sa); - sa->sdl_family = AF_LINK; - sa->sdl_type = IFT_ETHER; - sa->sdl_alen = ETHER_ADDR_LEN; - if ((cp = getenv(ev)) == NULL) - return (1); - count = sscanf(cp, "%x:%x:%x:%x:%x:%x", - &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]); - freeenv(cp); - if (count != 6) - return (1); - sa->sdl_data[0] = a[0]; - sa->sdl_data[1] = a[1]; - sa->sdl_data[2] = a[2]; - sa->sdl_data[3] = a[3]; - sa->sdl_data[4] = a[4]; - sa->sdl_data[5] = a[5]; - return (0); -} -extern int in_control(struct socket *so, u_long cmd, - caddr_t data, struct ifnet *ifp, - struct thread *td); - -static int -xen_setnetwork(void) -{ - int error = 0; - struct ifaddr *ifa; - struct ifnet *ifp; - struct sockaddr_dl *sdl, ourdl; - - if (sizeof(struct sockaddr) != sizeof(struct sockaddr_in)) - panic("sizes not equal\n"); - - if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) { - printf("nfs_diskless: no hardware address\n"); - return -1; - } - - - ifa = NULL; - IFNET_RLOCK(); - TAILQ_FOREACH(ifp, &ifnet, if_link) { - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if ((ifa->ifa_addr->sa_family == AF_LINK) && - (sdl = ((struct sockaddr_dl *)ifa->ifa_addr))) { - if ((sdl->sdl_type == ourdl.sdl_type) && - (sdl->sdl_alen == ourdl.sdl_alen) && - !bcmp(sdl->sdl_data + sdl->sdl_nlen, - ourdl.sdl_data + ourdl.sdl_nlen, - sdl->sdl_alen)) { - IFNET_RUNLOCK(); - goto match_done; - } - } - } - } - IFNET_RUNLOCK(); - printf("nfs_diskless: no interface\n"); - return -1; /* no matching interface */ - match_done: - - if (getenv("boot.netif.ip") && getenv("boot.netif.gateway") && - getenv("boot.netif.netmask")) { - struct ifaliasreq ifra; - char *ip; - - bzero(&ifra, sizeof(ifra)); - strcpy(ifra.ifra_name, "xn0"); - ip = 
getenv("boot.netif.ip"); - setinaddr((struct sockaddr_in *)&(ifra.ifra_addr), ip); - printf("setting ip to %s\n", ip); - ip = getenv("boot.netif.netmask"); - setinaddr((struct sockaddr_in *)&ifra.ifra_mask, ip); - setinaddr((struct sockaddr_in *)&ifra.ifra_broadaddr, "255.255.255.255"); - - - if ((error = in_control(NULL, SIOCAIFADDR, (caddr_t) &ifra, ifp, curthread))) - printf("couldn't set interface address %d\n", error); -#if 0 - if ((error = xn_ioctl(ifp, SIOCSIFNETMASK, (caddr_t)&ifa))) - printf("couldn't set interface netmask %d\n", error); -#endif - } - return error; -} - -int -xen_setnfshandle(void) -{ - char *path, *ip; - u_char fhp[NFSX_V2FH]; - int error = 0; - struct sockaddr_in sin_local, *sin ; - struct mbuf *m; - - if ((error = xen_setnetwork())) - return error; - - sin = &sin_local; - - path = getenv("boot.nfsroot.path"); - ip = getenv("boot.nfsroot.server"); - - /* we aren't configured for NFS root */ - if (!path || !ip) - return 0; - - error = setinaddr(sin, ip); - if (error) { - printf("invalid ip address %s\n", ip); - return error; - } - - error = krpc_portmap(sin, RPCPROG_MNT, RPCMNT_VER1, - &sin->sin_port, curthread); - if (error) { - printf("failed to find port number for mountd\n"); - return error; - } - m = xdr_string_encode(path, strlen(path)); - - /* Do RPC to mountd */ - error = krpc_call(sin, RPCPROG_MNT, RPCMNT_VER1, - RPCMNT_MOUNT, &m, NULL, curthread); - if (error) { - printf("call to mountd failed\n"); - return error; - } - - if (xdr_opaque_decode(&m, fhp, NFSX_V2FH) != 0) { - printf("failed to decode nfs file handle\n"); - return error; - } - - setenv("boot.nfsroot.nfshandle", fhp); - - return 0; -} -#endif -void -ni_cli(void) -{ - __asm__("pushl %edx;" - "pushl %eax;" - ); - __cli(); - __asm__("popl %eax;" - "popl %edx;" - ); -} - - -void -ni_sti(void) -{ - __asm__("pushl %edx;" - "pushl %esi;" - "pushl %eax;" - ); - __sti(); - __asm__("popl %eax;" - "popl %esi;" - "popl %edx;" - ); -} - -/* - * Modify the cmd_line by converting 
',' to NULLs so that it is in a format - * suitable for the static env vars. - */ -char * -xen_setbootenv(char *cmd_line) -{ - char *cmd_line_next; - - for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); - return cmd_line; -} - -static struct -{ - const char *ev; - int mask; -} howto_names[] = { - {"boot_askname", RB_ASKNAME}, - {"boot_cdrom", RB_CDROM}, - {"boot_userconfig", RB_CONFIG}, - {"boot_ddb", RB_KDB}, - {"boot_gdb", RB_GDB}, - {"boot_gdb_pause", RB_GDB_PAUSE}, - {"boot_single", RB_SINGLE}, - {"boot_verbose", RB_VERBOSE}, - {"boot_multicons", RB_MULTIPLE}, - {"boot_serial", RB_SERIAL}, - {NULL, 0} -}; - -int -xen_boothowto(char *envp) -{ - int i, howto = 0; - - /* get equivalents from the environment */ - for (i = 0; howto_names[i].ev != NULL; i++) - if (getenv(howto_names[i].ev) != NULL) - howto |= howto_names[i].mask; - return howto; -} - -#define PRINTK_BUFSIZE 1024 -void -printk(const char *fmt, ...) -{ - __va_list ap; - int ret; - static char buf[PRINTK_BUFSIZE]; - - va_start(ap, fmt); - ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); - va_end(ap); - buf[ret] = 0; - (void)HYPERVISOR_console_write(buf, ret); -} - - -#define XPQUEUE_SIZE 128 -#ifdef SMP -/* per-cpu queues and indices */ -static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE]; -static int xpq_idx[MAX_VIRT_CPUS]; - -#define XPQ_QUEUE xpq_queue[vcpu] -#define XPQ_IDX xpq_idx[vcpu] -#define SET_VCPU() int vcpu = smp_processor_id() -#else -static mmu_update_t xpq_queue[XPQUEUE_SIZE]; -static int xpq_idx = 0; - -#define XPQ_QUEUE xpq_queue -#define XPQ_IDX xpq_idx -#define SET_VCPU() -#endif -#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); - - -static __inline void -_xen_flush_queue(void) -{ - SET_VCPU(); - int _xpq_idx = XPQ_IDX; - int error, i; - /* window of vulnerability here? */ - - XPQ_IDX = 0; - /* Make sure index is cleared first to avoid double updates. 
*/ - error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, - _xpq_idx, NULL, DOMID_SELF); - - if (__predict_false(error < 0)) { - for (i = 0; i < _xpq_idx; i++) - printk("val: %x ptr: %p\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); - panic("Failed to execute MMU updates: %d", error); - } - -} - -void -xen_flush_queue(void) -{ - SET_VCPU(); - if (XPQ_IDX != 0) _xen_flush_queue(); -} - -static __inline void -xen_increment_idx(void) -{ - SET_VCPU(); - - XPQ_IDX++; - if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) - xen_flush_queue(); -} - -void -xen_invlpg(vm_offset_t va) -{ - struct mmuext_op op; - op.cmd = MMUEXT_INVLPG_LOCAL; - op.linear_addr = va & ~PAGE_MASK; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -load_cr3(uint32_t val) -{ - struct mmuext_op op; - op.cmd = MMUEXT_NEW_BASEPTR; - op.mfn = xpmap_ptom(val) >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - - -void -xen_machphys_update(unsigned long mfn, unsigned long pfn) -{ - SET_VCPU(); - - XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - XPQ_QUEUE[XPQ_IDX].val = pfn; - xen_increment_idx(); - _xen_flush_queue(); -} - -void -xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val) -{ - SET_VCPU(); - - XPQ_QUEUE[XPQ_IDX].ptr = (memory_t)ptr; - XPQ_QUEUE[XPQ_IDX].val = (memory_t)val; - xen_increment_idx(); -} - -void -xen_pgd_pin(unsigned long ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L2_TABLE; - op.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pgd_unpin(unsigned long ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pt_pin(unsigned long ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L1_TABLE; - op.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - 
PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pt_unpin(unsigned long ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_set_ldt(unsigned long ptr, unsigned long len) -{ - struct mmuext_op op; - op.cmd = MMUEXT_SET_LDT; - op.linear_addr = ptr; - op.nr_ents = len; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_tlb_flush(void) -{ - struct mmuext_op op; - op.cmd = MMUEXT_TLB_FLUSH_LOCAL; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - - -/********** CODE WORTH KEEPING ABOVE HERE *****************/ - -void xen_failsafe_handler(void); - -void -xen_failsafe_handler(void) -{ - - panic("xen_failsafe_handler called!\n"); -} - - -void -xen_update_descriptor(union descriptor *table, union descriptor *entry) -{ - vm_paddr_t pa; - pt_entry_t *ptp; - uint32_t raw[2]; - - bcopy(entry, raw, 2*sizeof(int32_t)); - ptp = vtopte((vm_offset_t)table); - pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); - if (HYPERVISOR_update_descriptor(pa, raw[0], raw[1])) - panic("HYPERVISOR_update_descriptor failed\n"); -} - - - -#if defined(XENDEBUG) -static void -xpmap_dump_pt(pt_entry_t *ptp, int p) -{ - pt_entry_t pte; - int j; - int bufpos; - - pte = xpmap_ptom((uint32_t)ptp - KERNTEXTOFF); - PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDRSHIFT)); - - bufpos = 0; - for (j = 0; j < PTES_PER_PTP; j++) { - if ((ptp[j] & PG_V) == 0) - continue; - pte = ptp[j] /* & PG_FRAME */; - bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ", - p, j, pte); - if (bufpos > 70) { - int k; - sprintf(XBUF + bufpos, "\n"); - PRINTK((XBUF)); - bufpos = 0; - for (k = 0; k < 1000000; k++); - } - } - if (bufpos) { - PRINTK((XBUF)); - bufpos = 0; - } -} -#endif - - diff -r 64cd054aa143 -r 0255f48b757f 
freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,601 +0,0 @@ -/*- - * Copyright (c) 1993 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/include/cpufunc.h,v 1.135 2003/08/06 18:21:27 bde Exp $ - */ - -/* - * Functions to provide access to special i386 instructions. - * This in included in sys/systm.h, and that file should be - * used in preference to this. - */ - -#ifndef _MACHINE_CPUFUNC_H_ -#define _MACHINE_CPUFUNC_H_ - -#include <sys/cdefs.h> -#include <machine/psl.h> -#define NO_EXCHANGE -#include <machine/xen-os.h> -#include <machine/evtchn.h> -#include <machine/xenvar.h> -struct thread; -struct region_descriptor; - -__BEGIN_DECLS -#define readb(va) (*(volatile u_int8_t *) (va)) -#define readw(va) (*(volatile u_int16_t *) (va)) -#define readl(va) (*(volatile u_int32_t *) (va)) - -#define writeb(va, d) (*(volatile u_int8_t *) (va) = (d)) -#define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) -#define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) - -static __inline u_int -read_eflags(void) -{ - u_int ef; - __asm __volatile("pushfl; popl %0" : "=r" (ef)); - return (ef); -} - -static __inline void -write_eflags(u_int ef) -{ - __asm __volatile("pushl %0; popfl" : : "r" (ef)); -} -#ifdef __GNUC__ - -static __inline void -breakpoint(void) -{ - __asm __volatile("int $3"); -} - -static __inline u_int -bsfl(u_int mask) -{ - u_int result; - - __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask)); - return (result); -} - -static __inline u_int -bsrl(u_int mask) -{ - u_int result; - - __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" 
(mask)); - return (result); -} -static __inline void -disable_intr(void) -{ - __cli(); -} -static __inline void -do_cpuid(u_int ax, u_int *p) -{ - __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax)); -} - -static __inline void -enable_intr(void) -{ - __sti(); -} - - -#define HAVE_INLINE_FFS - -static __inline int -ffs(int mask) -{ - /* - * Note that gcc-2's builtin ffs would be used if we didn't declare - * this inline or turn off the builtin. The builtin is faster but - * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later - * versions. - */ - return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1); -} - -#define HAVE_INLINE_FLS - -static __inline int -fls(int mask) -{ - return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1); -} - -static __inline void -halt(void) -{ - __asm __volatile("hlt"); -} - -#if __GNUC__ < 2 - -#define inb(port) inbv(port) -#define outb(port, data) outbv(port, data) - -#else /* __GNUC >= 2 */ - -/* - * The following complications are to get around gcc not having a - * constraint letter for the range 0..255. We still put "d" in the - * constraint because "i" isn't a valid constraint when the port - * isn't constant. This only matters for -O0 because otherwise - * the non-working version gets optimized away. - * - * Use an expression-statement instead of a conditional expression - * because gcc-2.6.0 would promote the operands of the conditional - * and produce poor code for "if ((inb(var) & const1) == const2)". - * - * The unnecessary test `(port) < 0x10000' is to generate a warning if - * the `port' has type u_short or smaller. Such types are pessimal. - * This actually only works for signed types. The range check is - * careful to avoid generating warnings. 
- */ -#define inb(port) __extension__ ({ \ - u_char _data; \ - if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000) \ - _data = inbc(port); \ - else \ - _data = inbv(port); \ - _data; }) - -#define outb(port, data) ( \ - __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000 \ - ? outbc(port, data) : outbv(port, data)) - -static __inline u_char -inbc(u_int port) -{ - u_char data; - - __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port))); - return (data); -} - -static __inline void -outbc(u_int port, u_char data) -{ - __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port))); -} - -#endif /* __GNUC <= 2 */ - -static __inline u_char -inbv(u_int port) -{ - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. - */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); - return (data); -} - -static __inline u_int -inl(u_int port) -{ - u_int data; - - __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port)); - return (data); -} - -static __inline void -insb(u_int port, void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; insb" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); -} - -static __inline void -insw(u_int port, void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; insw" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); -} - -static __inline void -insl(u_int port, void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; insl" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); -} - -static __inline void -invd(void) -{ - __asm __volatile("invd"); -} - -static __inline u_short -inw(u_int port) -{ - u_short data; - - __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port)); - return (data); -} - -static __inline void -outbv(u_int port, u_char data) -{ - u_char al; - /* - * Use an unnecessary assignment 
to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); -} - -static __inline void -outl(u_int port, u_int data) -{ - /* - * outl() and outw() aren't used much so we haven't looked at - * possible micro-optimizations such as the unnecessary - * assignment for them. - */ - __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port)); -} - -static __inline void -outsb(u_int port, const void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; outsb" - : "+S" (addr), "+c" (cnt) - : "d" (port)); -} - -static __inline void -outsw(u_int port, const void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; outsw" - : "+S" (addr), "+c" (cnt) - : "d" (port)); -} - -static __inline void -outsl(u_int port, const void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; outsl" - : "+S" (addr), "+c" (cnt) - : "d" (port)); -} - -static __inline void -outw(u_int port, u_short data) -{ - __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); -} - -static __inline void -ia32_pause(void) -{ - __asm __volatile("pause"); -} - -static __inline u_int64_t -rdmsr(u_int msr) -{ - u_int64_t rv; - - __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); - return (rv); -} - -static __inline u_int64_t -rdpmc(u_int pmc) -{ - u_int64_t rv; - - __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); - return (rv); -} - -static __inline u_int64_t -rdtsc(void) -{ - u_int64_t rv; - - __asm __volatile("rdtsc" : "=A" (rv)); - return (rv); -} - -static __inline void -wbinvd(void) -{ - __asm __volatile("wbinvd"); -} - -static __inline void -wrmsr(u_int msr, u_int64_t newval) -{ - __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); -} - -static __inline u_int -rfs(void) -{ - u_int sel; - __asm __volatile("movl %%fs,%0" : "=rm" (sel)); - return (sel); -} - -static __inline 
u_int -rgs(void) -{ - u_int sel; - __asm __volatile("movl %%gs,%0" : "=rm" (sel)); - return (sel); -} - -static __inline void -load_fs(u_int sel) -{ - __asm __volatile("movl %0,%%fs" : : "rm" (sel)); -} - -static __inline void -load_gs(u_int sel) -{ - __asm __volatile("movl %0,%%gs" : : "rm" (sel)); -} - -/* void lidt(struct region_descriptor *addr); */ -static __inline void -lidt(struct region_descriptor *addr) -{ - __asm __volatile("lidt (%0)" : : "r" (addr)); -} - -static __inline u_int -rdr0(void) -{ - u_int data; - __asm __volatile("movl %%dr0,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr0(u_int dr0) -{ - __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); -} - -static __inline u_int -rdr1(void) -{ - u_int data; - __asm __volatile("movl %%dr1,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr1(u_int dr1) -{ - __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); -} - -static __inline u_int -rdr2(void) -{ - u_int data; - __asm __volatile("movl %%dr2,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr2(u_int dr2) -{ - __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); -} - -static __inline u_int -rdr3(void) -{ - u_int data; - __asm __volatile("movl %%dr3,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr3(u_int dr3) -{ - __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); -} - -static __inline u_int -rdr4(void) -{ - u_int data; - __asm __volatile("movl %%dr4,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr4(u_int dr4) -{ - __asm __volatile("movl %0,%%dr4" : : "r" (dr4)); -} - -static __inline u_int -rdr5(void) -{ - u_int data; - __asm __volatile("movl %%dr5,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr5(u_int dr5) -{ - __asm __volatile("movl %0,%%dr5" : : "r" (dr5)); -} - -static __inline u_int -rdr6(void) -{ - u_int data; - __asm __volatile("movl %%dr6,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr6(u_int 
dr6) -{ - __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); -} - -static __inline u_int -rdr7(void) -{ - u_int data; - __asm __volatile("movl %%dr7,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr7(u_int dr7) -{ - __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); -} - -static __inline register_t -intr_disable(void) -{ - register_t eflags; - - __save_and_cli(eflags); - return (eflags); -} - -static __inline void -intr_restore(register_t eflags) -{ - __restore_flags(eflags); -} - -#else /* !__GNUC__ */ - -int breakpoint(void); -u_int bsfl(u_int mask); -u_int bsrl(u_int mask); -void cpu_invlpg(u_int addr); -void cpu_invlpg_range(u_int start, u_int end); -void disable_intr(void); -void do_cpuid(u_int ax, u_int *p); -void enable_intr(void); -void halt(void); -u_char inb(u_int port); -u_int inl(u_int port); -void insb(u_int port, void *addr, size_t cnt); -void insl(u_int port, void *addr, size_t cnt); -void insw(u_int port, void *addr, size_t cnt); -void invd(void); -void invlpg(u_int addr); -void invlpg_range(u_int start, u_int end); -void invltlb(void); -u_short inw(u_int port); -void load_cr3(u_int cr3); -void load_cr4(u_int cr4); -void load_fs(u_int sel); -void load_gs(u_int sel); -struct region_descriptor; -void lidt(struct region_descriptor *addr); -void ltr(u_short sel); -void outb(u_int port, u_char data); -void outl(u_int port, u_int data); -void outsb(u_int port, void *addr, size_t cnt); -void outsl(u_int port, void *addr, size_t cnt); -void outsw(u_int port, void *addr, size_t cnt); -void outw(u_int port, u_short data); -void ia32_pause(void); -u_int rcr2(void); -u_int rcr3(void); -u_int rcr4(void); -u_int rfs(void); -u_int rgs(void); -u_int64_t rdmsr(u_int msr); -u_int64_t rdpmc(u_int pmc); -u_int64_t rdtsc(void); -u_int read_eflags(void); -void wbinvd(void); -void write_eflags(u_int ef); -void wrmsr(u_int msr, u_int64_t newval); -u_int rdr0(void); -void load_dr0(u_int dr0); -u_int rdr1(void); -void load_dr1(u_int dr1); -u_int 
rdr2(void); -void load_dr2(u_int dr2); -u_int rdr3(void); -void load_dr3(u_int dr3); -u_int rdr4(void); -void load_dr4(u_int dr4); -u_int rdr5(void); -void load_dr5(u_int dr5); -u_int rdr6(void); -void load_dr6(u_int dr6); -u_int rdr7(void); -void load_dr7(u_int dr7); -register_t intr_disable(void); -void intr_restore(register_t ef); - -#endif /* __GNUC__ */ - -void reset_dbregs(void); - -__END_DECLS - -#endif /* !_MACHINE_CPUFUNC_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,120 +0,0 @@ -/****************************************************************************** - * ctrl_if.h - * - * Management functions for special interface to the domain controller. - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __I386_XENO__CTRL_IF_H__ -#define __I386_XENO__CTRL_IF_H__ - -#include <sys/taskqueue.h> -#include <machine/hypervisor.h> - - -typedef control_msg_t ctrl_msg_t; - -/* - * Callback function type. Called for asynchronous processing of received - * request messages, and responses to previously-transmitted request messages. - * The parameters are (@msg, @id). - * @msg: Original request/response message (not a copy). The message can be - * modified in-place by the handler (e.g., a response callback can - * turn a request message into a response message in place). The message - * is no longer accessible after the callback handler returns -- if the - * message is required to persist for longer then it must be copied. - * @id: (Response callbacks only) The 'id' that was specified when the - * original request message was queued for transmission. - */ -typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. 
This - * operation will not block: it will return -EAGAIN if there is no space. - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. This - * operation will block until the message is sent, or a signal is received - * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE). - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state); - -/* - * Request a callback when there is /possibly/ space to immediately send a - * message to the domain controller. This function returns 0 if there is - * already space to trasnmit a message --- in this case the callback task /may/ - * still be executed. If this function returns 1 then the callback /will/ be - * executed when space becomes available. - */ -int ctrl_if_enqueue_space_callback(struct task *task); - -/* - * Send a response (@msg) to a message from the domain controller. This will - * never block. - * Notes: - * 1. The @msg is copied and so can be freed after this function returns. - * 2. The @msg may be the original request message, modified in-place. - */ -void ctrl_if_send_response(ctrl_msg_t *msg); - -/* - * Register a receiver for typed messages from the domain controller. The - * handler (@hnd) is called for every received message of specified @type. - * Returns TRUE (non-zero) if the handler was successfully registered. 
- * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will - * occur in a context in which it is safe to yield (i.e., process context). - */ -#define CALLBACK_IN_BLOCKING_CONTEXT 1 -int ctrl_if_register_receiver( - uint8_t type, - ctrl_msg_handler_t hnd, - unsigned int flags); - -/* - * Unregister a receiver for typed messages from the domain controller. The - * handler (@hnd) will not be executed after this function returns. - */ -void ctrl_if_unregister_receiver(uint8_t type, ctrl_msg_handler_t hnd); - -/* Suspend/resume notifications. */ -void ctrl_if_suspend(void); -void ctrl_if_resume(void); - - -/* - * Returns TRUE if there are no outstanding message requests at the domain - * controller. This can be used to ensure that messages have really flushed - * through when it is not possible to use the response-callback interface. - * WARNING: If other subsystems are using the control interface then this - * function might never return TRUE! - */ -int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */ - -/* - * Manually discard response messages from the domain controller. - * WARNING: This is usually done automatically -- this function should only - * be called when normal interrupt mechanisms are disabled! - */ -void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */ - -#endif /* __ASM_XEN__CONTROL_IF_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,110 +0,0 @@ -/****************************************************************************** - * evtchn.h - * - * Communication via Xen event channels. - * Also definitions for the device that demuxes notifications to userspace. 
- * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ -#include <machine/pcpu.h> -#include <machine/hypervisor.h> -#include <machine/synch_bitops.h> -#include <machine/hypervisor-ifs.h> - -#ifdef SMP -#include <sys/param.h> /* XXX for time.h */ -#include <sys/time.h> /* XXX for pcpu.h */ -#include <sys/pcpu.h> /* XXX for PCPU_GET */ -extern int gdt_set; -static inline int -smp_processor_id(void) -{ - if (likely(gdt_set)) - return PCPU_GET(cpuid); - return 0; -} - -#else -#define smp_processor_id() 0 -#endif - -/* - * LOW-LEVEL DEFINITIONS - */ - -/* Force a proper event-channel callback from Xen. */ -void force_evtchn_callback(void); - -/* Entry point for notifications into Linux subsystems. */ -void evtchn_do_upcall(struct intrframe *frame); - -/* Entry point for notifications into the userland character device. */ -void evtchn_device_upcall(int port); - -static inline void -mask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_set_bit(port, &s->evtchn_mask[0]); -} - -static inline void -unmask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()]; - - synch_clear_bit(port, &s->evtchn_mask[0]); - - /* - * The following is basically the equivalent of 'hw_resend_irq'. Just like - * a real IO-APIC we 'lose the interrupt edge' if the channel is masked. 
- */ - if ( synch_test_bit (port, &s->evtchn_pending[0]) && - !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) ) - { - s->vcpu_data[0].evtchn_upcall_pending = 1; - if ( !s->vcpu_data[0].evtchn_upcall_mask ) - force_evtchn_callback(); - } -} - -static inline void -clear_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void -notify_via_evtchn(int port) -{ - evtchn_op_t op; - op.cmd = EVTCHNOP_send; - op.u.send.local_port = port; - (void)HYPERVISOR_event_channel_op(&op); -} - -/* - * CHARACTER-DEVICE DEFINITIONS - */ - -#define PORT_NORMAL 0x0000 -#define PORT_EXCEPTION 0x8000 -#define PORTIDX_MASK 0x7fff - -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 - -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to the specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) - -#endif /* __ASM_EVTCHN_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,71 +0,0 @@ -/****************************************************************************** - * gnttab.h - * - * Two sets of functionality: - * 1. Granting foreign access to our memory reservation. - * 2. Accessing others' memory reservations via grant references. 
- * (i.e., mechanisms for both sender and recipient of grant references) - * - * Copyright (c) 2004, K A Fraser - * Copyright (c) 2005, Christopher Clark - */ - -#ifndef __ASM_GNTTAB_H__ -#define __ASM_GNTTAB_H__ - -#include <machine/hypervisor.h> -#include <machine/hypervisor-ifs.h> - -/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ -#define NR_GRANT_FRAMES 4 -#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) - -int -gnttab_grant_foreign_access( - domid_t domid, unsigned long frame, int readonly); - -void -gnttab_end_foreign_access( - grant_ref_t ref, int readonly); - -int -gnttab_grant_foreign_transfer( - domid_t domid, unsigned long pfn); - -unsigned long -gnttab_end_foreign_transfer( - grant_ref_t ref); - -int -gnttab_query_foreign_access( - grant_ref_t ref ); - -/* - * operations on reserved batches of grant references - */ -int -gnttab_alloc_grant_references( - uint16_t count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal ); - -void -gnttab_free_grant_references( - uint16_t count, grant_ref_t private_head ); - -int -gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal -); - -void -gnttab_release_grant_reference( - grant_ref_t *private_head, grant_ref_t release ); - -void -gnttab_grant_foreign_access_ref( - grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); - -void -gnttab_grant_foreign_transfer_ref( - grant_ref_t, domid_t domid, unsigned long pfn); - - -#endif /* __ASM_GNTTAB_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,38 +0,0 @@ -#ifndef _HYPERVISOR_IFS_H_ -#define _HYPERVISOR_IFS_H_ - -#define s8 int8_t -#define s16 int16_t -#define s32 int32_t -#define s64 int64_t - -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#define 
CONFIG_XEN_BLKDEV_GRANT -#include <machine/xen-public/xen.h> -#include <machine/xen-public/io/domain_controller.h> -#include <machine/xen-public/io/netif.h> -#include <machine/xen-public/io/blkif.h> -#include <machine/xen-public/dom0_ops.h> -#include <machine/xen-public/event_channel.h> -#include <machine/xen-public/sched_ctl.h> -#include <machine/xen-public/physdev.h> -#include <machine/xen-public/grant_table.h> -#undef blkif_sector_t /* XXX pre-processor didn't do the */ -#define blkif_sector_t uint64_t /* right thing */ - -#undef s8 -#undef s16 -#undef s32 -#undef s64 - -#undef u8 -#undef u16 -#undef u32 -#undef u64 - - -#endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,448 +0,0 @@ -/****************************************************************************** - * hypervisor.h - * - * Linux-specific hypervisor handling. - * - * Copyright (c) 2002, K A Fraser - */ - -#ifndef __HYPERVISOR_H__ -#define __HYPERVISOR_H__ - - -#include <machine/hypervisor-ifs.h> -#include <machine/frame.h> -#include "opt_xen.h" - -extern start_info_t *xen_start_info; - -/* arch/xen/mm/hypervisor.c */ -/* - * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should be already - * be MACHINE addresses. - */ - -static inline void HYPERVISOR_crash(void) __dead2; - -void MULTICALL_flush_page_update_queue(void); - -#ifdef CONFIG_XEN_PHYSDEV_ACCESS -/* Allocate a contiguous empty region of low memory. Return virtual start. */ -unsigned long allocate_empty_lowmem_region(unsigned long pages); -/* Deallocate a contiguous region of low memory. Return it to the allocator. */ -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages); -#endif - -typedef struct { unsigned long pte_low, pte_high; } pte_t; - -/* - * Assembler stubs for hyper-calls. 
- */ - -static inline int -HYPERVISOR_set_trap_table(trap_info_t *table) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), - "b" (table) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_mmu_update(mmu_update_t *req, int count, - int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; -} - - - -static inline int -HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), - "b" (ss), "c" (esp) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), - "b" (event_selector), "c" (event_address), - "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); - - return ret; -} - -static inline int 
-HYPERVISOR_fpu_taskswitch(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_yield(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_yield) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_block(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_block) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_shutdown(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_reboot(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_suspend(unsigned long srec) -{ - int ret; - /* NB. On suspend, control software expects a suspend record in %esi. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory" ); - - return ret; -} - - -static inline void -HYPERVISOR_crash(void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) - : "memory" ); - - for (;;) ; /* eliminate noreturn error */ - -} - -static inline long -HYPERVISOR_set_timer_op(uint64_t timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op), - "b" (timeout_lo), "c" (timeout_hi) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom0_op(dom0_op_t *dom0_op) -{ - int ret; - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), - "b" (dom0_op) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), - "b" (reg), "c" (value) : "memory" ); - - return ret; -} - -static inline unsigned long -HYPERVISOR_get_debugreg(int reg) -{ - unsigned long ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), - "b" (reg) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_descriptor( - unsigned long pa, unsigned long word1, unsigned long word2) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), - "b" (pa), "c" (word1), "d" (word2) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op(unsigned int op, - unsigned long *pages, - unsigned long nr_pages) -{ - int 
ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), - "b" (op), "c" (pages), "d" (nr_pages) : "memory" ); - return ret; -} - -static inline int -HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_multicall), - "b" (call_list), "c" (nr_calls) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_va_mapping( - unsigned long page_nr, unsigned long new_val, unsigned long flags) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), - "b" (page_nr), "c" (new_val), "d" (flags): - "memory" ); - /* XXX */ -#if 0 - if ( unlikely(ret < 0) ) - panic("Failed update VA mapping: %08lx, %08lx, %08lx", - page_nr, (new_val).pte_low, flags); -#endif - return ret; -} - -static inline int -HYPERVISOR_event_channel_op(void *op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op), - "b" (op) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_xen_version(int cmd) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_xen_version), - "b" (cmd) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_console_io(int cmd, int count, char *str) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_console_io), - "b" (cmd), "c" (count), "d" (str) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_console_write(char *str, int count) -{ - return HYPERVISOR_console_io(CONSOLEIO_write, count, str); -} - -static inline int -HYPERVISOR_physdev_op(void *physdev_op) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ 
- int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_va_mapping_otherdomain( - unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_va_mapping_otherdomain), - "1" (va), "2" ((new_val).pte_low), "3" (flags), "4" (domid) : - "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_vm_assist), - "b" (cmd), "c" (type) : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) - : "memory"); - - return ret; -} - -#endif /* __HYPERVISOR_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/md_var.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/md_var.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,108 +0,0 @@ -/*- - * Copyright (c) 1995 Bruce D. Evans. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author nor the names of contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/i386/include/md_var.h,v 1.66 2003/11/03 22:37:28 jhb Exp $ - */ - -#ifndef _MACHINE_MD_VAR_H_ -#define _MACHINE_MD_VAR_H_ - -/* - * Miscellaneous machine-dependent declarations. 
- */ - -extern void (*bcopy_vector)(const void *from, void *to, size_t len); -extern void (*bzero_vector)(void *buf, size_t len); -extern int (*copyin_vector)(const void *udaddr, void *kaddr, size_t len); -extern int (*copyout_vector)(const void *kaddr, void *udaddr, size_t len); - -extern long Maxmem; -extern u_int atdevbase; /* offset in virtual memory of ISA io mem */ -extern u_int basemem; /* PA of original top of base memory */ -extern int busdma_swi_pending; -extern u_int cpu_exthigh; -extern u_int cpu_feature; -extern u_int cpu_fxsr; -extern u_int cpu_high; -extern u_int cpu_id; -extern u_int cpu_procinfo; -extern char cpu_vendor[]; -extern u_int cyrix_did; -extern uint16_t *elan_mmcr; -extern char kstack[]; -#ifdef PC98 -extern int need_pre_dma_flush; -extern int need_post_dma_flush; -#endif -extern char sigcode[]; -extern int szsigcode; -#ifdef COMPAT_FREEBSD4 -extern int szfreebsd4_sigcode; -#endif -#ifdef COMPAT_43 -extern int szosigcode; -#endif - -typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); -struct thread; -struct reg; -struct fpreg; -struct dbreg; - -void bcopyb(const void *from, void *to, size_t len); -void busdma_swi(void); -void cpu_setregs(void); -void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); -void doreti_iret(void) __asm(__STRING(doreti_iret)); -void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); -void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds)); -void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault)); -void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); -void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); -void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); -void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault)); -void scrit(void) __asm(__STRING(scrit)); -void ecrit(void) __asm(__STRING(ecrit)); -void critical_region_fixup(void) __asm(__STRING(critical_region_fixup)); -void enable_sse(void); -void fillw(int 
/*u_short*/ pat, void *base, size_t cnt); -void i486_bzero(void *buf, size_t len); -void i586_bcopy(const void *from, void *to, size_t len); -void i586_bzero(void *buf, size_t len); -int i586_copyin(const void *udaddr, void *kaddr, size_t len); -int i586_copyout(const void *kaddr, void *udaddr, size_t len); -void i686_pagezero(void *addr); -void sse2_pagezero(void *addr); -void init_AMD_Elan_sc520(void); -int is_physical_memory(vm_offset_t addr); -int isa_nmi(int cd); -vm_paddr_t kvtop(void *addr); -void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec); -int user_dbreg_trap(void); - -#endif /* !_MACHINE_MD_VAR_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/multicall.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/multicall.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,98 +0,0 @@ -/****************************************************************************** - * multicall.h - */ - -#ifndef __MULTICALL_H__ -#define __MULTICALL_H__ - -#include <machine/hypervisor.h> -#define MAX_MULTICALL_ENTS 8 -extern multicall_entry_t multicall_list[]; -extern int nr_multicall_ents; - -static inline void execute_multicall_list(void) -{ - if ( unlikely(nr_multicall_ents == 0) ) return; - (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents); - nr_multicall_ents = 0; -} - - -static inline void handle_edge(void) -{ - if (unlikely(nr_multicall_ents == MAX_MULTICALL_ENTS)) - execute_multicall_list(); -} - -static inline void queue_multicall0(unsigned long op) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - nr_multicall_ents = i+1; - handle_edge(); -} - -static inline void queue_multicall1(unsigned long op, unsigned long arg1) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - nr_multicall_ents = i+1; - handle_edge(); -} - -static inline void queue_multicall2( - unsigned long op, unsigned long arg1, unsigned long arg2) -{ - int i = 
nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - nr_multicall_ents = i+1; - handle_edge(); -} - -static inline void queue_multicall3( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - nr_multicall_ents = i+1; - handle_edge(); -} - -static inline void queue_multicall4( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - multicall_list[i].args[3] = arg4; - nr_multicall_ents = i+1; - handle_edge(); -} - -static inline void queue_multicall5( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - multicall_list[i].args[3] = arg4; - multicall_list[i].args[4] = arg5; - nr_multicall_ents = i+1; - handle_edge(); -} - - -#endif /* __MULTICALL_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/param.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/param.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,146 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)param.h 5.8 (Berkeley) 6/28/91 - * $FreeBSD: src/sys/i386/include/param.h,v 1.69 2003/06/14 23:23:53 alc Exp $ - */ - -/* - * Machine dependent constants for Intel 386. - */ - -/* - * Round p (pointer or byte index) up to a correctly-aligned value - * for all data types (int, long, ...). The result is unsigned int - * and must be cast to any desired pointer type. 
- */ -#ifndef _ALIGNBYTES -#define _ALIGNBYTES (sizeof(int) - 1) -#endif -#ifndef _ALIGN -#define _ALIGN(p) (((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES) -#endif - -#ifndef _MACHINE -#define _MACHINE i386-xen -#endif -#ifndef _MACHINE_ARCH -#define _MACHINE_ARCH i386-xen -#endif - -#ifndef _NO_NAMESPACE_POLLUTION - -#ifndef _MACHINE_PARAM_H_ -#define _MACHINE_PARAM_H_ - -#ifndef MACHINE -#define MACHINE "i386" -#endif -#ifndef MACHINE_ARCH -#define MACHINE_ARCH "i386" -#endif -#define MID_MACHINE MID_I386 - -#ifdef SMP -#define MAXCPU 16 -#else -#define MAXCPU 1 -#endif /* SMP */ - -#define ALIGNBYTES _ALIGNBYTES -#define ALIGN(p) _ALIGN(p) - -#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ -#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */ -#define PAGE_MASK (PAGE_SIZE-1) -#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) - -#ifdef PAE -#define NPGPTD 4 -#define PDRSHIFT 21 /* LOG2(NBPDR) */ -#else -#define NPGPTD 1 -#define PDRSHIFT 22 /* LOG2(NBPDR) */ -#endif - -#define NBPTD (NPGPTD<<PAGE_SHIFT) -#define NPDEPTD (NBPTD/(sizeof (pd_entry_t))) -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */ -#define PDRMASK (NBPDR-1) - -#define IOPAGES 2 /* pages of i/o permission bitmap */ - -#ifndef KSTACK_PAGES -#define KSTACK_PAGES 2 /* Includes pcb! */ -#endif -#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */ -#define UAREA_PAGES 1 /* holds struct user WITHOUT PCB (see def.) */ - -/* - * Ceiling on amount of swblock kva space, can be changed via - * the kern.maxswzone /boot/loader.conf variable. - */ -#ifndef VM_SWZONE_SIZE_MAX -#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024) -#endif - -/* - * Ceiling on size of buffer cache (really only effects write queueing, - * the VM page cache is not effected), can be changed via - * the kern.maxbcache /boot/loader.conf variable. 
- */ -#ifndef VM_BCACHE_SIZE_MAX -#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024) -#endif - -/* - * Mach derived conversion macros - */ -#define trunc_page(x) ((x) & ~PAGE_MASK) -#define round_page(x) (((x) + PAGE_MASK) & ~PAGE_MASK) -#define trunc_4mpage(x) ((x) & ~PDRMASK) -#define round_4mpage(x) ((((x)) + PDRMASK) & ~PDRMASK) - -#define atop(x) ((x) >> PAGE_SHIFT) -#define ptoa(x) ((x) << PAGE_SHIFT) - -#define i386_btop(x) ((x) >> PAGE_SHIFT) -#define i386_ptob(x) ((x) << PAGE_SHIFT) - -#define pgtok(x) ((x) * (PAGE_SIZE / 1024)) - -#endif /* !_MACHINE_PARAM_H_ */ -#endif /* !_NO_NAMESPACE_POLLUTION */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/pcb.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/pcb.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,96 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)pcb.h 5.10 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/include/pcb.h,v 1.50 2003/09/30 08:11:36 jeff Exp $ - */ - -#ifndef _I386_PCB_H_ -#define _I386_PCB_H_ - -/* - * Intel 386 process control block - */ -#include <machine/npx.h> - -struct pcb { - int pcb_cr3; - int pcb_edi; - int pcb_esi; - int pcb_ebp; - int pcb_esp; - int pcb_eax; - int pcb_ebx; - int pcb_ecx; - int pcb_edx; - int pcb_eip; - - int pcb_dr0; - int pcb_dr1; - int pcb_dr2; - int pcb_dr3; - int pcb_dr6; - int pcb_dr7; - - union savefpu pcb_save; - u_int pcb_flags; -#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ -#define PCB_DBREGS 0x02 /* process using debug registers */ -#define PCB_NPXTRAP 0x04 /* npx trap pending */ -#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ -#define PCB_VM86CALL 0x10 /* in vm86 call */ - - caddr_t pcb_onfault; /* copyin/out fault recovery */ - int pcb_cs; - int pcb_ds; - int pcb_ss; - int pcb_es; - int pcb_gs; - int pcb_fs; - struct pcb_ext *pcb_ext; /* optional pcb extension */ - int pcb_psl; /* process status long */ - void (*pcb_switchout)(void); /* Special switchout function. 
*/ - u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ -}; - -#ifdef _KERNEL -struct trapframe; - -void makectx(struct trapframe *, struct pcb *); - -void savectx(struct pcb *); -#endif - -#endif /* _I386_PCB_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,179 +0,0 @@ -/*- - * Copyright (c) Peter Wemm <peter@xxxxxxxxxxxxxx> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD: src/sys/i386/include/pcpu.h,v 1.41 2003/11/20 23:23:22 peter Exp $ - */ - -#ifndef _MACHINE_PCPU_H_ -#define _MACHINE_PCPU_H_ - -#ifdef _KERNEL - -#include <machine/segments.h> -#include <machine/tss.h> - -/* - * The SMP parts are setup in pmap.c and locore.s for the BSP, and - * mp_machdep.c sets up the data for the AP's to "see" when they awake. - * The reason for doing it via a struct is so that an array of pointers - * to each CPU's data can be set up for things like "check curproc on all - * other processors" - */ -#define PCPU_MD_FIELDS \ - struct pcpu *pc_prvspace; /* Self-reference */ \ - struct pmap *pc_curpmap; \ - struct i386tss pc_common_tss; \ - struct segment_descriptor pc_common_tssd; \ - struct segment_descriptor *pc_tss_gdt; \ - int pc_currentldt; \ - u_int pc_acpi_id; \ - u_int pc_apic_id; \ - int *pc_ipi_to_evtchn; \ - int *pc_virq_to_irq; \ - u_int pc_cr2; \ - u_int pc_pdir; \ - u_int pc_lazypmap; \ - u_int pc_rendezvous; \ - u_int pc_cpuast; \ - u_int pc_time_irq; \ - uint64_t pc_processed_system_time; - -#if defined(lint) - -extern struct pcpu *pcpup; - -#define PCPU_GET(member) (pcpup->pc_ ## member) -#define PCPU_PTR(member) (&pcpup->pc_ ## member) -#define PCPU_SET(member,value) (pcpup->pc_ ## member = (value)) - -#elif defined(__GNUC__) - -/* - * Evaluates to the byte offset of the per-cpu variable name. - */ -#define __pcpu_offset(name) \ - __offsetof(struct pcpu, name) - -/* - * Evaluates to the type of the per-cpu variable name. - */ -#define __pcpu_type(name) \ - __typeof(((struct pcpu *)0)->name) - -/* - * Evaluates to the address of the per-cpu variable name. - */ -#define __PCPU_PTR(name) __extension__ ({ \ - __pcpu_type(name) *__p; \ - \ - __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ - : "=r" (__p) \ - : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ - "i" (__pcpu_offset(name))); \ - \ - __p; \ -}) - -/* - * Evaluates to the value of the per-cpu variable name. 
- */ -#define __PCPU_GET(name) __extension__ ({ \ - __pcpu_type(name) __result; \ - \ - if (sizeof(__result) == 1) { \ - u_char __b; \ - __asm __volatile("movb %%fs:%1,%0" \ - : "=r" (__b) \ - : "m" (*(u_char *)(__pcpu_offset(name)))); \ - __result = *(__pcpu_type(name) *)(void *)&__b; \ - } else if (sizeof(__result) == 2) { \ - u_short __w; \ - __asm __volatile("movw %%fs:%1,%0" \ - : "=r" (__w) \ - : "m" (*(u_short *)(__pcpu_offset(name)))); \ - __result = *(__pcpu_type(name) *)(void *)&__w; \ - } else if (sizeof(__result) == 4) { \ - u_int __i; \ - __asm __volatile("movl %%fs:%1,%0" \ - : "=r" (__i) \ - : "m" (*(u_int *)(__pcpu_offset(name)))); \ - __result = *(__pcpu_type(name) *)(void *)&__i; \ - } else { \ - __result = *__PCPU_PTR(name); \ - } \ - \ - __result; \ -}) - -/* - * Sets the value of the per-cpu variable name to value val. - */ -#define __PCPU_SET(name, val) { \ - __pcpu_type(name) __val = (val); \ - \ - if (sizeof(__val) == 1) { \ - u_char __b; \ - __b = *(u_char *)&__val; \ - __asm __volatile("movb %1,%%fs:%0" \ - : "=m" (*(u_char *)(__pcpu_offset(name))) \ - : "r" (__b)); \ - } else if (sizeof(__val) == 2) { \ - u_short __w; \ - __w = *(u_short *)&__val; \ - __asm __volatile("movw %1,%%fs:%0" \ - : "=m" (*(u_short *)(__pcpu_offset(name))) \ - : "r" (__w)); \ - } else if (sizeof(__val) == 4) { \ - u_int __i; \ - __i = *(u_int *)&__val; \ - __asm __volatile("movl %1,%%fs:%0" \ - : "=m" (*(u_int *)(__pcpu_offset(name))) \ - : "r" (__i)); \ - } else { \ - *__PCPU_PTR(name) = __val; \ - } \ -} - -#define PCPU_GET(member) __PCPU_GET(pc_ ## member) -#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) -#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) - -static __inline struct thread * -__curthread(void) -{ - struct thread *td; - - __asm __volatile("movl %%fs:0,%0" : "=r" (td)); - return (td); -} -#define curthread (__curthread()) - -#else -#error gcc or lint is required to use this file -#endif - -#endif /* _KERNEL */ - -#endif /* ! 
_MACHINE_PCPU_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/pmap.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,356 +0,0 @@ -/* - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Derived from hp300 version by Mike Hibler, this version by William - * Jolitz uses a recursive map [a pde points to the page directory] to - * map the page tables using the pagetables themselves. This is done to - * reduce the impact on kernel virtual memory for lots of sparse address - * space, and to reduce the cost of memory to each process. - * - * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 - * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/include/pmap.h,v 1.103 2003/11/08 03:01:26 alc Exp $ - */ - -#ifndef _MACHINE_PMAP_H_ -#define _MACHINE_PMAP_H_ - -/* - * Page-directory and page-table entires follow this format, with a few - * of the fields not present here and there, depending on a lot of things. 
- */ - /* ---- Intel Nomenclature ---- */ -#define PG_V 0x001 /* P Valid */ -#define PG_RW 0x002 /* R/W Read/Write */ -#define PG_U 0x004 /* U/S User/Supervisor */ -#define PG_NC_PWT 0x008 /* PWT Write through */ -#define PG_NC_PCD 0x010 /* PCD Cache disable */ -#define PG_A 0x020 /* A Accessed */ -#define PG_M 0x040 /* D Dirty */ -#define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ -#define PG_G 0x100 /* G Global */ -#define PG_AVAIL1 0x200 /* / Available for system */ -#define PG_AVAIL2 0x400 /* < programmers use */ -#define PG_AVAIL3 0x800 /* \ */ - - -/* Our various interpretations of the above */ -#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ -#define PG_MANAGED PG_AVAIL2 -#define PG_FRAME (~((vm_paddr_t)PAGE_MASK)) -#define PG_PROT (PG_RW|PG_U) /* all protection bits . */ -#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ - -#define PG_KERNEL (PG_V | PG_RW | PG_M | PG_A) -#define PG_KERNEL_NC (PG_KERNEL | PG_N) -#define PG_KERNEL_RO (PG_VALID | PG_M | PG_A) - -/* - * Page Protection Exception bits - */ - -#define PGEX_P 0x01 /* Protection violation vs. not present */ -#define PGEX_W 0x02 /* during a Write cycle */ -#define PGEX_U 0x04 /* access from User mode (UPL) */ -#define XEN_PAGES 16 - -/* - * Size of Kernel address space. This is the number of page table pages - * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. - * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). - */ - -#ifndef KVA_PAGES -#ifdef PAE -#define KVA_PAGES 512 -#else -#define KVA_PAGES 256 -#endif -#endif - -/* - * Pte related macros - */ -#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT))) - -#ifndef NKPT -#ifdef PAE -#define NKPT 120 /* actual number of kernel page tables */ -#else -#define NKPT 30 /* actual number of kernel page tables */ -#endif -#endif - -/* - * XEN NOTE: Xen consumes 64MB of memory, so subtract that from the number - * of page available to the kernel virutal address space. 
- */ -#ifndef NKPDE -#ifdef SMP -#define NKPDE (KVA_PAGES - 1 - XEN_PAGES) /* number of page tables/pde's */ -#else -#define NKPDE (KVA_PAGES - XEN_PAGES) /* number of page tables/pde's */ -#endif -#endif - -/* - * The *PTDI values control the layout of virtual memory - * - * XXX This works for now, but I am not real happy with it, I'll fix it - * right after I fix locore.s and the magic 28K hole - * - * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff - */ - -/* - * XEN NOTE: We need to shift down the start of KVA by 64MB to account for - * Xen using the upper 64MB. - * - * The layout of VA for XenoBSD is: - * | USER | PTDPTDI | KVA | XEN | - * | 0x00000000 | 0xbfc00000 | 0xc0000000 | 0xfc000000 - 0xffffffff| - * - * Normally it is just: - * | USER | PTDPTDI | KVA | - * | 0x00000000 | 0xbfc00000 | 0xc0000000 - 0xffffffff | - */ - -#ifdef SMP -#define MPPTDI (NPDEPTD-1-XEN_PAGES) /* per cpu ptd entry */ -#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ -#else -#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */ -#endif /* SMP */ - -#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */ - -/* - * XXX doesn't really belong here I guess... - */ -#define ISA_HOLE_START 0xa0000 -#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) - -#ifndef LOCORE - -#include <sys/queue.h> -#include <sys/_lock.h> -#include <sys/_mutex.h> - - -typedef uint32_t pd_entry_t; -typedef uint32_t pt_entry_t; - -#define PTESHIFT (2) -#define PDESHIFT (2) - - -/* - * Address of current and alternate address space page table maps - * and directories. - */ -#ifdef _KERNEL -extern pt_entry_t PTmap[]; -extern pd_entry_t PTD[]; -extern pd_entry_t PTDpde[]; - -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ - -#include <machine/xen-os.h> -#include <machine/xenvar.h> -#include <machine/xenpmap.h> - - -/* - * virtual address to page table entry and - * to physical address. 
Likewise for alternate address space. - * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ -#define vtopte(va) (PTmap + i386_btop(va)) - -/* - * Given a virtual address, return the machine address of its PTE - * - */ -#define vtoptema(va) pmap_kextract_ma((vm_offset_t) vtopte(va)) - -/* - * Routine: pmap_kextract/pmap_kextract_ma - * Function: - * Extract the physical/machine page address associated - * kernel virtual address. - */ - -static __inline vm_paddr_t -pmap_kextract_ma(vm_offset_t va) -{ - vm_paddr_t ma; - if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { - ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); - } else { - ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); - } - return ma; -} - -static __inline vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - return xpmap_mtop(pmap_kextract_ma(va)); -} - -#define vtophys(va) pmap_kextract(((vm_offset_t) (va))) -#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) - -static __inline pt_entry_t -pte_load_clear(pt_entry_t *ptep) -{ - pt_entry_t r; - - r = PT_GET(ptep); - PT_CLEAR_VA(ptep, TRUE); - return (r); -} -static __inline pt_entry_t -pte_load_store(pt_entry_t *ptep, pt_entry_t v) -{ - pt_entry_t r; - r = PT_GET(ptep); - PT_SET_VA_MA(ptep, v, TRUE); - return (r); -} - -#define pte_store(ptep, pte) PT_SET_VA(ptep, pte, TRUE) -#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE) - - -#endif /* _KERNEL */ - -/* - * Pmap stuff - */ -struct pv_entry; - -struct md_page { - int pv_list_count; - TAILQ_HEAD(,pv_entry) pv_list; -}; - -struct pmap { - struct mtx pm_mtx; - pd_entry_t *pm_pdir; /* KVA of page directory */ - TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - u_int pm_active; /* active on cpus */ - struct pmap_statistics pm_stats; /* pmap statistics */ - LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ -}; - - -typedef struct pmap *pmap_t; - -#ifdef _KERNEL -extern struct pmap kernel_pmap_store; -#define kernel_pmap (&kernel_pmap_store) - 
-#define PMAP_LOCK(pmap)mtx_lock(&(pmap)->pm_mtx) -#define PMAP_LOCK_ASSERT(pmap, type) \ -mtx_assert(&(pmap)->pm_mtx, (type)) -#define PMAP_LOCK_DESTROY(pmap)mtx_destroy(&(pmap)->pm_mtx) -#define PMAP_LOCK_INIT(pmap)mtx_init(&(pmap)->pm_mtx, "pmap", \ - NULL, MTX_DEF | MTX_DUPOK) -#define PMAP_LOCKED(pmap)mtx_owned(&(pmap)->pm_mtx) -#define PMAP_MTX(pmap)(&(pmap)->pm_mtx) -#define PMAP_TRYLOCK(pmap)mtx_trylock(&(pmap)->pm_mtx) -#define PMAP_UNLOCK(pmap)mtx_unlock(&(pmap)->pm_mtx) - -#endif - -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry_t, the list is pv_table. - */ -typedef struct pv_entry { - pmap_t pv_pmap; /* pmap where mapping lies */ - vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; -} *pv_entry_t; - -#ifdef _KERNEL - -#define NPPROVMTRR 8 -#define PPRO_VMTRRphysBase0 0x200 -#define PPRO_VMTRRphysMask0 0x201 -struct ppro_vmtrr { - u_int64_t base, mask; -}; -extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR]; - -extern caddr_t CADDR1; -extern pt_entry_t *CMAP1; -extern vm_paddr_t avail_end; -extern vm_paddr_t phys_avail[]; -extern int pseflag; -extern int pgeflag; -extern char *ptvmmap; /* poor name! 
*/ -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; - -#define pmap_page_is_mapped(m)(!TAILQ_EMPTY(&(m)->md.pv_list)) - -void pmap_bootstrap(vm_paddr_t, vm_paddr_t); -void pmap_kenter(vm_offset_t va, vm_paddr_t pa); -void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); -void *pmap_kenter_temporary(vm_paddr_t pa, int i); -void pmap_kremove(vm_offset_t); -void *pmap_mapdev(vm_paddr_t, vm_size_t); -void pmap_unmapdev(vm_offset_t, vm_size_t); -pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; -void pmap_set_pg(void); -void pmap_invalidate_page(pmap_t, vm_offset_t); -void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); -void pmap_invalidate_all(pmap_t); -void pmap_lazyfix_action(void); - -void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len); -void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len); - - -#endif /* _KERNEL */ - -#endif /* !LOCORE */ - -#endif /* !_MACHINE_PMAP_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/segments.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/segments.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,260 +0,0 @@ -/*- - * Copyright (c) 1989, 1990 William F. Jolitz - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 - * $FreeBSD: src/sys/i386/include/segments.h,v 1.36 2003/11/03 21:12:04 jhb Exp $ - */ - -#ifndef _MACHINE_SEGMENTS_H_ -#define _MACHINE_SEGMENTS_H_ - -/* - * 386 Segmentation Data Structures and definitions - * William F. 
Jolitz (william@xxxxxxxxxxxxxxxxxx) 6/20/1989 - */ - -/* - * Selectors - */ - -#define ISPL(s) ((s)&3) /* what is the priority level of a selector */ -#define SEL_KPL 1 /* kernel priority level */ -#define SEL_UPL 3 /* user priority level */ -#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ -#define SEL_LDT 4 /* local descriptor table */ -#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ -#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */ -#define GSEL(s,r) (((s)<<3) | r) /* a global selector */ - -/* - * Memory and System segment descriptors - */ -struct segment_descriptor { - unsigned sd_lolimit:16 ; /* segment extent (lsb) */ - unsigned sd_lobase:24 __packed; /* segment base address (lsb) */ - unsigned sd_type:5 ; /* segment type */ - unsigned sd_dpl:2 ; /* segment descriptor priority level */ - unsigned sd_p:1 ; /* segment descriptor present */ - unsigned sd_hilimit:4 ; /* segment extent (msb) */ - unsigned sd_xx:2 ; /* unused */ - unsigned sd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/ - unsigned sd_hibase:8 ; /* segment base address (msb) */ -} ; - -/* - * Gate descriptors (e.g. 
indirect descriptors) - */ -struct gate_descriptor { - unsigned gd_looffset:16 ; /* gate offset (lsb) */ - unsigned gd_selector:16 ; /* gate segment selector */ - unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */ - unsigned gd_xx:3 ; /* unused */ - unsigned gd_type:5 ; /* segment type */ - unsigned gd_dpl:2 ; /* segment descriptor priority level */ - unsigned gd_p:1 ; /* segment descriptor present */ - unsigned gd_hioffset:16 ; /* gate offset (msb) */ -} ; - -/* - * Generic descriptor - */ -union descriptor { - struct segment_descriptor sd; - struct gate_descriptor gd; -}; - - /* system segments and gate types */ -#define SDT_SYSNULL 0 /* system null */ -#define SDT_SYS286TSS 1 /* system 286 TSS available */ -#define SDT_SYSLDT 2 /* system local descriptor table */ -#define SDT_SYS286BSY 3 /* system 286 TSS busy */ -#define SDT_SYS286CGT 4 /* system 286 call gate */ -#define SDT_SYSTASKGT 5 /* system task gate */ -#define SDT_SYS286IGT 6 /* system 286 interrupt gate */ -#define SDT_SYS286TGT 7 /* system 286 trap gate */ -#define SDT_SYSNULL2 8 /* system null again */ -#define SDT_SYS386TSS 9 /* system 386 TSS available */ -#define SDT_SYSNULL3 10 /* system null again */ -#define SDT_SYS386BSY 11 /* system 386 TSS busy */ -#define SDT_SYS386CGT 12 /* system 386 call gate */ -#define SDT_SYSNULL4 13 /* system null again */ -#define SDT_SYS386IGT 14 /* system 386 interrupt gate */ -#define SDT_SYS386TGT 15 /* system 386 trap gate */ - - /* memory segment types */ -#define SDT_MEMRO 16 /* memory read only */ -#define SDT_MEMROA 17 /* memory read only accessed */ -#define SDT_MEMRW 18 /* memory read write */ -#define SDT_MEMRWA 19 /* memory read write accessed */ -#define SDT_MEMROD 20 /* memory read only expand dwn limit */ -#define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */ -#define SDT_MEMRWD 22 /* memory read write expand dwn limit */ -#define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed */ -#define SDT_MEME 24 /* memory 
execute only */ -#define SDT_MEMEA 25 /* memory execute only accessed */ -#define SDT_MEMER 26 /* memory execute read */ -#define SDT_MEMERA 27 /* memory execute read accessed */ -#define SDT_MEMEC 28 /* memory execute only conforming */ -#define SDT_MEMEAC 29 /* memory execute only accessed conforming */ -#define SDT_MEMERC 30 /* memory execute read conforming */ -#define SDT_MEMERAC 31 /* memory execute read accessed conforming */ - -/* - * Software definitions are in this convenient format, - * which are translated into inconvenient segment descriptors - * when needed to be used by the 386 hardware - */ - -struct soft_segment_descriptor { - unsigned ssd_base ; /* segment base address */ - unsigned ssd_limit ; /* segment extent */ - unsigned ssd_type:5 ; /* segment type */ - unsigned ssd_dpl:2 ; /* segment descriptor priority level */ - unsigned ssd_p:1 ; /* segment descriptor present */ - unsigned ssd_xx:4 ; /* unused */ - unsigned ssd_xx1:2 ; /* unused */ - unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/ -}; - -/* - * region descriptors, used to load gdt/idt tables before segments yet exist. 
- */ -struct region_descriptor { - unsigned rd_limit:16; /* segment extent */ - unsigned rd_base:32 __packed; /* base address */ -}; - -/* - * Segment Protection Exception code bits - */ - -#define SEGEX_EXT 0x01 /* recursive or externally induced */ -#define SEGEX_IDT 0x02 /* interrupt descriptor table */ -#define SEGEX_TI 0x04 /* local descriptor table */ - /* other bits are affected descriptor index */ -#define SEGEX_IDX(s) (((s)>>3)&0x1fff) - -/* - * Size of IDT table - */ - -#define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */ -#define NRSVIDT 32 /* reserved entries for cpu exceptions */ - -/* - * Entries in the Interrupt Descriptor Table (IDT) - */ -#define IDT_DE 0 /* #DE: Divide Error */ -#define IDT_DB 1 /* #DB: Debug */ -#define IDT_NMI 2 /* Nonmaskable External Interrupt */ -#define IDT_BP 3 /* #BP: Breakpoint */ -#define IDT_OF 4 /* #OF: Overflow */ -#define IDT_BR 5 /* #BR: Bound Range Exceeded */ -#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ -#define IDT_NM 7 /* #NM: No Math Coprocessor */ -#define IDT_DF 8 /* #DF: Double Fault */ -#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ -#define IDT_TS 10 /* #TS: Invalid TSS */ -#define IDT_NP 11 /* #NP: Segment Not Present */ -#define IDT_SS 12 /* #SS: Stack Segment Fault */ -#define IDT_GP 13 /* #GP: General Protection Fault */ -#define IDT_PF 14 /* #PF: Page Fault */ -#define IDT_MF 16 /* #MF: FPU Floating-Point Error */ -#define IDT_AC 17 /* #AC: Alignment Check */ -#define IDT_MC 18 /* #MC: Machine Check */ -#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ -#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ -#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ - -/* - * Entries in the Global Descriptor Table (GDT) - */ -#define GNULL_SEL 0 /* Null Descriptor */ -#if 0 -#define GCODE_SEL 1 /* Kernel Code Descriptor */ -#define GDATA_SEL 2 /* Kernel Data Descriptor */ -#else -#define GCODE_SEL (__KERNEL_CS >> 3) /* Kernel Code Descriptor */ -#define GDATA_SEL (__KERNEL_DS >> 3) /* Kernel Data Descriptor */ -#endif -#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */ -#define GPROC0_SEL 4 /* Task state process slot zero and up */ -#define GLDT_SEL 5 /* LDT - eventually one per process */ -#define GUSERLDT_SEL 6 /* User LDT */ -#define GTGATE_SEL 7 /* Process task switch gate */ -#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ -#define GPANIC_SEL 9 /* Task state to consider panic from */ -#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */ -#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */ -#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */ -#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */ -#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */ - -#define NGDT 4 - -/* - * Entries in the Local Descriptor Table (LDT) - */ -#define LSYS5CALLS_SEL 0 /* forced by intel BCS */ -#define LSYS5SIGR_SEL 1 -#define L43BSDCALLS_SEL 2 /* notyet */ -#define LUCODE_SEL 3 -#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ -#define LUDATA_SEL 5 -/* separate stack, es,fs,gs sels ? 
*/ -/* #define LPOSIXCALLS_SEL 5*/ /* notyet */ -#define LBSDICALLS_SEL 16 /* BSDI system call gate */ -#define NLDT (LBSDICALLS_SEL + 1) - -#ifdef _KERNEL -extern int _default_ldt; -extern union descriptor *gdt; -extern struct soft_segment_descriptor gdt_segs[]; -extern struct gate_descriptor *idt; -extern union descriptor *ldt; -extern struct region_descriptor r_gdt, r_idt; - -void lgdt(struct region_descriptor *rdp); -void lgdt_finish(void); -void sdtossd(struct segment_descriptor *sdp, - struct soft_segment_descriptor *ssdp); -void ssdtosd(struct soft_segment_descriptor *ssdp, - struct segment_descriptor *sdp); -#endif /* _KERNEL */ - -#endif /* !_MACHINE_SEGMENTS_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,82 +0,0 @@ -#ifndef __XEN_SYNCH_BITOPS_H__ -#define __XEN_SYNCH_BITOPS_H__ - -/* - * Copyright 1992, Linus Torvalds. - * Heavily modified to provide guaranteed strong synchronisation - * when communicating with Xen or other guest OSes running on other CPUs. 
- */ - - -#define ADDR (*(volatile long *) addr) - -static __inline__ void synch_set_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btsl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_clear_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btrl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_change_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btcl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btsl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btrl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__ ( - "lock btcl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & - (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int synch_var_test_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "btl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); - return oldbit; -} - -#define synch_test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? 
\ - synch_const_test_bit((nr),(addr)) : \ - synch_var_test_bit((nr),(addr))) - -#endif /* __XEN_SYNCH_BITOPS_H__ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/trap.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/trap.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,111 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)trap.h 5.4 (Berkeley) 5/9/91 - * $FreeBSD: src/sys/i386/include/trap.h,v 1.13 2001/07/12 06:32:51 peter Exp $ - */ - -#ifndef _MACHINE_TRAP_H_ -#define _MACHINE_TRAP_H_ - -/* - * Trap type values - * also known in trap.c for name strings - */ - -#define T_PRIVINFLT 1 /* privileged instruction */ -#define T_BPTFLT 3 /* breakpoint instruction */ -#define T_ARITHTRAP 6 /* arithmetic trap */ -#define T_PROTFLT 9 /* protection fault */ -#define T_TRCTRAP 10 /* debug exception (sic) */ -#define T_PAGEFLT 12 /* page fault */ -#define T_ALIGNFLT 14 /* alignment fault */ - -#define T_NESTED 16 -#define T_HYPCALLBACK 17 /* hypervisor callback */ - - -#define T_DIVIDE 18 /* integer divide fault */ -#define T_NMI 19 /* non-maskable trap */ -#define T_OFLOW 20 /* overflow trap */ -#define T_BOUND 21 /* bound instruction fault */ -#define T_DNA 22 /* device not available fault */ -#define T_DOUBLEFLT 23 /* double fault */ -#define T_FPOPFLT 24 /* fp coprocessor operand fetch fault */ -#define T_TSSFLT 25 /* invalid tss fault */ -#define T_SEGNPFLT 26 /* segment not present fault */ -#define T_STKFLT 27 /* stack fault */ -#define T_MCHK 28 /* machine check trap */ -#define T_XMMFLT 29 /* SIMD floating-point exception */ -#define T_RESERVED 30 /* reserved (unknown) */ - -/* XXX most of the following codes aren't used, but could be. 
*/ - -/* definitions for <sys/signal.h> */ -#define ILL_RESAD_FAULT T_RESADFLT -#define ILL_PRIVIN_FAULT T_PRIVINFLT -#define ILL_RESOP_FAULT T_RESOPFLT -#define ILL_ALIGN_FAULT T_ALIGNFLT -#define ILL_FPOP_FAULT T_FPOPFLT /* coprocessor operand fault */ - -/* portable macros for SIGFPE/ARITHTRAP */ -#define FPE_INTOVF 1 /* integer overflow */ -#define FPE_INTDIV 2 /* integer divide by zero */ -#define FPE_FLTDIV 3 /* floating point divide by zero */ -#define FPE_FLTOVF 4 /* floating point overflow */ -#define FPE_FLTUND 5 /* floating point underflow */ -#define FPE_FLTRES 6 /* floating point inexact result */ -#define FPE_FLTINV 7 /* invalid floating point operation */ -#define FPE_FLTSUB 8 /* subscript out of range */ - -/* old FreeBSD macros, deprecated */ -#define FPE_INTOVF_TRAP 0x1 /* integer overflow */ -#define FPE_INTDIV_TRAP 0x2 /* integer divide by zero */ -#define FPE_FLTDIV_TRAP 0x3 /* floating/decimal divide by zero */ -#define FPE_FLTOVF_TRAP 0x4 /* floating overflow */ -#define FPE_FLTUND_TRAP 0x5 /* floating underflow */ -#define FPE_FPU_NP_TRAP 0x6 /* floating point unit not present */ -#define FPE_SUBRNG_TRAP 0x7 /* subrange out of bounds */ - -/* codes for SIGBUS */ -#define BUS_PAGE_FAULT T_PAGEFLT /* page fault protection base */ -#define BUS_SEGNP_FAULT T_SEGNPFLT /* segment not present */ -#define BUS_STK_FAULT T_STKFLT /* stack segment */ -#define BUS_SEGM_FAULT T_RESERVED /* segment protection base */ - -/* Trap's coming from user mode */ -#define T_USER 0x100 - -#endif /* !_MACHINE_TRAP_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,141 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 1994 John S. Dyson - * All rights reserved. 
- * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 - * $FreeBSD: src/sys/i386/include/vmparam.h,v 1.37 2003/10/01 23:46:08 peter Exp $ - */ - - -#ifndef _MACHINE_VMPARAM_H_ -#define _MACHINE_VMPARAM_H_ 1 - -/* - * Machine dependent constants for 386. - */ - -#define VM_PROT_READ_IS_EXEC /* if you can read -- then you can exec */ - -/* - * Virtual memory related constants, all in bytes - */ -#define MAXTSIZ (128UL*1024*1024) /* max text size */ -#ifndef DFLDSIZ -#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ -#endif -#ifndef MAXDSIZ -#define MAXDSIZ (512UL*1024*1024) /* max data size */ -#endif -#ifndef DFLSSIZ -#define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ -#endif -#ifndef MAXSSIZ -#define MAXSSIZ (64UL*1024*1024) /* max stack size */ -#endif -#ifndef SGROWSIZ -#define SGROWSIZ (128UL*1024) /* amount to grow stack */ -#endif - -#define USRTEXT (1*PAGE_SIZE) /* base of user text XXX bogus */ - -/* - * The time for a process to be blocked before being very swappable. - * This is a number of seconds which the system takes as being a non-trivial - * amount of real time. You probably shouldn't change this; - * it is used in subtle ways (fractions and multiples of it are, that is, like - * half of a ``long time'', almost a long time, etc.) - * It is related to human patience and other factors which don't really - * change over time. - */ -#define MAXSLP 20 - - -/* - * Kernel physical load address. - */ -#ifndef KERNLOAD -#define KERNLOAD (1 << PDRSHIFT) -#endif - -/* - * Virtual addresses of things. Derived from the page directory and - * page table indexes from pmap.h for precision. 
- * Because of the page that is both a PD and PT, it looks a little - * messy at times, but hey, we'll do anything to save a page :-) - */ - -#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) -#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) - -#define KERNBASE VADDR(KPTDI, 0) - -#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) -#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) - -#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0) - -#define USRSTACK VM_MAXUSER_ADDRESS - -#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) -#define VM_MIN_ADDRESS ((vm_offset_t)0) - -/* virtual sizes (bytes) for various kernel submaps */ -#ifndef VM_KMEM_SIZE -#define VM_KMEM_SIZE (12 * 1024 * 1024) -#endif - -/* - * How many physical pages per KVA page allocated. - * min(max(VM_KMEM_SIZE, Physical memory/VM_KMEM_SIZE_SCALE), VM_KMEM_SIZE_MAX) - * is the total KVA space allocated for kmem_map. - */ -#ifndef VM_KMEM_SIZE_SCALE -#define VM_KMEM_SIZE_SCALE (3) -#endif - -/* - * Ceiling on amount of kmem_map kva space. - */ -#ifndef VM_KMEM_SIZE_MAX -#define VM_KMEM_SIZE_MAX (320 * 1024 * 1024) -#endif - -/* initial pagein size of beginning of executable file */ -#ifndef VM_INITIAL_PAGEIN -#define VM_INITIAL_PAGEIN 16 -#endif - -#endif /* _MACHINE_VMPARAM_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,314 +0,0 @@ -/****************************************************************************** - * os.h - * - * random collection of macros and definition - */ - -#ifndef _OS_H_ -#define _OS_H_ -#include <machine/param.h> - -#ifndef NULL -#define NULL (void *)0 -#endif - -/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented - a mechanism by which the user can annotate likely branch directions and - expect the blocks to be reordered appropriately. Define __builtin_expect - to nothing for earlier compilers. 
*/ - -#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 -#define __builtin_expect(x, expected_value) (x) -#endif - - - -/* - * These are the segment descriptors provided for us by the hypervisor. - * For now, these are hardwired -- guest OSes cannot update the GDT - * or LDT. - * - * It shouldn't be hard to support descriptor-table frobbing -- let me - * know if the BSD or XP ports require flexibility here. - */ - - -/* - * these are also defined in hypervisor-if.h but can't be pulled in as - * they are used in start of day assembly. Need to clean up the .h files - * a bit more... - */ - -#ifndef FLAT_RING1_CS -#define FLAT_RING1_CS 0x0819 -#define FLAT_RING1_DS 0x0821 -#define FLAT_RING3_CS 0x082b -#define FLAT_RING3_DS 0x0833 -#endif - -#define __KERNEL_CS FLAT_RING1_CS -#define __KERNEL_DS FLAT_RING1_DS - -/* Everything below this point is not included by assembler (.S) files. */ -#ifndef __ASSEMBLY__ -#include <sys/types.h> - -#include <machine/hypervisor-ifs.h> -void printk(const char *fmt, ...); - -/* some function prototypes */ -void trap_init(void); - -extern int preemptable; -#define preempt_disable() (preemptable = 0) -#define preempt_enable() (preemptable = 1) -#define preempt_enable_no_resched() (preemptable = 1) - - -/* - * STI/CLI equivalents. These basically set and clear the virtual - * event_enable flag in teh shared_info structure. Note that when - * the enable bit is set, there may be pending events to be handled. - * We may therefore call into do_hypervisor_callback() directly. 
- */ -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - - - -#define __cli() \ -do { \ - vcpu_info_t *_vcpu; \ - preempt_disable(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ - _vcpu->evtchn_upcall_mask = 1; \ - preempt_enable_no_resched(); \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - preempt_disable(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ - _vcpu->evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - preempt_enable(); \ -} while (0) - - -#define __save_flags(x) \ -do { \ - vcpu_info_t *vcpu; \ - vcpu = HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ - (x) = _vcpu->evtchn_upcall_mask; \ -} while (0) - -#define __restore_flags(x) \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - preempt_disable(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - preempt_enable(); \ - } else \ - preempt_enable_no_resched(); \ -} while (0) - - -#define __save_and_cli(x) \ -do { \ - vcpu_info_t *_vcpu; \ - preempt_disable(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ - (x) = _vcpu->evtchn_upcall_mask; \ - _vcpu->evtchn_upcall_mask = 1; \ - preempt_enable_no_resched(); \ - barrier(); \ -} while (0) - - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -#define mtx_lock_irqsave(lock, x) 
{local_irq_save((x)); mtx_lock_spin((lock));} -#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } - -#define mb() -#define rmb() -#define wmb() -#ifdef SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - - -/* This is a barrier for the compiler only, NOT the processor! */ -#define barrier() __asm__ __volatile__("": : :"memory") - -#define LOCK_PREFIX "" -#define LOCK "" -#define ADDR (*(volatile long *) addr) -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - - - -#define xen_xchg(ptr,v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) -static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, - int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. 
- */ -static __inline__ int test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int constant_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int variable_test_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit) - :"m" (ADDR),"Ir" (nr)); - return oldbit; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __inline__ void set_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() - * in order to ensure changes are visible on other processors. - */ -static __inline__ void clear_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. 
Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - - -#endif /* !__ASSEMBLY__ */ - -#endif /* _OS_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,53 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ -#ifndef _XEN_INTR_H_ -#define _XEN_INTR_H_ - -/* -* The flat IRQ space is divided into two regions: -* 1. A one-to-one mapping of real physical IRQs. This space is only used -* if we have physical device-access privilege. This region is at the -* start of the IRQ space so that existing device drivers do not need -* to be modified to translate physical IRQ numbers into our IRQ space. -* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These -* are bound using the provided bind/unbind functions. -*/ - -#define PIRQ_BASE 0 -#define NR_PIRQS 128 - -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 - -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ -extern int bind_virq_to_irq(int virq); -extern void unbind_virq_from_irq(int virq); -extern int bind_evtchn_to_irq(int evtchn); -extern void unbind_evtchn_from_irq(int evtchn); -extern int bind_ipi_on_cpu_to_irq(int cpu, int ipi); -extern void unbind_ipi_on_cpu_from_irq(int cpu, int ipi); -extern void ap_evtchn_init(int cpu); - -static __inline__ int irq_cannonicalize(int irq) -{ - return (irq == 2) ? 
9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - -extern void idle_block(void); - - -#endif /* _XEN_INTR_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,87 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef _XEN_XENFUNC_H_ -#define _XEN_XENFUNC_H_ - -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/xenpmap.h> -#include <machine/segments.h> -#include <sys/pcpu.h> -#define BKPT __asm__("int3"); -#define XPQ_CALL_DEPTH 5 -#define XPQ_CALL_COUNT 2 -#define PG_PRIV PG_AVAIL3 -typedef struct { - unsigned long pt_ref; - unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; -} pteinfo_t; - -extern pteinfo_t *pteinfo_list; -#ifdef XENDEBUG_LOW -#define __PRINTK(x) printk x -#else -#define __PRINTK(x) -#endif - -char *xen_setbootenv(char *cmd_line); -int xen_boothowto(char *envp); -void load_cr3(uint32_t val); -void xen_machphys_update(unsigned long, unsigned long); -void xen_update_descriptor(union descriptor *, union descriptor *); -void lldt(u_short sel); -void ap_cpu_initclocks(void); - - -/* - * Invalidate a patricular VA on all cpus - * - * N.B. Made these global for external loadable modules to reference. 
- */ -static __inline void -invlpg(u_int addr) -{ - xen_invlpg(addr); -} - -static __inline void -invltlb(void) -{ - xen_tlb_flush(); - -} - -#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} - -#endif /* _XEN_XENFUNC_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,180 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef _XEN_XENPMAP_H_ -#define _XEN_XENPMAP_H_ -#include <machine/xenvar.h> -void xen_invlpg(vm_offset_t); -void xen_queue_pt_update(vm_paddr_t, vm_paddr_t); -void xen_pt_switch(uint32_t); -void xen_set_ldt(unsigned long, unsigned long); -void xen_tlb_flush(void); -void xen_pgd_pin(unsigned long); -void xen_pgd_unpin(unsigned long); -void xen_pt_pin(unsigned long); -void xen_pt_unpin(unsigned long); -void xen_flush_queue(void); -void pmap_ref(pt_entry_t *pte, unsigned long ma); - - -#ifdef PMAP_DEBUG -#define PMAP_REF pmap_ref -#define PMAP_DEC_REF_PAGE pmap_dec_ref_page -#define PMAP_MARK_PRIV pmap_mark_privileged -#define PMAP_MARK_UNPRIV pmap_mark_unprivileged -#else -#define PMAP_MARK_PRIV(a) -#define PMAP_MARK_UNPRIV(a) -#define PMAP_REF(a, b) -#define PMAP_DEC_REF_PAGE(a) -#endif - -#if 0 -#define WRITABLE_PAGETABLES -#endif -#define ALWAYS_SYNC 0 - -#ifdef PT_DEBUG -#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) -#else -#define PT_LOG() -#endif - -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ - -#define SH_PD_SET_VA 1 -#define SH_PD_SET_VA_MA 2 -#define SH_PD_SET_VA_CLEAR 3 - -struct pmap; -void pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type); - -#define PT_GET(_ptp) \ - (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : *(_ptp)) - -#ifdef WRITABLE_PAGETABLES -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - PT_LOG(); \ - *(_ptp) = xpmap_ptom((_npte)); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - PT_LOG(); \ - *(_ptp) = (_npte); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - PT_LOG(); \ - *(_ptp) = 0; \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - - -#else /* !WRITABLE_PAGETABLES */ - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - xen_queue_pt_update(vtomach(_ptp), \ - xpmap_ptom(_npte)); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - xen_queue_pt_update(vtomach(_ptp), _npte); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - xen_queue_pt_update(vtomach(_ptp), 0); \ - if (sync || ALWAYS_SYNC) \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while 
(/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#endif - -#define PT_SET_MA(_va, _ma) \ - HYPERVISOR_update_va_mapping(((unsigned long)_va), \ - ((unsigned long)_ma), \ - UVMF_INVLPG| UVMF_LOCAL)\ - -#define PT_UPDATES_FLUSH() do { \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - - -static __inline uint32_t -xpmap_mtop(uint32_t mpa) -{ - return (((xen_machine_phys[(mpa >> PAGE_SHIFT)]) << PAGE_SHIFT) - | (mpa & ~PG_FRAME)); -} - -static __inline vm_paddr_t -xpmap_ptom(uint32_t ppa) -{ - return phystomach(ppa) | (ppa & ~PG_FRAME); -} - -#endif /* _XEN_XENPMAP_H_ */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h --- a/freebsd-5.3-xen-sparse/i386-xen/include/xenvar.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,30 +0,0 @@ -#ifndef XENVAR_H_ -#define XENVAR_H_ - -#define XBOOTUP 0x1 -#define XPMAP 0x2 -extern int xendebug_flags; -#ifndef NOXENDEBUG -#define XENPRINTF printk -#else -#define XENPRINTF(x...) -#endif -extern unsigned long *xen_phys_machine; -#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) -#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) -#define TRACE_DEBUG(argflags, _f, _a...) 
\ -if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); - -extern unsigned long *xen_machine_phys; -#define PTOM(i) (((unsigned long *)xen_phys_machine)[i]) -#define phystomach(pa) ((((unsigned long *)xen_phys_machine)[(pa >> PAGE_SHIFT)]) << PAGE_SHIFT) -void xpq_init(void); - -struct sockaddr_in; - -int xen_setnfshandle(void); -int setinaddr(struct sockaddr_in *addr, char *ipstr); - -#define RB_GDB_PAUSE RB_RESERVED1 - -#endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c --- a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1048 +0,0 @@ -/*- - * All rights reserved. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - */ - -/* - * XenoBSD block device driver - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <sys/bio.h> -#include <sys/bus.h> -#include <sys/conf.h> - -#include <machine/bus.h> -#include <sys/rman.h> -#include <machine/resource.h> -#include <machine/intr_machdep.h> -#include <machine/vmparam.h> - -#include <machine/hypervisor.h> -#include <machine/hypervisor-ifs.h> -#include <machine/xen-os.h> -#include <machine/xen_intr.h> -#include <machine/evtchn.h> - -#include <geom/geom_disk.h> -#include <machine/ctrl_if.h> -#include <machine/xenfunc.h> - - - -#ifdef CONFIG_XEN_BLKDEV_GRANT -#include <machine/gnttab.h> -#endif - -/* prototypes */ -struct xb_softc; -static void xb_startio(struct xb_softc *sc); -static void xb_vbdinit(void); -static void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); -static void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, unsigned long address); - -struct xb_softc { - device_t xb_dev; - struct disk xb_disk; /* disk params */ - struct bio_queue_head xb_bioq; /* sort queue */ - struct resource *xb_irq; - void *xb_resp_handler; - int xb_unit; - int xb_flags; - struct xb_softc *xb_next_blocked; -#define XB_OPEN (1<<0) /* drive is open (can't shut down) */ -}; - -/* Control whether runtime update of vbds is enabled. 
*/ -#define ENABLE_VBD_UPDATE 1 - -#if ENABLE_VBD_UPDATE -static void vbd_update(void); -#else -static void vbd_update(void){}; -#endif - -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 - -static char *blkif_state_name[] = { - [BLKIF_STATE_CLOSED] = "closed", - [BLKIF_STATE_DISCONNECTED] = "disconnected", - [BLKIF_STATE_CONNECTED] = "connected", -}; - -static char * blkif_status_name[] = { - [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", - [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", - [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", - [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", -}; - -#define WPRINTK(fmt, args...) printk("[XEN] " fmt, ##args) - -static int blkif_handle; -static unsigned int blkif_state = BLKIF_STATE_CLOSED; -static unsigned int blkif_evtchn; -static unsigned int blkif_irq; - -static int blkif_control_rsp_valid; -static blkif_response_t blkif_control_rsp; - -static blkif_front_ring_t blk_ring; - -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) - -#ifdef CONFIG_XEN_BLKDEV_GRANT -static domid_t rdomid = 0; -static grant_ref_t gref_head, gref_terminal; -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) -#endif - -static struct xb_softc *xb_kick_pending_head = NULL; -static struct xb_softc *xb_kick_pending_tail = NULL; -static struct mtx blkif_io_block_lock; - -static unsigned long rec_ring_free; -blkif_request_t rec_ring[BLK_RING_SIZE]; - -/* XXX move to xb_vbd.c when VBD update support is added */ -#define MAX_VBDS 64 -static vdisk_t xb_diskinfo[MAX_VBDS]; -static int xb_ndisks; - -#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ -#define XBD_SECTOR_SHFT 9 - -static unsigned int xb_kick_pending; - -static struct mtx blkif_io_lock; - - -static int xb_recovery = 0; /* "Recovery in progress" flag. 
Protected - * by the blkif_io_lock */ - - -void blkif_completion(blkif_request_t *req); -void xb_response_intr(void *); - -/* XXX: This isn't supported in FreeBSD, so ignore it for now. */ -#define TASK_UNINTERRUPTIBLE 0 - -static inline int -GET_ID_FROM_FREELIST( void ) -{ - unsigned long free = rec_ring_free; - - KASSERT(free <= BLK_RING_SIZE, ("free %lu > RING_SIZE", free)); - - rec_ring_free = rec_ring[free].id; - - rec_ring[free].id = 0x0fffffee; /* debug */ - - return free; -} - -static inline void -ADD_ID_TO_FREELIST( unsigned long id ) -{ - rec_ring[id].id = rec_ring_free; - rec_ring_free = id; -} - -static inline void -translate_req_to_pfn(blkif_request_t *xreq, - blkif_request_t *req) -{ - int i; - - xreq->operation = req->operation; - xreq->nr_segments = req->nr_segments; - xreq->device = req->device; - /* preserve id */ - xreq->sector_number = req->sector_number; - - for ( i = 0; i < req->nr_segments; i++ ){ -#ifdef CONFIG_XEN_BLKDEV_GRANT - xreq->frame_and_sects[i] = req->frame_and_sects[i]; -#else - xreq->frame_and_sects[i] = xpmap_mtop(req->frame_and_sects[i]); -#endif - } -} - -static inline void translate_req_to_mfn(blkif_request_t *xreq, - blkif_request_t *req) -{ - int i; - - xreq->operation = req->operation; - xreq->nr_segments = req->nr_segments; - xreq->device = req->device; - xreq->id = req->id; /* copy id (unlike above) */ - xreq->sector_number = req->sector_number; - - for ( i = 0; i < req->nr_segments; i++ ){ -#ifdef CONFIG_XEN_BLKDEV_GRANT - xreq->frame_and_sects[i] = req->frame_and_sects[i]; -#else - xreq->frame_and_sects[i] = xpmap_ptom(req->frame_and_sects[i]); -#endif - } -} - - -static inline void flush_requests(void) -{ - RING_PUSH_REQUESTS(&blk_ring); - notify_via_evtchn(blkif_evtchn); -} - - -#if ENABLE_VBD_UPDATE -static void vbd_update() -{ - XENPRINTF(">\n"); - XENPRINTF("<\n"); -} -#endif /* ENABLE_VBD_UPDATE */ - -void -xb_response_intr(void *xsc) -{ - struct xb_softc *sc = NULL; - struct bio *bp; - blkif_response_t *bret; - 
RING_IDX i, rp; - unsigned long flags; - - mtx_lock_irqsave(&blkif_io_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || - unlikely(xb_recovery) ) { - mtx_unlock_irqrestore(&blkif_io_lock, flags); - return; - } - - rp = blk_ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - /* sometimes we seem to lose i/o. stay in the interrupt handler while - * there is stuff to process: continually recheck the response producer. - */ - process_rcvd: - for ( i = blk_ring.rsp_cons; i != (rp = blk_ring.sring->rsp_prod); i++ ) { - unsigned long id; - bret = RING_GET_RESPONSE(&blk_ring, i); - - id = bret->id; - bp = (struct bio *)rec_ring[id].id; - - blkif_completion(&rec_ring[id]); - - ADD_ID_TO_FREELIST(id); /* overwrites req */ - - switch ( bret->operation ) { - case BLKIF_OP_READ: - /* had an unaligned buffer that needs to be copied */ - if (bp->bio_driver1) - bcopy(bp->bio_data, bp->bio_driver1, bp->bio_bcount); - case BLKIF_OP_WRITE: - - /* free the copy buffer */ - if (bp->bio_driver1) { - free(bp->bio_data, M_DEVBUF); - bp->bio_data = bp->bio_driver1; - bp->bio_driver1 = NULL; - } - - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { - XENPRINTF("Bad return from blkdev data request: %x\n", - bret->status); - bp->bio_flags |= BIO_ERROR; - } - - sc = (struct xb_softc *)bp->bio_disk->d_drv1; - - if (bp->bio_flags & BIO_ERROR) - bp->bio_error = EIO; - else - bp->bio_resid = 0; - - biodone(bp); - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; - default: - panic("received invalid operation"); - break; - } - } - - blk_ring.rsp_cons = i; - - if (xb_kick_pending) { - unsigned long flags; - mtx_lock_irqsave(&blkif_io_block_lock, flags); - xb_kick_pending = FALSE; - /* Run as long as there are blocked devs or queue fills again */ - while ((NULL != xb_kick_pending_head) && (FALSE == xb_kick_pending)) { - struct xb_softc *xb_cur = xb_kick_pending_head; - 
xb_kick_pending_head = xb_cur->xb_next_blocked; - if(NULL == xb_kick_pending_head) { - xb_kick_pending_tail = NULL; - } - xb_cur->xb_next_blocked = NULL; - mtx_unlock_irqrestore(&blkif_io_block_lock, flags); - xb_startio(xb_cur); - mtx_lock_irqsave(&blkif_io_block_lock, flags); - } - mtx_unlock_irqrestore(&blkif_io_block_lock, flags); - - if(blk_ring.rsp_cons != blk_ring.sring->rsp_prod) { - /* Consume those, too */ - goto process_rcvd; - } - } - - mtx_unlock_irqrestore(&blkif_io_lock, flags); -} - -static int -xb_open(struct disk *dp) -{ - struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; - - if (sc == NULL) { - printk("xb%d: not found", sc->xb_unit); - return (ENXIO); - } - - /* block dev not active */ - if (blkif_state != BLKIF_STATE_CONNECTED) { - printk("xb%d: bad state: %dn", sc->xb_unit, blkif_state); - return(ENXIO); - } - - sc->xb_flags |= XB_OPEN; - return (0); -} - -static int -xb_close(struct disk *dp) -{ - struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; - - if (sc == NULL) - return (ENXIO); - sc->xb_flags &= ~XB_OPEN; - return (0); -} - -static int -xb_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) -{ - struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; - - if (sc == NULL) - return (ENXIO); - - return (ENOTTY); -} - -/* - * Dequeue buffers and place them in the shared communication ring. - * Return when no more requests can be accepted or all buffers have - * been queued. - * - * Signal XEN once the ring has been filled out. 
- */ -static void -xb_startio(struct xb_softc *sc) -{ - struct bio *bp; - unsigned long buffer_ma; - blkif_request_t *req; - int s, queued = 0; - unsigned long id; - unsigned int fsect, lsect; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int ref; -#endif - - - if (unlikely(blkif_state != BLKIF_STATE_CONNECTED)) - return; - - s = splbio(); - - for (bp = bioq_first(&sc->xb_bioq); - bp && !RING_FULL(&blk_ring); - blk_ring.req_prod_pvt++, queued++, bp = bioq_first(&sc->xb_bioq)) { - - /* Check if the buffer is properly aligned */ - if ((vm_offset_t)bp->bio_data & PAGE_MASK) { - int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : - PAGE_SIZE; - caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, - M_WAITOK); - caddr_t alignbuf = (char *)roundup2((u_long)newbuf, align); - - /* save a copy of the current buffer */ - bp->bio_driver1 = bp->bio_data; - - /* Copy the data for a write */ - if (bp->bio_cmd == BIO_WRITE) - bcopy(bp->bio_data, alignbuf, bp->bio_bcount); - bp->bio_data = alignbuf; - } - - bioq_remove(&sc->xb_bioq, bp); - buffer_ma = vtomach(bp->bio_data); - fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; - lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; - - KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, - ("XEN buffer must be sector aligned")); - KASSERT(lsect <= 7, - ("XEN disk driver data cannot cross a page boundary")); - - buffer_ma &= ~PAGE_MASK; - - /* Fill out a communications ring structure. */ - req = RING_GET_REQUEST(&blk_ring, - blk_ring.req_prod_pvt); - id = GET_ID_FROM_FREELIST(); - rec_ring[id].id= (unsigned long)bp; - - req->id = id; - req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : - BLKIF_OP_WRITE; - - req->sector_number= (blkif_sector_t)bp->bio_pblkno; - req->device = xb_diskinfo[sc->xb_unit].device; - - req->nr_segments = 1; /* not doing scatter/gather since buffer - * chaining is not supported. - */ -#ifdef CONFIG_XEN_BLKDEV_GRANT - /* install a grant reference. 
*/ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - KASSERT( ref != -ENOSPC, ("grant_reference failed") ); - - gnttab_grant_foreign_access_ref( - ref, - rdomid, - buffer_ma >> PAGE_SHIFT, - req->operation & 1 ); /* ??? */ - - req->frame_and_sects[0] = - (((uint32_t) ref) << 16) | (fsect << 3) | lsect; -#else - /* - * upper bits represent the machine address of the buffer and the - * lower bits is the number of sectors to be read/written. - */ - req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect; -#endif - /* Keep a private copy so we can reissue requests when recovering. */ - translate_req_to_pfn( &rec_ring[id], req); - - } - - if (RING_FULL(&blk_ring)) { - unsigned long flags; - mtx_lock_irqsave(&blkif_io_block_lock, flags); - xb_kick_pending = TRUE; - /* If we are not already on blocked list, add us */ - if((NULL == sc->xb_next_blocked) && (xb_kick_pending_tail != sc)) { - - if(NULL == xb_kick_pending_head) { - xb_kick_pending_head = xb_kick_pending_tail = sc; - } else { - xb_kick_pending_tail->xb_next_blocked = sc; - xb_kick_pending_tail = sc; - } - } - mtx_unlock_irqrestore(&blkif_io_block_lock, flags); - } - - if (queued != 0) - flush_requests(); - splx(s); -} - -/* - * Read/write routine for a buffer. Finds the proper unit, place it on - * the sortq and kick the controller. - */ -static void -xb_strategy(struct bio *bp) -{ - struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; - int s; - - /* bogus disk? */ - if (sc == NULL) { - bp->bio_error = EINVAL; - bp->bio_flags |= BIO_ERROR; - goto bad; - } - - s = splbio(); - /* - * Place it in the queue of disk activities for this disk - */ - bioq_disksort(&sc->xb_bioq, bp); - splx(s); - - xb_startio(sc); - return; - - bad: - /* - * Correctly set the bio to indicate a failed tranfer. 
- */ - bp->bio_resid = bp->bio_bcount; - biodone(bp); - return; -} - - -static int -xb_create(int unit) -{ - struct xb_softc *sc; - int error = 0; - - sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK); - sc->xb_unit = unit; - sc->xb_next_blocked = NULL; - - memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); - sc->xb_disk.d_unit = unit; - sc->xb_disk.d_open = xb_open; - sc->xb_disk.d_close = xb_close; - sc->xb_disk.d_ioctl = xb_ioctl; - sc->xb_disk.d_strategy = xb_strategy; - sc->xb_disk.d_name = "xbd"; - sc->xb_disk.d_drv1 = sc; - sc->xb_disk.d_sectorsize = XBD_SECTOR_SIZE; - sc->xb_disk.d_mediasize = xb_diskinfo[sc->xb_unit].capacity - << XBD_SECTOR_SHFT; -#if 0 - sc->xb_disk.d_maxsize = DFLTPHYS; -#else /* XXX: xen can't handle large single i/o requests */ - sc->xb_disk.d_maxsize = 4096; -#endif - - XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", - xb_diskinfo[sc->xb_unit].device, sc->xb_unit, - sc->xb_disk.d_mediasize); - - disk_create(&sc->xb_disk, DISK_VERSION_00); - bioq_init(&sc->xb_bioq); - - return error; -} - -/* XXX move to xb_vbd.c when vbd update support is added */ -static void -xb_vbdinit(void) -{ - int i; - blkif_request_t req; - blkif_response_t rsp; - vdisk_t *buf; - - buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); - - /* Probe for disk information. 
*/ - memset(&req, 0, sizeof(req)); - req.operation = BLKIF_OP_PROBE; - req.nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - blkif_control_probe_send(&req, &rsp, - (unsigned long)(vtomach(buf))); - -#else - req.frame_and_sects[0] = vtomach(buf) | 7; - blkif_control_send(&req, &rsp); -#endif - if ( rsp.status <= 0 ) { - printk("xb_identify: Could not identify disks (%d)\n", rsp.status); - free(buf, M_DEVBUF); - return; - } - - if ((xb_ndisks = rsp.status) > MAX_VBDS) - xb_ndisks = MAX_VBDS; - - memcpy(xb_diskinfo, buf, xb_ndisks * sizeof(vdisk_t)); - - for (i = 0; i < xb_ndisks; i++) - xb_create(i); - - free(buf, M_DEVBUF); -} - - -/***************************** COMMON CODE *******************************/ - -#ifdef CONFIG_XEN_BLKDEV_GRANT -static void -blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, - unsigned long address) -{ - int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - KASSERT( ref != -ENOSPC, ("couldn't get grant reference") ); - - gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 ); - - req->frame_and_sects[0] = (((uint32_t) ref) << 16) | 7; - - blkif_control_send(req, rsp); -} -#endif - -void -blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) -{ - unsigned long flags, id; - blkif_request_t *req_d; - - retry: - while ( RING_FULL(&blk_ring) ) - { - tsleep( req, PWAIT | PCATCH, "blkif", hz); - } - - mtx_lock_irqsave(&blkif_io_lock, flags); - if ( RING_FULL(&blk_ring) ) - { - mtx_unlock_irqrestore(&blkif_io_lock, flags); - goto retry; - } - - req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); - *req_d = *req; - - id = GET_ID_FROM_FREELIST(); - req_d->id = id; - rec_ring[id].id = (unsigned long) req; - - translate_req_to_pfn( &rec_ring[id], req ); - - blk_ring.req_prod_pvt++; - flush_requests(); - - mtx_unlock_irqrestore(&blkif_io_lock, flags); - - while ( !blkif_control_rsp_valid ) - { - tsleep( &blkif_control_rsp_valid, PWAIT | PCATCH, "blkif", hz); - } - - memcpy(rsp, 
&blkif_control_rsp, sizeof(*rsp)); - blkif_control_rsp_valid = 0; -} - - -/* Send a driver status notification to the domain controller. */ -static void -send_driver_status(int ok) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_DRIVER_STATUS, - .length = sizeof(blkif_fe_driver_status_t), - }; - blkif_fe_driver_status_t *msg = (void*)cmsg.msg; - - msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -/* Tell the controller to bring up the interface. */ -static void -blkif_send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; - - msg->handle = 0; - msg->shmem_frame = (vtomach(blk_ring.sring) >> PAGE_SHIFT); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -static void -blkif_free(void) -{ - - unsigned long flags; - - printk("[XEN] Recovering virtual block device driver\n"); - - /* Prevent new requests being issued until we fix things up. */ - mtx_lock_irqsave(&blkif_io_lock, flags); - xb_recovery = 1; - blkif_state = BLKIF_STATE_DISCONNECTED; - mtx_unlock_irqrestore(&blkif_io_lock, flags); - - /* Free resources associated with old device channel. */ - if (blk_ring.sring != NULL) { - free(blk_ring.sring, M_DEVBUF); - blk_ring.sring = NULL; - } - /* free_irq(blkif_irq, NULL);*/ - blkif_irq = 0; - - unbind_evtchn_from_irq(blkif_evtchn); - blkif_evtchn = 0; -} - -static void -blkif_close(void) -{ -} - -/* Move from CLOSED to DISCONNECTED state. 
*/ -static void -blkif_disconnect(void) -{ - if (blk_ring.sring) free(blk_ring.sring, M_DEVBUF); - blk_ring.sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); - SHARED_RING_INIT(blk_ring.sring); - FRONT_RING_INIT(&blk_ring, blk_ring.sring, PAGE_SIZE); - blkif_state = BLKIF_STATE_DISCONNECTED; - blkif_send_interface_connect(); -} - -static void -blkif_reset(void) -{ - printk("[XEN] Recovering virtual block device driver\n"); - blkif_free(); - blkif_disconnect(); -} - -static void -blkif_recover(void) -{ - - int i; - blkif_request_t *req; - - /* Hmm, requests might be re-ordered when we re-issue them. - * This will need to be fixed once we have barriers */ - - /* Stage 1 : Find active and move to safety. */ - for ( i = 0; i < BLK_RING_SIZE; i++ ) { - if ( rec_ring[i].id >= KERNBASE ) { - req = RING_GET_REQUEST(&blk_ring, - blk_ring.req_prod_pvt); - translate_req_to_mfn(req, &rec_ring[i]); - blk_ring.req_prod_pvt++; - } - } - - printk("blkfront: recovered %d descriptors\n",blk_ring.req_prod_pvt); - - /* Stage 2 : Set up shadow list. */ - for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) { - req = RING_GET_REQUEST(&blk_ring, i); - rec_ring[i].id = req->id; - req->id = i; - translate_req_to_pfn(&rec_ring[i], req); - } - - /* Stage 3 : Set up free list. */ - for ( ; i < BLK_RING_SIZE; i++ ){ - rec_ring[i].id = i+1; - } - rec_ring_free = blk_ring.req_prod_pvt; - rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff; - - /* blk_ring.req_prod will be set when we flush_requests().*/ - wmb(); - - /* Switch off recovery mode, using a memory barrier to ensure that - * it's seen before we flush requests - we don't want to miss any - * interrupts. */ - xb_recovery = 0; - wmb(); - - /* Kicks things back into life. */ - flush_requests(); - - /* Now safe to left other peope use interface. 
*/ - blkif_state = BLKIF_STATE_CONNECTED; -} - -static void -blkif_connect(blkif_fe_interface_status_t *status) -{ - int err = 0; - - blkif_evtchn = status->evtchn; - blkif_irq = bind_evtchn_to_irq(blkif_evtchn); -#ifdef CONFIG_XEN_BLKDEV_GRANT - rdomid = status->domid; -#endif - - - err = intr_add_handler("xbd", blkif_irq, - (driver_intr_t *)xb_response_intr, NULL, - INTR_TYPE_BIO | INTR_MPSAFE, NULL); - if(err){ - printk("[XEN] blkfront request_irq failed (err=%d)\n", err); - return; - } - - if ( xb_recovery ) { - blkif_recover(); - } else { - /* Probe for discs attached to the interface. */ - xb_vbdinit(); - - /* XXX: transition state after probe */ - blkif_state = BLKIF_STATE_CONNECTED; - } - - /* Kick pending requests. */ -#if 0 /* XXX: figure out sortq logic */ - mtx_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); - mtx_unlock_irq(&blkif_io_lock); -#endif -} - -static void -unexpected(blkif_fe_interface_status_t *status) -{ - WPRINTK(" Unexpected blkif status %s in state %s\n", - blkif_status_name[status->status], - blkif_state_name[blkif_state]); -} - -static void -blkif_status(blkif_fe_interface_status_t *status) -{ - if (status->handle != blkif_handle) { - WPRINTK(" Invalid blkif: handle=%u", status->handle); - return; - } - - switch (status->status) { - - case BLKIF_INTERFACE_STATUS_CLOSED: - switch(blkif_state){ - case BLKIF_STATE_CLOSED: - unexpected(status); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_close(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch(blkif_state){ - case BLKIF_STATE_CLOSED: - blkif_disconnect(); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_reset(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch(blkif_state){ - case BLKIF_STATE_CLOSED: - unexpected(status); - blkif_disconnect(); - blkif_connect(status); - break; - case BLKIF_STATE_DISCONNECTED: - 
blkif_connect(status); - break; - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_connect(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch(blkif_state){ - case BLKIF_STATE_CLOSED: - case BLKIF_STATE_DISCONNECTED: - unexpected(status); - break; - case BLKIF_STATE_CONNECTED: - vbd_update(); - break; - } - break; - - default: - WPRINTK("Invalid blkif status: %d\n", status->status); - break; - } -} - - -static void -blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - if ( msg->length != sizeof(blkif_fe_interface_status_t) ) - goto parse_error; - blkif_status((blkif_fe_interface_status_t *) - &msg->msg[0]); - break; - default: - goto parse_error; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - -static int -wait_for_blkif(void) -{ - int err = 0; - int i; - send_driver_status(1); - - /* - * We should read 'nr_interfaces' from response message and wait - * for notifications before proceeding. For now we assume that we - * will be notified of exactly one interface. 
- */ - for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*hz); i++ ) - { - tsleep(&blkif_state, PWAIT | PCATCH, "blkif", hz); - } - - if (blkif_state != BLKIF_STATE_CONNECTED){ - printk("[XEN] Timeout connecting block device driver!\n"); - err = -ENOSYS; - } - return err; -} - - -static void -xb_init(void *unused) -{ - int i; - - printk("[XEN] Initialising virtual block device driver\n"); - -#ifdef CONFIG_XEN_BLKDEV_GRANT - if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS, - &gref_head, &gref_terminal )) - return; - printk("Blkif frontend is using grant tables.\n"); -#endif - - xb_kick_pending = FALSE; - xb_kick_pending_head = NULL; - xb_kick_pending_tail = NULL; - - rec_ring_free = 0; - for (i = 0; i < BLK_RING_SIZE; i++) { - rec_ring[i].id = i+1; - } - rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff; - - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0); - - wait_for_blkif(); -} - -#if 0 /* XXX not yet */ -void -blkdev_suspend(void) -{ -} - -void -blkdev_resume(void) -{ - send_driver_status(1); -} -#endif - -void -blkif_completion(blkif_request_t *req) -{ - int i; - -#ifdef CONFIG_XEN_BLKDEV_GRANT - grant_ref_t gref; - - for ( i = 0; i < req->nr_segments; i++ ) - { - gref = blkif_gref_from_fas(req->frame_and_sects[i]); - gnttab_release_grant_reference(&gref_head, gref); - } -#else - /* This is a hack to get the dirty logging bits set */ - switch ( req->operation ) - { - case BLKIF_OP_READ: - for ( i = 0; i < req->nr_segments; i++ ) - { - unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT; - unsigned long mfn = xen_phys_machine[pfn]; - xen_machphys_update(mfn, pfn); - } - break; - } -#endif -} -MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN | MTX_NOWITNESS); /* XXX how does one enroll a lock? 
*/ - MTX_SYSINIT(ioreq_block, &blkif_io_block_lock, "BIO BLOCK LOCK", MTX_SPIN | MTX_NOWITNESS); -SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL) diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c --- a/freebsd-5.3-xen-sparse/i386-xen/xen/char/console.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,536 +0,0 @@ -#include <sys/cdefs.h> - - -#include <sys/param.h> -#include <sys/module.h> -#include <sys/systm.h> -#include <sys/consio.h> -#include <sys/proc.h> -#include <sys/uio.h> -#include <sys/tty.h> -#include <sys/systm.h> -#include <sys/taskqueue.h> -#include <sys/conf.h> -#include <sys/kernel.h> -#include <sys/bus.h> -#include <machine/stdarg.h> -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/ctrl_if.h> -#include <sys/cons.h> - -#include "opt_ddb.h" -#ifdef DDB -#include <ddb/ddb.h> -#endif - -static char driver_name[] = "xc"; -devclass_t xc_devclass; -static void xcstart (struct tty *); -static int xcparam (struct tty *, struct termios *); -static void xcstop (struct tty *, int); -static void xc_timeout(void *); -static void xencons_tx_flush_task_routine(void *,int ); -static void __xencons_tx_flush(void); -static void xencons_rx(ctrl_msg_t *msg,unsigned long id); -static boolean_t xcons_putc(int c); - -/* switch console so that shutdown can occur gracefully */ -static void xc_shutdown(void *arg, int howto); -static int xc_mute; - -void xcons_force_flush(void); - -static cn_probe_t xccnprobe; -static cn_init_t xccninit; -static cn_getc_t xccngetc; -static cn_putc_t xccnputc; -static cn_checkc_t xccncheckc; - -#define XC_POLLTIME (hz/10) - -CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, - xccncheckc, xccnputc, NULL); - -static int xen_console_up; -static boolean_t xc_tx_task_queued; -static boolean_t xc_start_needed; -static struct callout xc_callout; -struct mtx cn_mtx; - -#define RBUF_SIZE 1024 -#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) -#define 
WBUF_SIZE 4096 -#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) -static char wbuf[WBUF_SIZE]; -static char rbuf[RBUF_SIZE]; -static int rc, rp; -static int cnsl_evt_reg; -static unsigned int wc, wp; /* write_cons, write_prod */ -static struct task xencons_tx_flush_task = { {NULL},0,0,&xencons_tx_flush_task_routine,NULL }; - - -#define CDEV_MAJOR 12 -#define XCUNIT(x) (minor(x)) -#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) -#define CN_LOCK_INIT(x, _name) \ - mtx_init(&x, _name, _name, MTX_SPIN) -#define CN_LOCK(l, f) mtx_lock_irqsave(&(l), (f)) -#define CN_UNLOCK(l, f) mtx_unlock_irqrestore(&(l), (f)) -#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) -#define CN_LOCK_DESTROY(x) mtx_destroy(&x) - - -static struct tty *xccons; - -struct xc_softc { - int xc_unit; - struct cdev *xc_dev; -}; - - -static d_open_t xcopen; -static d_close_t xcclose; -static d_ioctl_t xcioctl; - -static struct cdevsw xc_cdevsw = { - /* version */ D_VERSION_00, - /* maj */ CDEV_MAJOR, - /* flags */ D_TTY | D_NEEDGIANT, - /* name */ driver_name, - - /* open */ xcopen, - /* fdopen */ 0, - /* close */ xcclose, - /* read */ ttyread, - /* write */ ttywrite, - /* ioctl */ xcioctl, - /* poll */ ttypoll, - /* mmap */ 0, - /* strategy */ 0, - /* dump */ 0, - /* kqfilter */ ttykqfilter -}; - -static void -xccnprobe(struct consdev *cp) -{ - cp->cn_pri = CN_REMOTE; - cp->cn_tp = xccons; - sprintf(cp->cn_name, "%s0", driver_name); -} - - -static void -xccninit(struct consdev *cp) -{ - CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); - -} -int -xccngetc(struct consdev *dev) -{ - int c; - if (xc_mute) - return 0; - do { - if ((c = xccncheckc(dev)) == -1) { - /* polling without sleeping in Xen doesn't work well. - * Sleeping gives other things like clock a chance to - * run - */ - tsleep(&cn_mtx, PWAIT | PCATCH, "console sleep", - XC_POLLTIME); - } - } while( c == -1 ); - return c; -} - -int -xccncheckc(struct consdev *dev) -{ - int ret = (xc_mute ? 
0 : -1); - int flags; - CN_LOCK(cn_mtx, flags); - if ( (rp - rc) ){ - /* we need to return only one char */ - ret = (int)rbuf[RBUF_MASK(rc)]; - rc++; - } - CN_UNLOCK(cn_mtx, flags); - return(ret); -} - -static void -xccnputc(struct consdev *dev, int c) -{ - int flags; - CN_LOCK(cn_mtx, flags); - xcons_putc(c); - CN_UNLOCK(cn_mtx, flags); -} - -static boolean_t -xcons_putc(int c) -{ - int force_flush = xc_mute || -#ifdef DDB - db_active || -#endif - panicstr; /* we're not gonna recover, so force - * flush - */ - - if ( (wp-wc) < (WBUF_SIZE-1) ){ - if ( (wbuf[WBUF_MASK(wp++)] = c) == '\n' ) { - wbuf[WBUF_MASK(wp++)] = '\r'; - if (force_flush) - xcons_force_flush(); - } - } else if (force_flush) { - xcons_force_flush(); - - } - if (cnsl_evt_reg) - __xencons_tx_flush(); - - /* inform start path that we're pretty full */ - return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; -} - -static void -xc_identify(driver_t *driver, device_t parent) -{ - device_t child; - child = BUS_ADD_CHILD(parent, 0, driver_name, 0); - device_set_driver(child, driver); - device_set_desc(child, "Xen Console"); -} - -static int -xc_probe(device_t dev) -{ - struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); - - sc->xc_unit = device_get_unit(dev); - return (0); -} - -static int -xc_attach(device_t dev) -{ - struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); - - sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0); - xccons = ttymalloc(NULL); - - sc->xc_dev->si_drv1 = (void *)sc; - sc->xc_dev->si_tty = xccons; - - xccons->t_oproc = xcstart; - xccons->t_param = xcparam; - xccons->t_stop = xcstop; - xccons->t_dev = sc->xc_dev; - - callout_init(&xc_callout, 0); - - /* Ensure that we don't attach before the event channel is able to receive - * a registration. The XenBus code delays the probe/attach order until - * this has occurred. 
- */ - (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0); - cnsl_evt_reg = 1; - - callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); - - /* register handler to flush console on shutdown */ - if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, - NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) - printf("xencons: shutdown event registration failed!\n"); - - return (0); -} - -/* - * return 0 for all console input, force flush all output. - */ -static void -xc_shutdown(void *arg, int howto) -{ - xc_mute = 1; - xcons_force_flush(); - -} - -static void -xencons_rx(ctrl_msg_t *msg,unsigned long id) -{ - int i, flags; - struct tty *tp = xccons; - - CN_LOCK(cn_mtx, flags); - for ( i = 0; i < msg->length; i++ ) { - if ( xen_console_up ) - (*linesw[tp->t_line]->l_rint)(msg->msg[i], tp); - else - rbuf[RBUF_MASK(rp++)] = msg->msg[i]; - } - CN_UNLOCK(cn_mtx, flags); - msg->length = 0; - ctrl_if_send_response(msg); -} - -static void -__xencons_tx_flush(void) -{ - int sz, work_done = 0; - ctrl_msg_t msg; - - while ( wc != wp ) - { - sz = wp - wc; - if ( sz > sizeof(msg.msg) ) - sz = sizeof(msg.msg); - if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) - sz = WBUF_SIZE - WBUF_MASK(wc); - - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = sz; - memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); - - if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ){ - wc += sz; - } - else if (xc_tx_task_queued) { - /* avoid the extra enqueue check if we know we're already queued */ - break; - } else if (ctrl_if_enqueue_space_callback(&xencons_tx_flush_task)) { - xc_tx_task_queued = TRUE; - break; - } - - work_done = 1; - } - - if ( work_done && xen_console_up ) - ttwakeup(xccons); -} -static void -xencons_tx_flush_task_routine(void * data, int arg) -{ - int flags; - CN_LOCK(cn_mtx, flags); - xc_tx_task_queued = FALSE; - __xencons_tx_flush(); - CN_UNLOCK(cn_mtx, flags); -} - -int -xcopen(struct cdev *dev, int flag, int mode, struct thread *td) -{ - struct xc_softc *sc; - 
int unit = XCUNIT(dev); - struct tty *tp; - int s, error; - - sc = (struct xc_softc *)device_get_softc( - devclass_get_device(xc_devclass, unit)); - if (sc == NULL) - return (ENXIO); - - tp = dev->si_tty; - s = spltty(); - if (!ISTTYOPEN(tp)) { - tp->t_state |= TS_CARR_ON; - ttychars(tp); - tp->t_iflag = TTYDEF_IFLAG; - tp->t_oflag = TTYDEF_OFLAG; - tp->t_cflag = TTYDEF_CFLAG|CLOCAL; - tp->t_lflag = TTYDEF_LFLAG; - tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; - xcparam(tp, &tp->t_termios); - ttsetwater(tp); - } else if (tp->t_state & TS_XCLUDE && suser(td)) { - splx(s); - return (EBUSY); - } - splx(s); - - xen_console_up = 1; - - error = (*linesw[tp->t_line]->l_open)(dev, tp); - - return error; -} - -int -xcclose(struct cdev *dev, int flag, int mode, struct thread *td) -{ - struct tty *tp = dev->si_tty; - - if (tp == NULL) - return (0); - xen_console_up = 0; - - spltty(); - (*linesw[tp->t_line]->l_close)(tp, flag); - tty_close(tp); - spl0(); - return (0); -} - - -int -xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) -{ - struct tty *tp = dev->si_tty; - int error; - - error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td); - if (error != ENOIOCTL) - return (error); - error = ttioctl(tp, cmd, data, flag); - if (error != ENOIOCTL) - return (error); - return (ENOTTY); -} - -static inline int -__xencons_put_char(int ch) -{ - char _ch = (char)ch; - if ( (wp - wc) == WBUF_SIZE ) - return 0; - wbuf[WBUF_MASK(wp++)] = _ch; - return 1; -} - - -static void -xcstart(struct tty *tp) -{ - int flags; - int s; - boolean_t cons_full = FALSE; - - s = spltty(); - CN_LOCK(cn_mtx, flags); - if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { - ttwwakeup(tp); - CN_UNLOCK(cn_mtx, flags); - return; - } - - tp->t_state |= TS_BUSY; - while (tp->t_outq.c_cc != 0 && !cons_full) - cons_full = xcons_putc(getc(&tp->t_outq)); - - /* if the console is close to full leave our state as busy */ - if (!cons_full) { - tp->t_state &= ~TS_BUSY; - ttwwakeup(tp); - } else 
{ - /* let the timeout kick us in a bit */ - xc_start_needed = TRUE; - } - CN_UNLOCK(cn_mtx, flags); - splx(s); -} - -static void -xcstop(struct tty *tp, int flag) -{ - - if (tp->t_state & TS_BUSY) { - if ((tp->t_state & TS_TTSTOP) == 0) { - tp->t_state |= TS_FLUSH; - } - } -} - -static void -xc_timeout(void *v) -{ - struct tty *tp; - int c; - - tp = (struct tty *)v; - - while ((c = xccncheckc(NULL)) != -1) { - if (tp->t_state & TS_ISOPEN) { - (*linesw[tp->t_line]->l_rint)(c, tp); - } - } - - if (xc_start_needed) { - xc_start_needed = FALSE; - xcstart(tp); - } - - callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); -} - -/* - * Set line parameters. - */ -int -xcparam(struct tty *tp, struct termios *t) -{ - tp->t_ispeed = t->c_ispeed; - tp->t_ospeed = t->c_ospeed; - tp->t_cflag = t->c_cflag; - return (0); -} - - -static device_method_t xc_methods[] = { - DEVMETHOD(device_identify, xc_identify), - DEVMETHOD(device_probe, xc_probe), - DEVMETHOD(device_attach, xc_attach), - {0, 0} -}; - -static driver_t xc_driver = { - driver_name, - xc_methods, - sizeof(struct xc_softc), -}; - -/*** Forcibly flush console data before dying. ***/ -void -xcons_force_flush(void) -{ - ctrl_msg_t msg; - int sz; - - /* - * We use dangerous control-interface functions that require a quiescent - * system and no interrupts. Try to ensure this with a global cli(). - */ - cli(); - - /* Spin until console data is flushed through to the domain controller. */ - while ( (wc != wp) && !ctrl_if_transmitter_empty() ) - { - /* Interrupts are disabled -- we must manually reap responses. 
*/ - ctrl_if_discard_responses(); - - if ( (sz = wp - wc) == 0 ) - continue; - if ( sz > sizeof(msg.msg) ) - sz = sizeof(msg.msg); - if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) ) - sz = WBUF_SIZE - WBUF_MASK(wc); - - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = sz; - memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); - - if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) - wc += sz; - } -} - -DRIVER_MODULE(xc, xenbus, xc_driver, xc_devclass, 0, 0); diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c --- a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,412 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * Xenolinux driver for receiving and demuxing event-channel signals. - * - * Copyright (c) 2004, K A Fraser - */ -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/uio.h> -#include <sys/bus.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/selinfo.h> -#include <sys/poll.h> -#include <sys/conf.h> -#include <sys/fcntl.h> -#include <sys/ioccom.h> - -#include <machine/cpufunc.h> -#include <machine/intr_machdep.h> -#include <machine/xen-os.h> -#include <machine/xen_intr.h> -#include <machine/bus.h> -#include <sys/rman.h> -#include <machine/resource.h> -#include <machine/synch_bitops.h> - -#include <machine/hypervisor.h> - - -typedef struct evtchn_sotfc { - - struct selinfo ev_rsel; -} evtchn_softc_t; - - -#ifdef linuxcrap -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; -#endif - -/* Only one process may open /dev/xen/evtchn at any time. */ -static unsigned long evtchn_dev_inuse; - -/* Notification ring, accessed via /dev/xen/evtchn. 
*/ - -#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ - -#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) -static uint16_t *ring; -static unsigned int ring_cons, ring_prod, ring_overflow; - -/* Which ports is user-space bound to? */ -static uint32_t bound_ports[32]; - -/* Unique address for processes to sleep on */ -static void *evtchn_waddr = &ring; - -static struct mtx lock, upcall_lock; - -static d_read_t evtchn_read; -static d_write_t evtchn_write; -static d_ioctl_t evtchn_ioctl; -static d_poll_t evtchn_poll; -static d_open_t evtchn_open; -static d_close_t evtchn_close; - - -void -evtchn_device_upcall(int port) -{ - mtx_lock(&upcall_lock); - - mask_evtchn(port); - clear_evtchn(port); - - if ( ring != NULL ) { - if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { - ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; - if ( ring_cons == ring_prod++ ) { - wakeup(evtchn_waddr); - } - } - else { - ring_overflow = 1; - } - } - - mtx_unlock(&upcall_lock); -} - -static void -__evtchn_reset_buffer_ring(void) -{ - /* Initialise the ring to empty. Clear errors. */ - ring_cons = ring_prod = ring_overflow = 0; -} - -static int -evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) -{ - int rc; - unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; - count = uio->uio_resid; - - count &= ~1; /* even number of bytes */ - - if ( count == 0 ) - { - rc = 0; - goto out; - } - - if ( count > PAGE_SIZE ) - count = PAGE_SIZE; - - for ( ; ; ) { - if ( (c = ring_cons) != (p = ring_prod) ) - break; - - if ( ring_overflow ) { - rc = EFBIG; - goto out; - } - - if (sst != 0) { - rc = EINTR; - goto out; - } - - /* PCATCH == check for signals before and after sleeping - * PWAIT == priority of waiting on resource - */ - sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); - } - - /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. 
*/ - if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { - bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); - bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); - } - else { - bytes1 = (p - c) * sizeof(uint16_t); - bytes2 = 0; - } - - /* Truncate chunks according to caller's maximum byte count. */ - if ( bytes1 > count ) { - bytes1 = count; - bytes2 = 0; - } - else if ( (bytes1 + bytes2) > count ) { - bytes2 = count - bytes1; - } - - if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || - ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) - /* keeping this around as its replacement is not equivalent - * copyout(&ring[0], &buf[bytes1], bytes2) - */ - { - rc = EFAULT; - goto out; - } - - ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); - - rc = bytes1 + bytes2; - - out: - - return rc; -} - -static int -evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) -{ - int rc, i, count; - - count = uio->uio_resid; - - uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); - - - if ( kbuf == NULL ) - return ENOMEM; - - count &= ~1; /* even number of bytes */ - - if ( count == 0 ) { - rc = 0; - goto out; - } - - if ( count > PAGE_SIZE ) - count = PAGE_SIZE; - - if ( uiomove(kbuf, count, uio) != 0 ) { - rc = EFAULT; - goto out; - } - - mtx_lock_spin(&lock); - for ( i = 0; i < (count/2); i++ ) - if ( test_bit(kbuf[i], &bound_ports[0]) ) - unmask_evtchn(kbuf[i]); - mtx_unlock_spin(&lock); - - rc = count; - - out: - free(kbuf, M_DEVBUF); - return rc; -} - -static int -evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, - int mode, struct thread *td __unused) -{ - int rc = 0; - - mtx_lock_spin(&lock); - - switch ( cmd ) - { - case EVTCHN_RESET: - __evtchn_reset_buffer_ring(); - break; - case EVTCHN_BIND: - if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) - unmask_evtchn((int)arg); - else - rc = EINVAL; - break; - case EVTCHN_UNBIND: - if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) - mask_evtchn((int)arg); - else - rc 
= EINVAL; - break; - default: - rc = ENOSYS; - break; - } - - mtx_unlock_spin(&lock); - - return rc; -} - -static int -evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) -{ - - evtchn_softc_t *sc; - unsigned int mask = POLLOUT | POLLWRNORM; - - sc = dev->si_drv1; - - if ( ring_cons != ring_prod ) - mask |= POLLIN | POLLRDNORM; - else if ( ring_overflow ) - mask = POLLERR; - else - selrecord(td, &sc->ev_rsel); - - - return mask; -} - - -static int -evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) -{ - uint16_t *_ring; - - if (flag & O_NONBLOCK) - return EBUSY; - - if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) - return EBUSY; - - if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) - return ENOMEM; - - mtx_lock_spin(&lock); - ring = _ring; - __evtchn_reset_buffer_ring(); - mtx_unlock_spin(&lock); - - - return 0; -} - -static int -evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) -{ - int i; - - mtx_lock_spin(&lock); - if (ring != NULL) { - free(ring, M_DEVBUF); - ring = NULL; - } - for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) - if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) - mask_evtchn(i); - mtx_unlock_spin(&lock); - - evtchn_dev_inuse = 0; - - return 0; -} - - - -/* XXX wild assed guess as to a safe major number */ -#define EVTCHN_MAJOR 140 - -static struct cdevsw evtchn_devsw = { - d_version: D_VERSION_00, - d_open: evtchn_open, - d_close: evtchn_close, - d_read: evtchn_read, - d_write: evtchn_write, - d_ioctl: evtchn_ioctl, - d_poll: evtchn_poll, - d_name: "evtchn", - d_maj: EVTCHN_MAJOR, - d_flags: 0, -}; - - -/* XXX - if this device is ever supposed to support use by more than one process - * this global static will have to go away - */ -static struct cdev *evtchn_dev; - - - -static int -evtchn_init(void *dummy __unused) -{ - /* XXX I believe we don't need these leaving them here for now until we - * have some semblance of it working - */ -#if 0 - devfs_handle_t 
symlink_handle; - int err, pos; - char link_dest[64]; -#endif - mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); - - /* (DEVFS) create '/dev/misc/evtchn'. */ - evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); - - mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); - - evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); - bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); - - /* XXX I don't think we need any of this rubbish */ -#if 0 - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. */ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); -#endif - printk("Event-channel device installed.\n"); - - return 0; -} - - -SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); - - -#if 0 - -static void cleanup_module(void) -{ - destroy_dev(evtchn_dev); -; -} - -module_init(init_module); -module_exit(cleanup_module); -#endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c --- a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1109 +0,0 @@ -/*- - * Copyright (c) 1990 William Jolitz. - * Copyright (c) 1991 The Regents of the University of California. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/i386/isa/npx.c,v 1.144 2003/11/03 21:53:38 jhb Exp $"); - -#include "opt_cpu.h" -#include "opt_debug_npx.h" -#include "opt_isa.h" -#include "opt_npx.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bus.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/mutex.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/smp.h> -#include <sys/sysctl.h> -#include <machine/bus.h> -#include <sys/rman.h> -#ifdef NPX_DEBUG -#include <sys/syslog.h> -#endif -#include <sys/signalvar.h> -#include <sys/user.h> - -#include <machine/asmacros.h> -#include <machine/cputypes.h> -#include <machine/frame.h> -#include <machine/md_var.h> -#include <machine/pcb.h> -#include <machine/psl.h> -#include <machine/clock.h> -#include <machine/resource.h> -#include <machine/specialreg.h> -#include <machine/segments.h> -#include <machine/ucontext.h> - -#include <machine/multicall.h> - -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif -#include <machine/intr_machdep.h> -#ifdef DEV_ISA -#include <isa/isavar.h> -#endif - -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - -/* - * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. - */ - -/* Configuration flags. 
*/ -#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) -#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) -#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) - -#if defined(__GNUC__) && !defined(lint) - -#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) -#define fnclex() __asm("fnclex") -#define fninit() __asm("fninit") -#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) -#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) -#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) -#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") -#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) -#ifdef CPU_ENABLE_SSE -#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) -#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) -#endif -#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ - : : "n" (CR0_TS) : "ax") -#define stop_emulating() __asm("clts") - -#else /* not __GNUC__ */ - -void fldcw(caddr_t addr); -void fnclex(void); -void fninit(void); -void fnsave(caddr_t addr); -void fnstcw(caddr_t addr); -void fnstsw(caddr_t addr); -void fp_divide_by_0(void); -void frstor(caddr_t addr); -#ifdef CPU_ENABLE_SSE -void fxsave(caddr_t addr); -void fxrstor(caddr_t addr); -#endif -void start_emulating(void); -void stop_emulating(void); - -#endif /* __GNUC__ */ - -#ifdef CPU_ENABLE_SSE -#define GET_FPU_CW(thread) \ - (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (cpu_fxsr ? 
\ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#else /* CPU_ENABLE_SSE */ -#define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#endif /* CPU_ENABLE_SSE */ - -typedef u_char bool_t; - -static void fpusave(union savefpu *); -static void fpurstor(union savefpu *); -static int npx_attach(device_t dev); -static void npx_identify(driver_t *driver, device_t parent); -#if 0 -static void npx_intr(void *); -#endif -static int npx_probe(device_t dev); -#ifdef I586_CPU_XXX -static long timezero(const char *funcname, - void (*func)(void *buf, size_t len)); -#endif /* I586_CPU */ - -int hw_float; /* XXX currently just alias for npx_exists */ - -SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, - CTLFLAG_RD, &hw_float, 0, - "Floatingpoint instructions executed in hardware"); -#if 0 -static volatile u_int npx_intrs_while_probing; -#endif -static union savefpu npx_cleanstate; -static bool_t npx_cleanstate_ready; -static bool_t npx_ex16; -static bool_t npx_exists; -static bool_t npx_irq13; - -alias_for_inthand_t probetrap; -#if 0 -__asm(" \n\ - .text \n\ - .p2align 2,0x90 \n\ - .type " __XSTRING(CNAME(probetrap)) ",@function \n\ -" __XSTRING(CNAME(probetrap)) ": \n\ - ss \n\ - incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ - fnclex \n\ - iret \n\ -"); -#endif -/* - * Identify routine. Create a connection point on our parent for probing. - */ -static void -npx_identify(driver, parent) - driver_t *driver; - device_t parent; -{ - device_t child; - - child = BUS_ADD_CHILD(parent, 0, "npx", 0); - if (child == NULL) - panic("npx_identify"); -} -#if 0 -/* - * Do minimal handling of npx interrupts to convert them to traps. 
- */ -static void -npx_intr(dummy) - void *dummy; -{ - struct thread *td; - - npx_intrs_while_probing++; - - /* - * The BUSY# latch must be cleared in all cases so that the next - * unmasked npx exception causes an interrupt. - */ -#ifdef PC98 - outb(0xf8, 0); -#else - outb(0xf0, 0); -#endif - - /* - * fpcurthread is normally non-null here. In that case, schedule an - * AST to finish the exception handling in the correct context - * (this interrupt may occur after the thread has entered the - * kernel via a syscall or an interrupt). Otherwise, the npx - * state of the thread that caused this interrupt must have been - * pushed to the thread's pcb, and clearing of the busy latch - * above has finished the (essentially null) handling of this - * interrupt. Control will eventually return to the instruction - * that caused it and it will repeat. We will eventually (usually - * soon) win the race to handle the interrupt properly. - */ - td = PCPU_GET(fpcurthread); - if (td != NULL) { - td->td_pcb->pcb_flags |= PCB_NPXTRAP; - mtx_lock_spin(&sched_lock); - td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); - } -} -#endif - -static int -npx_probe(device_t dev) -{ - - return 1; -} - -#if 0 -/* - * Probe routine. Initialize cr0 to give correct behaviour for [f]wait - * whether the device exists or not (XXX should be elsewhere). Set flags - * to tell npxattach() what to do. Modify device struct if npx doesn't - * need to use interrupts. Return 0 if device exists. 
- */ -static int -npx_probe(device_t dev) -{ - struct gate_descriptor save_idt_npxtrap; - struct resource *ioport_res, *irq_res; - void *irq_cookie; - int ioport_rid, irq_num, irq_rid; - u_short control; - u_short status; - - save_idt_npxtrap = idt[IDT_MF]; - setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - ioport_rid = 0; - ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid, - IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE); - if (ioport_res == NULL) - panic("npx: can't get ports"); -#ifdef PC98 - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 8; -#else - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 13; -#endif - irq_rid = 0; - irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num, - irq_num, 1, RF_ACTIVE); - if (irq_res == NULL) - panic("npx: can't get IRQ"); - if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr, - NULL, &irq_cookie) != 0) - panic("npx: can't create intr"); - - /* - * Partially reset the coprocessor, if any. Some BIOS's don't reset - * it after a warm boot. - */ -#ifdef PC98 - outb(0xf8,0); -#else - outb(0xf1, 0); /* full reset on some systems, NOP on others */ - outb(0xf0, 0); /* clear BUSY# latch */ -#endif - /* - * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT - * instructions. We must set the CR0_MP bit and use the CR0_TS - * bit to control the trap, because setting the CR0_EM bit does - * not cause WAIT instructions to trap. It's important to trap - * WAIT instructions - otherwise the "wait" variants of no-wait - * control instructions would degenerate to the "no-wait" variants - * after FP context switches but work correctly otherwise. It's - * particularly important to trap WAITs when there is no NPX - - * otherwise the "wait" variants would always degenerate. - * - * Try setting CR0_NE to get correct error reporting on 486DX's. - * Setting it should fail or do nothing on lesser processors. 
- */ - load_cr0(rcr0() | CR0_MP | CR0_NE); - /* - * But don't trap while we're probing. - */ - stop_emulating(); - /* - * Finish resetting the coprocessor, if any. If there is an error - * pending, then we may get a bogus IRQ13, but npx_intr() will handle - * it OK. Bogus halts have never been observed, but we enabled - * IRQ13 and cleared the BUSY# latch early to handle them anyway. - */ - fninit(); - - device_set_desc(dev, "math processor"); - - /* - * Don't use fwait here because it might hang. - * Don't use fnop here because it usually hangs if there is no FPU. - */ - DELAY(1000); /* wait for any IRQ13 */ -#ifdef DIAGNOSTIC - if (npx_intrs_while_probing != 0) - printf("fninit caused %u bogus npx interrupt(s)\n", - npx_intrs_while_probing); - if (npx_traps_while_probing != 0) - printf("fninit caused %u bogus npx trap(s)\n", - npx_traps_while_probing); -#endif - /* - * Check for a status of mostly zero. - */ - status = 0x5a5a; - fnstsw(&status); - if ((status & 0xb8ff) == 0) { - /* - * Good, now check for a proper control word. - */ - control = 0x5a5a; - fnstcw(&control); - if ((control & 0x1f3f) == 0x033f) { - hw_float = npx_exists = 1; - /* - * We have an npx, now divide by 0 to see if exception - * 16 works. - */ - control &= ~(1 << 2); /* enable divide by 0 trap */ - fldcw(&control); -#ifdef FPU_ERROR_BROKEN - /* - * FPU error signal doesn't work on some CPU - * accelerator board. - */ - npx_ex16 = 1; - return (0); -#endif - npx_traps_while_probing = npx_intrs_while_probing = 0; - fp_divide_by_0(); - if (npx_traps_while_probing != 0) { - /* - * Good, exception 16 works. - */ - npx_ex16 = 1; - goto no_irq13; - } - if (npx_intrs_while_probing != 0) { - /* - * Bad, we are stuck with IRQ13. - */ - npx_irq13 = 1; - idt[IDT_MF] = save_idt_npxtrap; -#ifdef SMP - if (mp_ncpus > 1) - panic("npx0 cannot use IRQ 13 on an SMP system"); -#endif - return (0); - } - /* - * Worse, even IRQ13 is broken. Use emulator. 
- */ - } - } - /* - * Probe failed, but we want to get to npxattach to initialize the - * emulator and say that it has been installed. XXX handle devices - * that aren't really devices better. - */ -#ifdef SMP - if (mp_ncpus > 1) - panic("npx0 cannot be emulated on an SMP system"); -#endif - /* FALLTHROUGH */ -no_irq13: - idt[IDT_MF] = save_idt_npxtrap; - bus_teardown_intr(dev, irq_res, irq_cookie); - - /* - * XXX hack around brokenness of bus_teardown_intr(). If we left the - * irq active then we would get it instead of exception 16. - */ - { - struct intsrc *isrc; - - isrc = intr_lookup_source(irq_num); - isrc->is_pic->pic_disable_source(isrc); - } - - bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); - bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); - return (0); -} -#endif - -/* - * Attach routine - announce which it is, and wire into system - */ -static int -npx_attach(device_t dev) -{ - int flags; - register_t s; - - if (resource_int_value("npx", 0, "flags", &flags) != 0) - flags = 0; - - if (flags) - device_printf(dev, "flags 0x%x ", flags); - if (npx_irq13) { - device_printf(dev, "using IRQ 13 interface\n"); - } else { - if (npx_ex16) - device_printf(dev, "INT 16 interface\n"); - else - device_printf(dev, "WARNING: no FPU!\n"); - } - npxinit(__INITIAL_NPXCW__); - - if (npx_cleanstate_ready == 0) { - s = intr_disable(); - stop_emulating(); - fpusave(&npx_cleanstate); - start_emulating(); - npx_cleanstate_ready = 1; - intr_restore(s); - } -#ifdef I586_CPU_XXX - if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && - timezero("i586_bzero()", i586_bzero) < - timezero("bzero()", bzero) * 4 / 5) { - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) - bcopy_vector = i586_bcopy; - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) - bzero_vector = i586_bzero; - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { - copyin_vector = i586_copyin; - copyout_vector = i586_copyout; - } - } -#endif - - return (0); /* XXX unused */ -} - -/* - * 
Initialize floating point unit. - */ -void -npxinit(control) - u_short control; -{ - static union savefpu dummy; - register_t savecrit; - - if (!npx_exists) - return; - /* - * fninit has the same h/w bugs as fnsave. Use the detoxified - * fnsave to throw away any junk in the fpu. npxsave() initializes - * the fpu and sets fpcurthread = NULL as important side effects. - */ - savecrit = intr_disable(); - npxsave(&dummy); - stop_emulating(); -#ifdef CPU_ENABLE_SSE - /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ - if (cpu_fxsr) - fninit(); -#endif - fldcw(&control); - start_emulating(); - intr_restore(savecrit); -} - -/* - * Free coprocessor (if we have it). - */ -void -npxexit(td) - struct thread *td; -{ - register_t savecrit; - - savecrit = intr_disable(); - if (curthread == PCPU_GET(fpcurthread)) - npxsave(&PCPU_GET(curpcb)->pcb_save); - intr_restore(savecrit); -#ifdef NPX_DEBUG - if (npx_exists) { - u_int masked_exceptions; - - masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; - /* - * Log exceptions that would have trapped with the old - * control word (overflow, divide by 0, and invalid operand). - */ - if (masked_exceptions & 0x0d) - log(LOG_ERR, - "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", - td->td_proc->p_pid, td->td_proc->p_comm, - masked_exceptions); - } -#endif -} - -int -npxformat() -{ - - if (!npx_exists) - return (_MC_FPFMT_NODEV); -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - return (_MC_FPFMT_XMM); -#endif - return (_MC_FPFMT_387); -} - -/* - * The following mechanism is used to ensure that the FPE_... value - * that is passed as a trapcode to the signal handler of the user - * process does not have more than one bit set. - * - * Multiple bits may be set if the user process modifies the control - * word while a status word bit is already set. 
While this is a sign - * of bad coding, we have no choise than to narrow them down to one - * bit, since we must not send a trapcode that is not exactly one of - * the FPE_ macros. - * - * The mechanism has a static table with 127 entries. Each combination - * of the 7 FPU status word exception bits directly translates to a - * position in this table, where a single FPE_... value is stored. - * This FPE_... value stored there is considered the "most important" - * of the exception bits and will be sent as the signal code. The - * precedence of the bits is based upon Intel Document "Numerical - * Applications", Chapter "Special Computational Situations". - * - * The macro to choose one of these values does these steps: 1) Throw - * away status word bits that cannot be masked. 2) Throw away the bits - * currently masked in the control word, assuming the user isn't - * interested in them anymore. 3) Reinsert status word bit 7 (stack - * fault) if it is set, which cannot be masked but must be presered. - * 4) Use the remaining bits to point into the trapcode table. - * - * The 6 maskable bits in order of their preference, as stated in the - * above referenced Intel manual: - * 1 Invalid operation (FP_X_INV) - * 1a Stack underflow - * 1b Stack overflow - * 1c Operand of unsupported format - * 1d SNaN operand. 
- * 2 QNaN operand (not an exception, irrelavant here) - * 3 Any other invalid-operation not mentioned above or zero divide - * (FP_X_INV, FP_X_DZ) - * 4 Denormal operand (FP_X_DNML) - * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) - * 6 Inexact result (FP_X_IMP) - */ -static char fpetable[128] = { - 0, - FPE_FLTINV, /* 1 - INV */ - FPE_FLTUND, /* 2 - DNML */ - FPE_FLTINV, /* 3 - INV | DNML */ - FPE_FLTDIV, /* 4 - DZ */ - FPE_FLTINV, /* 5 - INV | DZ */ - FPE_FLTDIV, /* 6 - DNML | DZ */ - FPE_FLTINV, /* 7 - INV | DNML | DZ */ - FPE_FLTOVF, /* 8 - OFL */ - FPE_FLTINV, /* 9 - INV | OFL */ - FPE_FLTUND, /* A - DNML | OFL */ - FPE_FLTINV, /* B - INV | DNML | OFL */ - FPE_FLTDIV, /* C - DZ | OFL */ - FPE_FLTINV, /* D - INV | DZ | OFL */ - FPE_FLTDIV, /* E - DNML | DZ | OFL */ - FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ - FPE_FLTUND, /* 10 - UFL */ - FPE_FLTINV, /* 11 - INV | UFL */ - FPE_FLTUND, /* 12 - DNML | UFL */ - FPE_FLTINV, /* 13 - INV | DNML | UFL */ - FPE_FLTDIV, /* 14 - DZ | UFL */ - FPE_FLTINV, /* 15 - INV | DZ | UFL */ - FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ - FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ - FPE_FLTOVF, /* 18 - OFL | UFL */ - FPE_FLTINV, /* 19 - INV | OFL | UFL */ - FPE_FLTUND, /* 1A - DNML | OFL | UFL */ - FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ - FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ - FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ - FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ - FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ - FPE_FLTRES, /* 20 - IMP */ - FPE_FLTINV, /* 21 - INV | IMP */ - FPE_FLTUND, /* 22 - DNML | IMP */ - FPE_FLTINV, /* 23 - INV | DNML | IMP */ - FPE_FLTDIV, /* 24 - DZ | IMP */ - FPE_FLTINV, /* 25 - INV | DZ | IMP */ - FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ - FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ - FPE_FLTOVF, /* 28 - OFL | IMP */ - FPE_FLTINV, /* 29 - INV | OFL | IMP */ - FPE_FLTUND, /* 2A - DNML | OFL | IMP */ - FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ - FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ - 
FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ - FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ - FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ - FPE_FLTUND, /* 30 - UFL | IMP */ - FPE_FLTINV, /* 31 - INV | UFL | IMP */ - FPE_FLTUND, /* 32 - DNML | UFL | IMP */ - FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ - FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ - FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ - FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ - FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ - FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ - FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ - FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ - FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ - FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ - FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ - FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ - FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ - FPE_FLTSUB, /* 40 - STK */ - FPE_FLTSUB, /* 41 - INV | STK */ - FPE_FLTUND, /* 42 - DNML | STK */ - FPE_FLTSUB, /* 43 - INV | DNML | STK */ - FPE_FLTDIV, /* 44 - DZ | STK */ - FPE_FLTSUB, /* 45 - INV | DZ | STK */ - FPE_FLTDIV, /* 46 - DNML | DZ | STK */ - FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ - FPE_FLTOVF, /* 48 - OFL | STK */ - FPE_FLTSUB, /* 49 - INV | OFL | STK */ - FPE_FLTUND, /* 4A - DNML | OFL | STK */ - FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ - FPE_FLTDIV, /* 4C - DZ | OFL | STK */ - FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ - FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ - FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ - FPE_FLTUND, /* 50 - UFL | STK */ - FPE_FLTSUB, /* 51 - INV | UFL | STK */ - FPE_FLTUND, /* 52 - DNML | UFL | STK */ - FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ - FPE_FLTDIV, /* 54 - DZ | UFL | STK */ - FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ - FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ - FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ - FPE_FLTOVF, /* 58 - OFL | UFL | STK */ - FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ - 
FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ - FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ - FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ - FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ - FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ - FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ - FPE_FLTRES, /* 60 - IMP | STK */ - FPE_FLTSUB, /* 61 - INV | IMP | STK */ - FPE_FLTUND, /* 62 - DNML | IMP | STK */ - FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ - FPE_FLTDIV, /* 64 - DZ | IMP | STK */ - FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ - FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ - FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ - FPE_FLTOVF, /* 68 - OFL | IMP | STK */ - FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ - FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ - FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ - FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ - FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ - FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ - FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ - FPE_FLTUND, /* 70 - UFL | IMP | STK */ - FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ - FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ - FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ - FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ - FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ - FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ - FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ - FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ - FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ - FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ - FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ - FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ -}; - -/* - * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 
- * - * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now - * depend on longjmp() restoring a usable state. Restoring the state - * or examining it might fail if we didn't clear exceptions. - * - * The error code chosen will be one of the FPE_... macros. It will be - * sent as the second argument to old BSD-style signal handlers and as - * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. - * - * XXX the FP state is not preserved across signal handlers. So signal - * handlers cannot afford to do FP unless they preserve the state or - * longjmp() out. Both preserving the state and longjmp()ing may be - * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable - * solution for signals other than SIGFPE. - */ -int -npxtrap() -{ - register_t savecrit; - u_short control, status; - - if (!npx_exists) { - printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n", - PCPU_GET(fpcurthread), curthread, npx_exists); - panic("npxtrap from nowhere"); - } - savecrit = intr_disable(); - - /* - * Interrupt handling (for another interrupt) may have pushed the - * state to memory. Fetch the relevant parts of the state from - * wherever they are. - */ - if (PCPU_GET(fpcurthread) != curthread) { - control = GET_FPU_CW(curthread); - status = GET_FPU_SW(curthread); - } else { - fnstcw(&control); - fnstsw(&status); - } - - if (PCPU_GET(fpcurthread) == curthread) - fnclex(); - intr_restore(savecrit); - return (fpetable[status & ((~control & 0x3f) | 0x40)]); -} - -/* - * Implement device not available (DNA) exception - * - * It would be better to switch FP context here (if curthread != fpcurthread) - * and not necessarily for every context switch, but it is too hard to - * access foreign pcb's. 
- */ - -static int err_count = 0; - -int -npxdna() -{ - struct pcb *pcb; - register_t s; - u_short control; - - if (!npx_exists) - return (0); - if (PCPU_GET(fpcurthread) == curthread) { - printf("npxdna: fpcurthread == curthread %d times\n", - ++err_count); - stop_emulating(); - return (1); - } - if (PCPU_GET(fpcurthread) != NULL) { - printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", - PCPU_GET(fpcurthread), - PCPU_GET(fpcurthread)->td_proc->p_pid, - curthread, curthread->td_proc->p_pid); - panic("npxdna"); - } - s = intr_disable(); - stop_emulating(); - /* - * Record new context early in case frstor causes an IRQ13. - */ - PCPU_SET(fpcurthread, curthread); - pcb = PCPU_GET(curpcb); - - if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { - /* - * This is the first time this thread has used the FPU or - * the PCB doesn't contain a clean FPU state. Explicitly - * initialize the FPU and load the default control word. - */ - fninit(); - control = __INITIAL_NPXCW__; - fldcw(&control); - pcb->pcb_flags |= PCB_NPXINITDONE; - } else { - /* - * The following frstor may cause an IRQ13 when the state - * being restored has a pending error. The error will - * appear to have been triggered by the current (npx) user - * instruction even when that instruction is a no-wait - * instruction that should not trigger an error (e.g., - * fnclex). On at least one 486 system all of the no-wait - * instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least - * one 386/Cyrix 387 system, fnclex works correctly while - * frstor and fnsave are broken, so our treatment breaks - * fnclex if it is the first FPU instruction after a context - * switch. - */ - fpurstor(&pcb->pcb_save); - } - intr_restore(s); - - return (1); -} - -/* - * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx - * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by - * no-wait npx instructions. 
See the Intel application note AP-578 for - * details. This doesn't cause any additional complications here. IRQ13's - * are inherently asynchronous unless the CPU is frozen to deliver them -- - * one that started in userland may be delivered many instructions later, - * after the process has entered the kernel. It may even be delivered after - * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in - * the same way as a very-late-arriving non-spurious IRQ13 from user mode: - * it is normally ignored at first because we set fpcurthread to NULL; it is - * normally retriggered in npxdna() after return to user mode. - * - * npxsave() must be called with interrupts disabled, so that it clears - * fpcurthread atomically with saving the state. We require callers to do the - * disabling, since most callers need to disable interrupts anyway to call - * npxsave() atomically with checking fpcurthread. - * - * A previous version of npxsave() went to great lengths to excecute fnsave - * with interrupts enabled in case executing it froze the CPU. This case - * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply - * spurious freezes. - */ -void -npxsave(addr) - union savefpu *addr; -{ - - stop_emulating(); - fpusave(addr); - - start_emulating(); - PCPU_SET(fpcurthread, NULL); - queue_multicall0(__HYPERVISOR_fpu_taskswitch); -} - -/* - * This should be called with interrupts disabled and only when the owning - * FPU thread is non-null. - */ -void -npxdrop() -{ - struct thread *td; - - td = PCPU_GET(fpcurthread); - PCPU_SET(fpcurthread, NULL); - td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; - start_emulating(); -} - -/* - * Get the state of the FPU without dropping ownership (if possible). - * It returns the FPU ownership status. 
- */ -int -npxgetregs(td, addr) - struct thread *td; - union savefpu *addr; -{ - register_t s; - - if (!npx_exists) - return (_MC_FPOWNED_NONE); - - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { - if (npx_cleanstate_ready) - bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); - else - bzero(addr, sizeof(*addr)); - return (_MC_FPOWNED_NONE); - } - s = intr_disable(); - if (td == PCPU_GET(fpcurthread)) { - fpusave(addr); -#ifdef CPU_ENABLE_SSE - if (!cpu_fxsr) -#endif - /* - * fnsave initializes the FPU and destroys whatever - * context it contains. Make sure the FPU owner - * starts with a clean state next time. - */ - npxdrop(); - intr_restore(s); - return (_MC_FPOWNED_FPU); - } else { - intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); - return (_MC_FPOWNED_PCB); - } -} - -/* - * Set the state of the FPU. - */ -void -npxsetregs(td, addr) - struct thread *td; - union savefpu *addr; -{ - register_t s; - - if (!npx_exists) - return; - - s = intr_disable(); - if (td == PCPU_GET(fpcurthread)) { - fpurstor(addr); - intr_restore(s); - } else { - intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); - } - curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; -} - -static void -fpusave(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxsave(addr); - else -#endif - fnsave(addr); -} - -static void -fpurstor(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxrstor(addr); - else -#endif - frstor(addr); -} - -#ifdef I586_CPU_XXX -static long -timezero(funcname, func) - const char *funcname; - void (*func)(void *buf, size_t len); - -{ - void *buf; -#define BUFSIZE 1048576 - long usec; - struct timeval finish, start; - - buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT); - if (buf == NULL) - return (BUFSIZE); - microtime(&start); - (*func)(buf, BUFSIZE); - microtime(&finish); - usec = 1000000 * (finish.tv_sec - start.tv_sec) + - finish.tv_usec - start.tv_usec; - if (usec <= 0) - usec = 1; - if 
(bootverbose) - printf("%s bandwidth = %u kBps\n", funcname, - (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec)); - free(buf, M_TEMP); - return (usec); -} -#endif /* I586_CPU */ - -static device_method_t npx_methods[] = { - /* Device interface */ - DEVMETHOD(device_identify, npx_identify), - DEVMETHOD(device_probe, npx_probe), - DEVMETHOD(device_attach, npx_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - { 0, 0 } -}; - -static driver_t npx_driver = { - "npx", - npx_methods, - 1, /* no softc */ -}; - -static devclass_t npx_devclass; -DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); - -#ifdef DEV_ISA -/* - * We prefer to attach to the root nexus so that the usual case (exception 16) - * doesn't describe the processor as being `on isa'. - */ -DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); - -/* - * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
- */ -static struct isa_pnp_id npxisa_ids[] = { - { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ - { 0 } -}; - -static int -npxisa_probe(device_t dev) -{ - int result; - if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { - device_quiet(dev); - } - return(result); -} - -static int -npxisa_attach(device_t dev) -{ - return (0); -} - -static device_method_t npxisa_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, npxisa_probe), - DEVMETHOD(device_attach, npxisa_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - { 0, 0 } -}; - -static driver_t npxisa_driver = { - "npxisa", - npxisa_methods, - 1, /* no softc */ -}; - -static devclass_t npxisa_devclass; - -DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); -#ifndef PC98 -DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); -#endif -#endif /* DEV_ISA */ diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c --- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1434 +0,0 @@ -/* - * - * Copyright (c) 2004 Kip Macy - * All rights reserved. - * - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "opt_nfsroot.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/sockio.h> -#include <sys/mbuf.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/socket.h> -#include <sys/queue.h> - -#include <net/if.h> -#include <net/if_arp.h> -#include <net/ethernet.h> -#include <net/if_dl.h> -#include <net/if_media.h> - -#include <net/bpf.h> - -#include <net/if_types.h> -#include <net/if_vlan_var.h> - -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <machine/clock.h> /* for DELAY */ -#include <machine/bus_memio.h> -#include <machine/bus.h> -#include <machine/resource.h> -#include <machine/frame.h> - - -#include <sys/bus.h> -#include <sys/rman.h> - -#include <machine/intr_machdep.h> - -#include <machine/xen-os.h> -#include <machine/hypervisor.h> -#include <machine/hypervisor-ifs.h> -#include <machine/xen_intr.h> -#include <machine/evtchn.h> -#include <machine/ctrl_if.h> - -struct xn_softc; -static void xn_txeof(struct xn_softc *); -static void xn_rxeof(struct xn_softc *); -static void xn_alloc_rx_buffers(struct xn_softc *); - -static void xn_tick_locked(struct xn_softc *); -static void xn_tick(void *); - -static void xn_intr(void *); -static void xn_start_locked(struct ifnet *); -static void xn_start(struct ifnet *); -static int xn_ioctl(struct ifnet *, u_long, caddr_t); -static void xn_ifinit_locked(struct xn_softc *); -static void 
xn_ifinit(void *); -static void xn_stop(struct xn_softc *); -#ifdef notyet -static void xn_watchdog(struct ifnet *); -#endif -/* Xenolinux helper functions */ -static void network_connect(struct xn_softc *, netif_fe_interface_status_t *); -static void create_netdev(int handle, struct xn_softc **); -static void netif_ctrlif_rx(ctrl_msg_t *,unsigned long); - -static void xn_free_rx_ring(struct xn_softc *); - -static void xn_free_tx_ring(struct xn_softc *); - - - -/* XXX: This isn't supported in FreeBSD, so ignore it for now. */ -#define TASK_UNINTERRUPTIBLE 0 -#define INVALID_P2M_ENTRY (~0UL) - -/* - * If the backend driver is pipelining transmit requests then we can be very - * aggressive in avoiding new-packet notifications -- only need to send a - * notification if there are no outstanding unreceived responses. - * If the backend may be buffering our transmit buffers for any reason then we - * are rather more conservative. - */ -#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER -#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */ -#else -#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */ -#endif - -/* - * Mbuf pointers. We need these to keep track of the virtual addresses - * of our mbuf chains since we can only convert from virtual to physical, - * not the other way around. The size must track the free index arrays. 
- */ -struct xn_chain_data { - struct mbuf *xn_tx_chain[NETIF_TX_RING_SIZE+1]; - struct mbuf *xn_rx_chain[NETIF_RX_RING_SIZE+1]; -}; - -struct xn_softc { - struct arpcom arpcom; /* interface info */ - device_t xn_dev; - SLIST_ENTRY(xn_softc) xn_links; - struct mtx xn_mtx; - void *xn_intrhand; - struct resource *xn_res; - u_int8_t xn_ifno; /* interface number */ - struct xn_chain_data xn_cdata; /* mbufs */ - - netif_tx_interface_t *xn_tx_if; - netif_rx_interface_t *xn_rx_if; - - int xn_if_flags; - int xn_txcnt; - int xn_rxbufcnt; - struct callout xn_stat_ch; - unsigned int xn_irq; - unsigned int xn_evtchn; - - - /* What is the status of our connection to the remote backend? */ -#define BEST_CLOSED 0 -#define BEST_DISCONNECTED 1 -#define BEST_CONNECTED 2 - unsigned int xn_backend_state; - - /* Is this interface open or closed (down or up)? */ -#define UST_CLOSED 0 -#define UST_OPEN 1 - unsigned int xn_user_state; - - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 64 /* XXX: larger than linux. was causing packet - * loss at the default of 8. 
- */ -#define RX_MAX_TARGET NETIF_RX_RING_SIZE - int xn_rx_target; /* number to allocate */ - struct mbuf *xn_rx_batch; /* head of the batch queue */ - struct mbuf *xn_rx_batchtail; - int xn_rx_batchlen; /* how many queued */ - - int xn_rx_resp_cons; - int xn_tx_resp_cons; - unsigned short xn_rx_free_idxs[NETIF_RX_RING_SIZE+1]; - unsigned short xn_tx_free_idxs[NETIF_RX_RING_SIZE+1]; -}; - -static unsigned long xn_rx_pfns[NETIF_RX_RING_SIZE]; -static multicall_entry_t xn_rx_mcl[NETIF_RX_RING_SIZE+1]; -static mmu_update_t xn_rx_mmu[NETIF_RX_RING_SIZE]; - -static SLIST_HEAD(, xn_softc) xn_dev_list = - SLIST_HEAD_INITIALIZER(xn_dev_list); - -#define XN_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->xn_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF) -#define XN_LOCK(_sc) mtx_lock(&(_sc)->xn_mtx) -#define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->xn_mtx, MA_OWNED) -#define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->xn_mtx) -#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->xn_mtx) - -/* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ -#define ADD_ID_TO_FREELIST(_list, _id) \ - (_list)[(_id)] = (_list)[0]; \ - (_list)[0] = (_id); -#define GET_ID_FROM_FREELIST(_list) \ - ({ unsigned short _id = (_list)[0]; \ - (_list)[0] = (_list)[_id]; \ - (unsigned short)_id; }) -#define FREELIST_EMPTY(_list, _maxid) \ - ((_list)[0] == (_maxid+1)) - -static char *status_name[] = { - [NETIF_INTERFACE_STATUS_CLOSED] = "closed", - [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", - [NETIF_INTERFACE_STATUS_CONNECTED] = "connected", - [NETIF_INTERFACE_STATUS_CHANGED] = "changed", -}; - -static char *be_state_name[] = { - [BEST_CLOSED] = "closed", - [BEST_DISCONNECTED] = "disconnected", - [BEST_CONNECTED] = "connected", -}; - -#define IPRINTK(fmt, args...) \ - printk("[XEN] " fmt, ##args) -#define WPRINTK(fmt, args...) 
\ - printk("[XEN] " fmt, ##args) - -static struct xn_softc * -find_sc_by_handle(unsigned int handle) -{ - struct xn_softc *sc; - SLIST_FOREACH(sc, &xn_dev_list, xn_links) - { - if ( sc->xn_ifno == handle ) - return sc; - } - return NULL; -} - -/** Network interface info. */ -struct netif_ctrl { - /** Number of interfaces. */ - int interface_n; - /** Number of connected interfaces. */ - int connected_n; - /** Error code. */ - int err; - int up; -}; - -static struct netif_ctrl netctrl; - -static void -netctrl_init(void) -{ - /* - * netctrl is already in bss, why are we setting it? - */ - memset(&netctrl, 0, sizeof(netctrl)); - netctrl.up = NETIF_DRIVER_STATUS_DOWN; -} - -/** Get or set a network interface error. - */ -static int -netctrl_err(int err) -{ - if ( (err < 0) && !netctrl.err ) - netctrl.err = err; - return netctrl.err; -} - -/** Test if all network interfaces are connected. - * - * @return 1 if all connected, 0 if not, negative error code otherwise - */ -static int -netctrl_connected(void) -{ - int ok; - XENPRINTF("err %d up %d\n", netctrl.err, netctrl.up); - if (netctrl.err) - ok = netctrl.err; - else if (netctrl.up == NETIF_DRIVER_STATUS_UP) - ok = (netctrl.connected_n == netctrl.interface_n); - else - ok = 0; - - return ok; -} - -/** Count the connected network interfaces. - * - * @return connected count - */ -static int -netctrl_connected_count(void) -{ - - struct xn_softc *sc; - unsigned int connected; - - connected = 0; - - SLIST_FOREACH(sc, &xn_dev_list, xn_links) - { - if ( sc->xn_backend_state == BEST_CONNECTED ) - connected++; - } - - netctrl.connected_n = connected; - XENPRINTF("> connected_n=%d interface_n=%d\n", - netctrl.connected_n, netctrl.interface_n); - return connected; -} - -static __inline struct mbuf* -makembuf (struct mbuf *buf) -{ - struct mbuf *m = NULL; - - MGETHDR (m, M_DONTWAIT, MT_DATA); - - if (! 
m) - return 0; - - M_MOVE_PKTHDR(m, buf); - - MCLGET (m, M_DONTWAIT); - - m->m_pkthdr.len = buf->m_pkthdr.len; - m->m_len = buf->m_len; - m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) ); - m->m_ext.ext_args = (vm_paddr_t *)vtophys(mtod(m,caddr_t)); - - return m; -} - - - -static void -xn_free_rx_ring(struct xn_softc *sc) -{ -#if 0 - int i; - - for (i = 0; i < NETIF_RX_RING_SIZE; i++) { - if (sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] != NULL) { - m_freem(sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)]); - sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(i)] = NULL; - } - } - - sc->xn_rx_resp_cons = 0; - sc->xn_rx_if->req_prod = 0; - sc->xn_rx_if->event = sc->xn_rx_resp_cons ; -#endif -} - -static void -xn_free_tx_ring(struct xn_softc *sc) -{ -#if 0 - int i; - - for (i = 0; i < NETIF_TX_RING_SIZE; i++) { - if (sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] != NULL) { - m_freem(sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)]); - sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(i)] = NULL; - } - } - - return; -#endif -} - -static void -xn_alloc_rx_buffers(struct xn_softc *sc) -{ - unsigned short id; - struct mbuf *m_new, *next; - int i, batch_target; - NETIF_RING_IDX req_prod = sc->xn_rx_if->req_prod; - - if (unlikely(sc->xn_backend_state != BEST_CONNECTED) ) - return; - - /* - * Allocate skbuffs greedily, even though we batch updates to the - * receive ring. This creates a less bursty demand on the memory allocator, - * so should reduce the chance of failed allocation requests both for - * ourself and for other kernel subsystems. 
- */ - batch_target = sc->xn_rx_target - (req_prod - sc->xn_rx_resp_cons); - for ( i = sc->xn_rx_batchlen; i < batch_target; i++, sc->xn_rx_batchlen++) { - MGETHDR(m_new, M_DONTWAIT, MT_DATA); - if (m_new == NULL) - break; - - MCLGET(m_new, M_DONTWAIT); - if (!(m_new->m_flags & M_EXT)) { - m_freem(m_new); - break; - } - m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; - - /* queue the mbufs allocated */ - if (!sc->xn_rx_batch) - sc->xn_rx_batch = m_new; - - if (sc->xn_rx_batchtail) - sc->xn_rx_batchtail->m_next = m_new; - sc->xn_rx_batchtail = m_new; - } - - /* Is the batch large enough to be worthwhile? */ - if ( i < (sc->xn_rx_target/2) ) - return; - - for (i = 0, m_new = sc->xn_rx_batch; m_new; - i++, sc->xn_rx_batchlen--, m_new = next) { - - next = m_new->m_next; - m_new->m_next = NULL; - - m_new->m_ext.ext_args = (vm_paddr_t *)vtophys(m_new->m_ext.ext_buf); - - id = GET_ID_FROM_FREELIST(sc->xn_rx_free_idxs); - KASSERT(id != 0, ("alloc_rx_buffers: found free receive index of 0\n")); - sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(id)] = m_new; - - sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; - - xn_rx_pfns[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; - - /* Remove this page from pseudo phys map before passing back to Xen. */ - xen_phys_machine[((unsigned long)m_new->m_ext.ext_args >> PAGE_SHIFT)] - = INVALID_P2M_ENTRY; - - xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping; - xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t); - xn_rx_mcl[i].args[1] = 0; - xn_rx_mcl[i].args[2] = 0; - - } - - KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ - KASSERT(sc->xn_rx_batchlen == 0, ("not all mbufs processed")); - sc->xn_rx_batch = sc->xn_rx_batchtail = NULL; - - /* - * We may have allocated buffers which have entries outstanding - in the page * update queue -- make sure we flush those first! */ - PT_UPDATES_FLUSH(); - - /* After all PTEs have been zapped we blow away stale TLB entries. 
*/ - xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; - - /* Give away a batch of pages. */ - xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op; - xn_rx_mcl[i].args[0] = MEMOP_decrease_reservation; - xn_rx_mcl[i].args[1] = (unsigned long)xn_rx_pfns; - xn_rx_mcl[i].args[2] = (unsigned long)i; - xn_rx_mcl[i].args[3] = 0; - xn_rx_mcl[i].args[4] = DOMID_SELF; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(xn_rx_mcl, i+1); - - /* Check return status of HYPERVISOR_dom_mem_op(). */ - if (unlikely(xn_rx_mcl[i].result != i)) - panic("Unable to reduce memory reservation\n"); - - /* Above is a suitable barrier to ensure backend will see requests. */ - sc->xn_rx_if->req_prod = req_prod + i; - - /* Adjust our floating fill target if we risked running out of buffers. */ - if ( ((req_prod - sc->xn_rx_if->resp_prod) < (sc->xn_rx_target / 4)) && - ((sc->xn_rx_target *= 2) > RX_MAX_TARGET) ) - sc->xn_rx_target = RX_MAX_TARGET; -} - -static void -xn_rxeof(struct xn_softc *sc) -{ - struct ifnet *ifp; - netif_rx_response_t *rx; - NETIF_RING_IDX i, rp; - mmu_update_t *mmu = xn_rx_mmu; - multicall_entry_t *mcl = xn_rx_mcl; - struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next; - - XN_LOCK_ASSERT(sc); - if (sc->xn_backend_state != BEST_CONNECTED) - return; - - ifp = &sc->arpcom.ac_if; - - rp = sc->xn_rx_if->resp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for (i = sc->xn_rx_resp_cons; i != rp; i++) { - - rx = &sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(i)].resp; - KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n")); - - /* - * An error here is very odd. Usually indicates a backend bug, - * low-memory condition, or that we didn't have reservation headroom. - * Whatever - print an error and queue the id again straight away. 
- */ - if (unlikely(rx->status <= 0)) { - printk("bad buffer on RX ring!(%d)\n", rx->status); - sc->xn_rx_if->ring[MASK_NETIF_RX_IDX(sc->xn_rx_if->req_prod)].req.id - = rx->id; - wmb(); - sc->xn_rx_if->req_prod++; - continue; - } - - m = (struct mbuf *) - sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)]; - if (m->m_next) - panic("mbuf is already part of a valid mbuf chain"); - ADD_ID_TO_FREELIST(sc->xn_rx_free_idxs, rx->id); - - m->m_data += (rx->addr & PAGE_MASK); - m->m_pkthdr.len = m->m_len = rx->status; - m->m_pkthdr.rcvif = ifp; - - /* Remap the page. */ - mmu->ptr = (rx->addr & ~PAGE_MASK) | MMU_MACHPHYS_UPDATE; - mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT; - mmu++; - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = (unsigned long)m->m_data; - mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL; - mcl->args[2] = 0; - mcl++; - - xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] = - (rx->addr >> PAGE_SHIFT); - - if (unlikely(!head_mbuf)) - head_mbuf = m; - - if (tail_mbuf) - tail_mbuf->m_next = m; - tail_mbuf = m; - - sc->xn_cdata.xn_rx_chain[MASK_NETIF_RX_IDX(rx->id)] = NULL; - sc->xn_rxbufcnt++; - } - - /* Do all the remapping work, and M->P updates, in one big hypercall. */ - if (likely((mcl - xn_rx_mcl) != 0)) { - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)xn_rx_mmu; - mcl->args[1] = mmu - xn_rx_mmu; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - mcl++; - (void)HYPERVISOR_multicall(xn_rx_mcl, mcl - xn_rx_mcl); - } - - - /* - * Process all the mbufs after the remapping is complete. - * Break the mbuf chain first though. - */ - for (m = head_mbuf; m; m = next) { - next = m->m_next; - m->m_next = NULL; - - ifp->if_ipackets++; - - XN_UNLOCK(sc); - - /* Pass it up. */ - (*ifp->if_input)(ifp, m); - XN_LOCK(sc); - } - - sc->xn_rx_resp_cons = i; - - /* If we get a callback with very few responses, reduce fill target. */ - /* NB. Note exponential increase, linear decrease. 
*/ - if (((sc->xn_rx_if->req_prod - sc->xn_rx_if->resp_prod) > - ((3*sc->xn_rx_target) / 4)) && (--sc->xn_rx_target < RX_MIN_TARGET)) - sc->xn_rx_target = RX_MIN_TARGET; - - xn_alloc_rx_buffers(sc); - - sc->xn_rx_if->event = i + 1; -} - -static void -xn_txeof(struct xn_softc *sc) -{ - NETIF_RING_IDX i, prod; - unsigned short id; - struct ifnet *ifp; - struct mbuf *m; - - XN_LOCK_ASSERT(sc); - - if (sc->xn_backend_state != BEST_CONNECTED) - return; - - ifp = &sc->arpcom.ac_if; - ifp->if_timer = 0; - - do { - prod = sc->xn_tx_if->resp_prod; - - for (i = sc->xn_tx_resp_cons; i != prod; i++) { - id = sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].resp.id; - m = sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)]; - - KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); - M_ASSERTVALID(m); - - m_freem(m); - sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = NULL; - ADD_ID_TO_FREELIST(sc->xn_tx_free_idxs, id); - sc->xn_txcnt--; - } - sc->xn_tx_resp_cons = prod; - - /* - * Set a new event, then check for race with update of tx_cons. Note - * that it is essential to schedule a callback, no matter how few - * buffers are pending. Even if there is space in the transmit ring, - * higher layers may be blocked because too much data is outstanding: - * in such cases notification from Xen is likely to be the only kick - * that we'll get. - */ - sc->xn_tx_if->event = - prod + ((sc->xn_tx_if->req_prod - prod) >> 1) + 1; - - mb(); - - } while (prod != sc->xn_tx_if->resp_prod); -} - -static void -xn_intr(void *xsc) -{ - struct xn_softc *sc = xsc; - struct ifnet *ifp = &sc->arpcom.ac_if; - - XN_LOCK(sc); - - /* sometimes we seem to lose packets. stay in the interrupt handler while - * there is stuff to process: continually recheck the response producer. 
- */ - do { - xn_txeof(sc); - - if (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod && - sc->xn_user_state == UST_OPEN) - xn_rxeof(sc); - - if (ifp->if_flags & IFF_RUNNING && ifp->if_snd.ifq_head != NULL) - xn_start_locked(ifp); - } while (sc->xn_rx_resp_cons != sc->xn_rx_if->resp_prod && - sc->xn_user_state == UST_OPEN); - - XN_UNLOCK(sc); - return; -} - -static void -xn_tick_locked(struct xn_softc *sc) -{ - XN_LOCK_ASSERT(sc); - callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); - - /* XXX placeholder for printing debug information */ - -} - - -static void -xn_tick(void *xsc) -{ - struct xn_softc *sc; - - sc = xsc; - XN_LOCK(sc); - xn_tick_locked(sc); - XN_UNLOCK(sc); - -} -static void -xn_start_locked(struct ifnet *ifp) -{ - unsigned short id; - struct mbuf *m_head, *new_m; - struct xn_softc *sc = ifp->if_softc; - netif_tx_request_t *tx; - NETIF_RING_IDX i, start; - - if (sc->xn_backend_state != BEST_CONNECTED) - return; - - for (i = start = sc->xn_tx_if->req_prod; TRUE; i++, sc->xn_txcnt++) { - - IF_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - - if (FREELIST_EMPTY(sc->xn_tx_free_idxs, NETIF_TX_RING_SIZE)) { - IF_PREPEND(&ifp->if_snd, m_head); - ifp->if_flags |= IFF_OACTIVE; - break; - } - - i = sc->xn_tx_if->req_prod; - - id = GET_ID_FROM_FREELIST(sc->xn_tx_free_idxs); - - /* - * Start packing the mbufs in this chain into - * the fragment pointers. Stop when we run out - * of fragments or hit the end of the mbuf chain. - */ - new_m = makembuf(m_head); - tx = &(sc->xn_tx_if->ring[MASK_NETIF_TX_IDX(i)].req); - tx->id = id; - tx->size = new_m->m_pkthdr.len; - new_m->m_next = NULL; - new_m->m_nextpkt = NULL; - - m_freem(m_head); - tx->addr = vtomach(mtod(new_m, vm_offset_t)); - - sc->xn_cdata.xn_tx_chain[MASK_NETIF_TX_IDX(id)] = new_m; - BPF_MTAP(ifp, new_m); - } - - sc->xn_tx_if->req_prod = i; - xn_txeof(sc); - - /* Only notify Xen if we really have to. 
*/ - if (sc->xn_tx_if->TX_TEST_IDX == start) - notify_via_evtchn(sc->xn_evtchn); - return; -} - -static void -xn_start(struct ifnet *ifp) -{ - struct xn_softc *sc; - sc = ifp->if_softc; - XN_LOCK(sc); - xn_start_locked(ifp); - XN_UNLOCK(sc); -} - - - -/* equivalent of network_open() in Linux */ -static void -xn_ifinit_locked(struct xn_softc *sc) -{ - struct ifnet *ifp; - - XN_LOCK_ASSERT(sc); - - ifp = &sc->arpcom.ac_if; - - if (ifp->if_flags & IFF_RUNNING) - return; - - xn_stop(sc); - - sc->xn_user_state = UST_OPEN; - - xn_alloc_rx_buffers(sc); - sc->xn_rx_if->event = sc->xn_rx_resp_cons + 1; - - ifp->if_flags |= IFF_RUNNING; - ifp->if_flags &= ~IFF_OACTIVE; - - callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); - -} - - -static void -xn_ifinit(void *xsc) -{ - struct xn_softc *sc = xsc; - - XN_LOCK(sc); - xn_ifinit_locked(sc); - XN_UNLOCK(sc); - -} - - -static int -xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct xn_softc *sc = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *) data; - int mask, error = 0; - switch(cmd) { - case SIOCSIFMTU: - /* XXX can we alter the MTU on a VN ?*/ -#ifdef notyet - if (ifr->ifr_mtu > XN_JUMBO_MTU) - error = EINVAL; - else -#endif - { - ifp->if_mtu = ifr->ifr_mtu; - ifp->if_flags &= ~IFF_RUNNING; - xn_ifinit(sc); - } - break; - case SIOCSIFFLAGS: - XN_LOCK(sc); - if (ifp->if_flags & IFF_UP) { - /* - * If only the state of the PROMISC flag changed, - * then just use the 'set promisc mode' command - * instead of reinitializing the entire NIC. Doing - * a full re-init means reloading the firmware and - * waiting for it to start up, which may take a - * second or two. 
- */ -#ifdef notyet - /* No promiscuous mode with Xen */ - if (ifp->if_flags & IFF_RUNNING && - ifp->if_flags & IFF_PROMISC && - !(sc->xn_if_flags & IFF_PROMISC)) { - XN_SETBIT(sc, XN_RX_MODE, - XN_RXMODE_RX_PROMISC); - } else if (ifp->if_flags & IFF_RUNNING && - !(ifp->if_flags & IFF_PROMISC) && - sc->xn_if_flags & IFF_PROMISC) { - XN_CLRBIT(sc, XN_RX_MODE, - XN_RXMODE_RX_PROMISC); - } else -#endif - xn_ifinit_locked(sc); - } else { - if (ifp->if_flags & IFF_RUNNING) { - xn_stop(sc); - } - } - sc->xn_if_flags = ifp->if_flags; - XN_UNLOCK(sc); - error = 0; - break; - case SIOCSIFCAP: - mask = ifr->ifr_reqcap ^ ifp->if_capenable; - if (mask & IFCAP_HWCSUM) { - if (IFCAP_HWCSUM & ifp->if_capenable) - ifp->if_capenable &= ~IFCAP_HWCSUM; - else - ifp->if_capenable |= IFCAP_HWCSUM; - } - error = 0; - break; - case SIOCADDMULTI: - case SIOCDELMULTI: -#ifdef notyet - if (ifp->if_flags & IFF_RUNNING) { - XN_LOCK(sc); - xn_setmulti(sc); - XN_UNLOCK(sc); - error = 0; - } -#endif - /* FALLTHROUGH */ - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - error = EINVAL; - break; - default: - error = ether_ioctl(ifp, cmd, data); - } - - return (error); -} - -static void -xn_stop(struct xn_softc *sc) -{ - struct ifnet *ifp; - - XN_LOCK_ASSERT(sc); - - ifp = &sc->arpcom.ac_if; - - callout_stop(&sc->xn_stat_ch); - - xn_free_rx_ring(sc); - xn_free_tx_ring(sc); - - ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); -} - -/* START of Xenolinux helper functions adapted to FreeBSD */ -static void -network_connect(struct xn_softc *sc, netif_fe_interface_status_t *status) -{ - struct ifnet *ifp; - int i, requeue_idx; - netif_tx_request_t *tx; - - XN_LOCK(sc); - - ifp = &sc->arpcom.ac_if; - /* first time through, setup the ifp info */ - if (ifp->if_softc == NULL) { - ifp->if_softc = sc; - if_initname(ifp, "xn", sc->xn_ifno); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; - ifp->if_ioctl = xn_ioctl; - ifp->if_output = ether_output; - ifp->if_start = xn_start; -#ifdef notyet - ifp->if_watchdog = 
xn_watchdog; -#endif - ifp->if_init = xn_ifinit; - ifp->if_mtu = ETHERMTU; - ifp->if_snd.ifq_maxlen = NETIF_TX_RING_SIZE - 1; - -#ifdef notyet - ifp->if_hwassist = XN_CSUM_FEATURES; - ifp->if_capabilities = IFCAP_HWCSUM; - ifp->if_capenable = ifp->if_capabilities; -#endif - - ether_ifattach(ifp, sc->arpcom.ac_enaddr); - callout_init(&sc->xn_stat_ch, CALLOUT_MPSAFE); - } - - /* Recovery procedure: */ - - /* Step 1: Reinitialise variables. */ - sc->xn_rx_resp_cons = sc->xn_tx_resp_cons = 0; - sc->xn_rxbufcnt = sc->xn_txcnt = 0; - sc->xn_rx_if->event = sc->xn_tx_if->event = 1; - - /* Step 2: Rebuild the RX and TX ring contents. - * NB. We could just free the queued TX packets now but we hope - * that sending them out might do some good. We have to rebuild - * the RX ring because some of our pages are currently flipped out - * so we can't just free the RX skbs. - */ - - /* Rebuild the TX buffer freelist and the TX ring itself. - * NB. This reorders packets. We could keep more private state - * to avoid this but maybe it doesn't matter so much given the - * interface has been down. - */ - for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ ) - { - if (sc->xn_cdata.xn_tx_chain[i] != NULL) - { - struct mbuf *m = sc->xn_cdata.xn_tx_chain[i]; - - tx = &sc->xn_tx_if->ring[requeue_idx++].req; - - tx->id = i; - tx->addr = vtomach(mtod(m, vm_offset_t)); - tx->size = m->m_pkthdr.len; - sc->xn_txcnt++; - } - } - wmb(); - sc->xn_tx_if->req_prod = requeue_idx; - - /* Rebuild the RX buffer freelist and the RX ring itself. */ - for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ ) - if (sc->xn_cdata.xn_rx_chain[i] != NULL) - sc->xn_rx_if->ring[requeue_idx++].req.id = i; - wmb(); - sc->xn_rx_if->req_prod = requeue_idx; - - printk("[XEN] Netfront recovered tx=%d rxfree=%d\n", - sc->xn_tx_if->req_prod,sc->xn_rx_if->req_prod); - - - /* Step 3: All public and private state should now be sane. 
Get - * ready to start sending and receiving packets and give the driver - * domain a kick because we've probably just requeued some - * packets. - */ - sc->xn_backend_state = BEST_CONNECTED; - wmb(); - notify_via_evtchn(status->evtchn); - xn_txeof(sc); - - XN_UNLOCK(sc); -} - - -static void -vif_show(struct xn_softc *sc) -{ -#if DEBUG - if (sc) { - IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n", - sc->xn_ifno, - be_state_name[sc->xn_backend_state], - sc->xn_user_state ? "open" : "closed", - sc->xn_evtchn, - sc->xn_irq, - sc->xn_tx_if, - sc->xn_rx_if); - } else { - IPRINTK("<vif NULL>\n"); - } -#endif -} - -/* Send a connect message to xend to tell it to bring up the interface. */ -static void -send_interface_connect(struct xn_softc *sc) -{ - ctrl_msg_t cmsg = { - .type = CMSG_NETIF_FE, - .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT, - .length = sizeof(netif_fe_interface_connect_t), - }; - netif_fe_interface_connect_t *msg = (void*)cmsg.msg; - - vif_show(sc); - msg->handle = sc->xn_ifno; - msg->tx_shmem_frame = (vtomach(sc->xn_tx_if) >> PAGE_SHIFT); - msg->rx_shmem_frame = (vtomach(sc->xn_rx_if) >> PAGE_SHIFT); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -/* Send a driver status notification to the domain controller. */ -static int -send_driver_status(int ok) -{ - int err = 0; - ctrl_msg_t cmsg = { - .type = CMSG_NETIF_FE, - .subtype = CMSG_NETIF_FE_DRIVER_STATUS, - .length = sizeof(netif_fe_driver_status_t), - }; - netif_fe_driver_status_t *msg = (void*)cmsg.msg; - - msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN); - err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - return err; -} - -/* Stop network device and free tx/rx queues and irq. - */ -static void -vif_release(struct xn_softc *sc) -{ - /* Stop old i/f to prevent errors whilst we rebuild the state. */ - XN_LOCK(sc); - /* sc->xn_backend_state = BEST_DISCONNECTED; */ - XN_UNLOCK(sc); - - /* Free resources. 
*/ - if(sc->xn_tx_if != NULL) { - unbind_evtchn_from_irq(sc->xn_evtchn); - free(sc->xn_tx_if, M_DEVBUF); - free(sc->xn_rx_if, M_DEVBUF); - sc->xn_irq = 0; - sc->xn_evtchn = 0; - sc->xn_tx_if = NULL; - sc->xn_rx_if = NULL; - } -} - -/* Release vif resources and close it down completely. - */ -static void -vif_close(struct xn_softc *sc) -{ - vif_show(sc); - WPRINTK("Unexpected netif-CLOSED message in state %s\n", - be_state_name[sc->xn_backend_state]); - vif_release(sc); - sc->xn_backend_state = BEST_CLOSED; - /* todo: take dev down and free. */ - vif_show(sc); -} - -/* Move the vif into disconnected state. - * Allocates tx/rx pages. - * Sends connect message to xend. - */ -static void -vif_disconnect(struct xn_softc *sc) -{ - if (sc->xn_tx_if) free(sc->xn_tx_if, M_DEVBUF); - if (sc->xn_rx_if) free(sc->xn_rx_if, M_DEVBUF); - - // Before this sc->xn_tx_if and sc->xn_rx_if had better be null. - sc->xn_tx_if = (netif_tx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK); - sc->xn_rx_if = (netif_rx_interface_t *)malloc(PAGE_SIZE,M_DEVBUF,M_WAITOK); - memset(sc->xn_tx_if, 0, PAGE_SIZE); - memset(sc->xn_rx_if, 0, PAGE_SIZE); - sc->xn_backend_state = BEST_DISCONNECTED; - send_interface_connect(sc); - vif_show(sc); -} - -/* Begin interface recovery. - * - * NB. Whilst we're recovering, we turn the carrier state off. We - * take measures to ensure that this device isn't used for - * anything. We also stop the queue for this device. Various - * different approaches (e.g. continuing to buffer packets) have - * been tested but don't appear to improve the overall impact on - * TCP connections. - * - * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery - * is initiated by a special "RESET" message - disconnect could - * just mean we're not allowed to use this interface any more. 
- */ -static void -vif_reset(struct xn_softc *sc) -{ - IPRINTK("Attempting to reconnect network interface: handle=%u\n", - sc->xn_ifno); - vif_release(sc); - vif_disconnect(sc); - vif_show(sc); -} - -/* Move the vif into connected state. - * Sets the mac and event channel from the message. - * Binds the irq to the event channel. - */ -static void -vif_connect( - struct xn_softc *sc, netif_fe_interface_status_t *status) -{ - memcpy(sc->arpcom.ac_enaddr, status->mac, ETHER_ADDR_LEN); - network_connect(sc, status); - - sc->xn_evtchn = status->evtchn; - sc->xn_irq = bind_evtchn_to_irq(sc->xn_evtchn); - - (void)intr_add_handler("xn", sc->xn_irq, (driver_intr_t *)xn_intr, sc, - INTR_TYPE_NET | INTR_MPSAFE, &sc->xn_intrhand); - netctrl_connected_count(); - /* vif_wake(dev); Not needed for FreeBSD */ - vif_show(sc); -} - -/** Create a network device. - * @param handle device handle - */ -static void -create_netdev(int handle, struct xn_softc **sc) -{ - int i; - - *sc = (struct xn_softc *)malloc(sizeof(**sc), M_DEVBUF, M_WAITOK); - memset(*sc, 0, sizeof(struct xn_softc)); - - (*sc)->xn_backend_state = BEST_CLOSED; - (*sc)->xn_user_state = UST_CLOSED; - (*sc)->xn_ifno = handle; - - XN_LOCK_INIT(*sc, "xnetif"); - (*sc)->xn_rx_target = RX_MIN_TARGET; - - /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ ) - (*sc)->xn_tx_free_idxs[i] = (i+1); - for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ ) - (*sc)->xn_rx_free_idxs[i] = (i+1); - - SLIST_INSERT_HEAD(&xn_dev_list, *sc, xn_links); -} - -/* Get the target interface for a status message. - * Creates the interface when it makes sense. - * The returned interface may be null when there is no error. 
- * - * @param status status message - * @param sc return parameter for interface state - * @return 0 on success, error code otherwise - */ -static int -target_vif(netif_fe_interface_status_t *status, struct xn_softc **sc) -{ - int err = 0; - - XENPRINTF("> handle=%d\n", status->handle); - if ( status->handle < 0 ) - { - err = -EINVAL; - goto exit; - } - - if ( (*sc = find_sc_by_handle(status->handle)) != NULL ) - goto exit; - - if ( status->status == NETIF_INTERFACE_STATUS_CLOSED ) - goto exit; - if ( status->status == NETIF_INTERFACE_STATUS_CHANGED ) - goto exit; - - /* It's a new interface in a good state - create it. */ - XENPRINTF("> create device...\n"); - create_netdev(status->handle, sc); - netctrl.interface_n++; - -exit: - return err; -} - -/* Handle an interface status message. */ -static void -netif_interface_status(netif_fe_interface_status_t *status) -{ - int err = 0; - struct xn_softc *sc = NULL; - - XENPRINTF("> status=%s handle=%d\n", - status_name[status->status], status->handle); - - if ( (err = target_vif(status, &sc)) != 0 ) - { - WPRINTK("Invalid netif: handle=%u\n", status->handle); - return; - } - - if ( sc == NULL ) - { - XENPRINTF("> no vif\n"); - return; - } - - vif_show(sc); - - switch ( status->status ) - { - case NETIF_INTERFACE_STATUS_CLOSED: - switch ( sc->xn_backend_state ) - { - case BEST_CLOSED: - case BEST_DISCONNECTED: - case BEST_CONNECTED: - vif_close(sc); - break; - } - break; - - case NETIF_INTERFACE_STATUS_DISCONNECTED: - switch ( sc->xn_backend_state ) - { - case BEST_CLOSED: - vif_disconnect(sc); - break; - case BEST_DISCONNECTED: - case BEST_CONNECTED: - vif_reset(sc); - break; - } - break; - - case NETIF_INTERFACE_STATUS_CONNECTED: - switch ( sc->xn_backend_state ) - { - case BEST_CLOSED: - WPRINTK("Unexpected netif status %s in state %s\n", - status_name[status->status], - be_state_name[sc->xn_backend_state]); - vif_disconnect(sc); - vif_connect(sc, status); - break; - case BEST_DISCONNECTED: - vif_connect(sc, status); 
- break; - } - break; - - case NETIF_INTERFACE_STATUS_CHANGED: - /* - * The domain controller is notifying us that a device has been - * added or removed. - */ - break; - - default: - WPRINTK("Invalid netif status code %d\n", status->status); - break; - } - vif_show(sc); -} - -/* - * Initialize the network control interface. - */ -static void -netif_driver_status(netif_fe_driver_status_t *status) -{ - XENPRINTF("> status=%d\n", status->status); - netctrl.up = status->status; - //netctrl.interface_n = status->max_handle; - //netctrl.connected_n = 0; - netctrl_connected_count(); -} - -/* Receive handler for control messages. */ -static void -netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_NETIF_FE_INTERFACE_STATUS: - if ( msg->length != sizeof(netif_fe_interface_status_t) ) - goto error; - netif_interface_status((netif_fe_interface_status_t *) - &msg->msg[0]); - break; - - case CMSG_NETIF_FE_DRIVER_STATUS: - if ( msg->length != sizeof(netif_fe_driver_status_t) ) - goto error; - netif_driver_status((netif_fe_driver_status_t *) - &msg->msg[0]); - break; - - error: - default: - msg->length = 0; - break; - } - - ctrl_if_send_response(msg); -} - -#if 1 -/* Wait for all interfaces to be connected. - * - * This works OK, but we'd like to use the probing mode (see below). - */ -static int probe_interfaces(void) -{ - int err = 0, conn = 0; - int wait_i, wait_n = 100; - for ( wait_i = 0; wait_i < wait_n; wait_i++) - { - XENPRINTF("> wait_i=%d\n", wait_i); - conn = netctrl_connected(); - if(conn) break; - tsleep(&xn_dev_list, PWAIT | PCATCH, "netif", hz); - } - - XENPRINTF("> wait finished...\n"); - if ( conn <= 0 ) - { - err = netctrl_err(-ENETDOWN); - WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err); - } - - XENPRINTF("< err=%d\n", err); - - return err; -} -#else -/* Probe for interfaces until no more are found. - * - * This is the mode we'd like to use, but at the moment it panics the kernel. 
-*/ -static int -probe_interfaces(void) -{ - int err = 0; - int wait_i, wait_n = 100; - ctrl_msg_t cmsg = { - .type = CMSG_NETIF_FE, - .subtype = CMSG_NETIF_FE_INTERFACE_STATUS, - .length = sizeof(netif_fe_interface_status_t), - }; - netif_fe_interface_status_t msg = {}; - ctrl_msg_t rmsg = {}; - netif_fe_interface_status_t *reply = (void*)rmsg.msg; - int state = TASK_UNINTERRUPTIBLE; - uint32_t query = -1; - - - netctrl.interface_n = 0; - for ( wait_i = 0; wait_i < wait_n; wait_i++ ) - { - XENPRINTF("> wait_i=%d query=%d\n", wait_i, query); - msg.handle = query; - memcpy(cmsg.msg, &msg, sizeof(msg)); - XENPRINTF("> set_current_state...\n"); - set_current_state(state); - XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply); - XENPRINTF("> sending...\n"); - err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state); - XENPRINTF("> err=%d\n", err); - if(err) goto exit; - XENPRINTF("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply); - if((int)reply->handle < 0){ - // No more interfaces. 
- break; - } - query = -reply->handle - 2; - XENPRINTF(">netif_interface_status ...\n"); - netif_interface_status(reply); - } - - exit: - if ( err ) - { - err = netctrl_err(-ENETDOWN); - WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err); - } - - XENPRINTF("< err=%d\n", err); - return err; -} - -#endif - -static void -xn_init(void *unused) -{ - - int err = 0; - - netctrl_init(); - (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - send_driver_status(1); - err = probe_interfaces(); - - if (err) - ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx); -} - -SYSINIT(xndev, SI_SUB_PSEUDO, SI_ORDER_ANY, xn_init, NULL) diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/kern/kern_fork.c --- a/freebsd-5.3-xen-sparse/kern/kern_fork.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,846 +0,0 @@ -/* - * Copyright (c) 1982, 1986, 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_fork.c,v 1.234.2.4 2004/09/18 04:11:35 julian Exp $"); - -#include "opt_ktrace.h" -#include "opt_mac.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/sysproto.h> -#include <sys/eventhandler.h> -#include <sys/filedesc.h> -#include <sys/kernel.h> -#include <sys/kthread.h> -#include <sys/sysctl.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/pioctl.h> -#include <sys/resourcevar.h> -#include <sys/sched.h> -#include <sys/syscall.h> -#include <sys/vmmeter.h> -#include <sys/vnode.h> -#include <sys/acct.h> -#include <sys/mac.h> -#include <sys/ktr.h> -#include <sys/ktrace.h> -#include <sys/unistd.h> -#include <sys/sx.h> - -#include <vm/vm.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_extern.h> -#include <vm/uma.h> - -#include <sys/user.h> -#include <machine/critical.h> - -#ifndef _SYS_SYSPROTO_H_ 
-struct fork_args { - int dummy; -}; -#endif - -static int forksleep; /* Place for fork1() to sleep on. */ - -/* - * MPSAFE - */ -/* ARGSUSED */ -int -fork(td, uap) - struct thread *td; - struct fork_args *uap; -{ - int error; - struct proc *p2; - - error = fork1(td, RFFDG | RFPROC, 0, &p2); - if (error == 0) { - td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; - } - return (error); -} - -/* - * MPSAFE - */ -/* ARGSUSED */ -int -vfork(td, uap) - struct thread *td; - struct vfork_args *uap; -{ - int error; - struct proc *p2; - - error = fork1(td, RFFDG | RFPROC /* | RFPPWAIT | RFMEM */, 0, &p2); - if (error == 0) { - td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; - } - return (error); -} - -/* - * MPSAFE - */ -int -rfork(td, uap) - struct thread *td; - struct rfork_args *uap; -{ - struct proc *p2; - int error; - - /* Don't allow kernel-only flags. */ - if ((uap->flags & RFKERNELONLY) != 0) - return (EINVAL); - - error = fork1(td, uap->flags, 0, &p2); - if (error == 0) { - td->td_retval[0] = p2 ? p2->p_pid : 0; - td->td_retval[1] = 0; - } - return (error); -} - -int nprocs = 1; /* process 0 */ -int lastpid = 0; -SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, - "Last used PID"); - -/* - * Random component to lastpid generation. We mix in a random factor to make - * it a little harder to predict. We sanity check the modulus value to avoid - * doing it in critical paths. Don't let it be too small or we pointlessly - * waste randomness entropy, and don't let it be impossibly large. Using a - * modulus that is too big causes a LOT more process table scans and slows - * down fork processing as the pidchecked caching is defeated. 
- */ -static int randompid = 0; - -static int -sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) -{ - int error, pid; - - error = sysctl_wire_old_buffer(req, sizeof(int)); - if (error != 0) - return(error); - sx_xlock(&allproc_lock); - pid = randompid; - error = sysctl_handle_int(oidp, &pid, 0, req); - if (error == 0 && req->newptr != NULL) { - if (pid < 0 || pid > PID_MAX - 100) /* out of range */ - pid = PID_MAX - 100; - else if (pid < 2) /* NOP */ - pid = 0; - else if (pid < 100) /* Make it reasonable */ - pid = 100; - randompid = pid; - } - sx_xunlock(&allproc_lock); - return (error); -} - -SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); - -int -fork1(td, flags, pages, procp) - struct thread *td; - int flags; - int pages; - struct proc **procp; -{ - struct proc *p1, *p2, *pptr; - uid_t uid; - struct proc *newproc; - int ok, trypid; - static int curfail, pidchecked = 0; - static struct timeval lastfail; - struct filedesc *fd; - struct filedesc_to_leader *fdtol; - struct thread *td2; - struct ksegrp *kg2; - struct sigacts *newsigacts; - int error; - - /* Can't copy and clear. */ - if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) - return (EINVAL); - - p1 = td->td_proc; - - /* - * Here we don't create a new process, but we divorce - * certain parts of a process from itself. - */ - if ((flags & RFPROC) == 0) { - mtx_lock(&Giant); - vm_forkproc(td, NULL, NULL, flags); - mtx_unlock(&Giant); - - /* - * Close all file descriptors. - */ - if (flags & RFCFDG) { - struct filedesc *fdtmp; - FILEDESC_LOCK(td->td_proc->p_fd); - fdtmp = fdinit(td->td_proc->p_fd); - FILEDESC_UNLOCK(td->td_proc->p_fd); - fdfree(td); - p1->p_fd = fdtmp; - } - - /* - * Unshare file descriptors (from parent). 
- */ - if (flags & RFFDG) { - FILEDESC_LOCK(p1->p_fd); - if (p1->p_fd->fd_refcnt > 1) { - struct filedesc *newfd; - - newfd = fdcopy(td->td_proc->p_fd); - FILEDESC_UNLOCK(p1->p_fd); - fdfree(td); - p1->p_fd = newfd; - } else - FILEDESC_UNLOCK(p1->p_fd); - } - *procp = NULL; - return (0); - } - - /* - * Note 1:1 allows for forking with one thread coming out on the - * other side with the expectation that the process is about to - * exec. - */ - if (p1->p_flag & P_HADTHREADS) { - /* - * Idle the other threads for a second. - * Since the user space is copied, it must remain stable. - * In addition, all threads (from the user perspective) - * need to either be suspended or in the kernel, - * where they will try restart in the parent and will - * be aborted in the child. - */ - PROC_LOCK(p1); - if (thread_single(SINGLE_NO_EXIT)) { - /* Abort. Someone else is single threading before us. */ - PROC_UNLOCK(p1); - return (ERESTART); - } - PROC_UNLOCK(p1); - /* - * All other activity in this process - * is now suspended at the user boundary, - * (or other safe places if we think of any). - */ - } - - /* Allocate new proc. */ - newproc = uma_zalloc(proc_zone, M_WAITOK); -#ifdef MAC - mac_init_proc(newproc); -#endif - knlist_init(&newproc->p_klist, &newproc->p_mtx); - - /* We have to lock the process tree while we look for a pid. */ - sx_slock(&proctree_lock); - - /* - * Although process entries are dynamically created, we still keep - * a global limit on the maximum number we will create. Don't allow - * a nonprivileged user to use the last ten processes; don't let root - * exceed the limit. The variable nprocs is the current number of - * processes, maxproc is the limit. - */ - sx_xlock(&allproc_lock); - uid = td->td_ucred->cr_ruid; - if ((nprocs >= maxproc - 10 && - suser_cred(td->td_ucred, SUSER_RUID) != 0) || - nprocs >= maxproc) { - error = EAGAIN; - goto fail; - } - - /* - * Increment the count of procs running with this uid. 
Don't allow - * a nonprivileged user to exceed their current limit. - */ - PROC_LOCK(p1); - ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, - (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0); - PROC_UNLOCK(p1); - if (!ok) { - error = EAGAIN; - goto fail; - } - - /* - * Increment the nprocs resource before blocking can occur. There - * are hard-limits as to the number of processes that can run. - */ - nprocs++; - - /* - * Find an unused process ID. We remember a range of unused IDs - * ready to use (from lastpid+1 through pidchecked-1). - * - * If RFHIGHPID is set (used during system boot), do not allocate - * low-numbered pids. - */ - trypid = lastpid + 1; - if (flags & RFHIGHPID) { - if (trypid < 10) - trypid = 10; - } else { - if (randompid) - trypid += arc4random() % randompid; - } -retry: - /* - * If the process ID prototype has wrapped around, - * restart somewhat above 0, as the low-numbered procs - * tend to include daemons that don't exit. - */ - if (trypid >= PID_MAX) { - trypid = trypid % PID_MAX; - if (trypid < 100) - trypid += 100; - pidchecked = 0; - } - if (trypid >= pidchecked) { - int doingzomb = 0; - - pidchecked = PID_MAX; - /* - * Scan the active and zombie procs to check whether this pid - * is in use. Remember the lowest pid that's greater - * than trypid, so we can avoid checking for a while. 
- */ - p2 = LIST_FIRST(&allproc); -again: - for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) { - PROC_LOCK(p2); - while (p2->p_pid == trypid || - (p2->p_pgrp != NULL && - (p2->p_pgrp->pg_id == trypid || - (p2->p_session != NULL && - p2->p_session->s_sid == trypid)))) { - trypid++; - if (trypid >= pidchecked) { - PROC_UNLOCK(p2); - goto retry; - } - } - if (p2->p_pid > trypid && pidchecked > p2->p_pid) - pidchecked = p2->p_pid; - if (p2->p_pgrp != NULL) { - if (p2->p_pgrp->pg_id > trypid && - pidchecked > p2->p_pgrp->pg_id) - pidchecked = p2->p_pgrp->pg_id; - if (p2->p_session != NULL && - p2->p_session->s_sid > trypid && - pidchecked > p2->p_session->s_sid) - pidchecked = p2->p_session->s_sid; - } - PROC_UNLOCK(p2); - } - if (!doingzomb) { - doingzomb = 1; - p2 = LIST_FIRST(&zombproc); - goto again; - } - } - sx_sunlock(&proctree_lock); - - /* - * RFHIGHPID does not mess with the lastpid counter during boot. - */ - if (flags & RFHIGHPID) - pidchecked = 0; - else - lastpid = trypid; - - p2 = newproc; - p2->p_state = PRS_NEW; /* protect against others */ - p2->p_pid = trypid; - LIST_INSERT_HEAD(&allproc, p2, p_list); - LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); - sx_xunlock(&allproc_lock); - - /* - * Malloc things while we don't hold any locks. - */ - if (flags & RFSIGSHARE) - newsigacts = NULL; - else - newsigacts = sigacts_alloc(); - - /* - * Copy filedesc. - */ - if (flags & RFCFDG) { - FILEDESC_LOCK(td->td_proc->p_fd); - fd = fdinit(td->td_proc->p_fd); - FILEDESC_UNLOCK(td->td_proc->p_fd); - fdtol = NULL; - } else if (flags & RFFDG) { - FILEDESC_LOCK(p1->p_fd); - fd = fdcopy(td->td_proc->p_fd); - FILEDESC_UNLOCK(p1->p_fd); - fdtol = NULL; - } else { - fd = fdshare(p1->p_fd); - if (p1->p_fdtol == NULL) - p1->p_fdtol = - filedesc_to_leader_alloc(NULL, - NULL, - p1->p_leader); - if ((flags & RFTHREAD) != 0) { - /* - * Shared file descriptor table and - * shared process leaders. 
- */ - fdtol = p1->p_fdtol; - FILEDESC_LOCK(p1->p_fd); - fdtol->fdl_refcount++; - FILEDESC_UNLOCK(p1->p_fd); - } else { - /* - * Shared file descriptor table, and - * different process leaders - */ - fdtol = filedesc_to_leader_alloc(p1->p_fdtol, - p1->p_fd, - p2); - } - } - /* - * Make a proc table entry for the new process. - * Start by zeroing the section of proc that is zero-initialized, - * then copy the section that is copied directly from the parent. - */ - td2 = FIRST_THREAD_IN_PROC(p2); - kg2 = FIRST_KSEGRP_IN_PROC(p2); - - /* Allocate and switch to an alternate kstack if specified. */ - if (pages != 0) - vm_thread_new_altkstack(td2, pages); - - PROC_LOCK(p2); - PROC_LOCK(p1); - -#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) - - bzero(&p2->p_startzero, - (unsigned) RANGEOF(struct proc, p_startzero, p_endzero)); - bzero(&td2->td_startzero, - (unsigned) RANGEOF(struct thread, td_startzero, td_endzero)); - bzero(&kg2->kg_startzero, - (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero)); - - bcopy(&p1->p_startcopy, &p2->p_startcopy, - (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy)); - bcopy(&td->td_startcopy, &td2->td_startcopy, - (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy)); - bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy, - (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy)); -#undef RANGEOF - - td2->td_sigstk = td->td_sigstk; - - /* - * Duplicate sub-structures as needed. - * Increase reference counts on shared objects. - * The p_stats substruct is set in vm_forkproc. - */ - p2->p_flag = 0; - if (p1->p_flag & P_PROFIL) - startprofclock(p2); - mtx_lock_spin(&sched_lock); - p2->p_sflag = PS_INMEM; - /* - * Allow the scheduler to adjust the priority of the child and - * parent while we hold the sched_lock. 
- */ - sched_fork(td, td2); - - mtx_unlock_spin(&sched_lock); - p2->p_ucred = crhold(td->td_ucred); - td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */ - - pargs_hold(p2->p_args); - - if (flags & RFSIGSHARE) { - p2->p_sigacts = sigacts_hold(p1->p_sigacts); - } else { - sigacts_copy(newsigacts, p1->p_sigacts); - p2->p_sigacts = newsigacts; - } - if (flags & RFLINUXTHPN) - p2->p_sigparent = SIGUSR1; - else - p2->p_sigparent = SIGCHLD; - - p2->p_textvp = p1->p_textvp; - p2->p_fd = fd; - p2->p_fdtol = fdtol; - - /* - * p_limit is copy-on-write. Bump its refcount. - */ - p2->p_limit = lim_hold(p1->p_limit); - PROC_UNLOCK(p1); - PROC_UNLOCK(p2); - - /* Bump references to the text vnode (for procfs) */ - if (p2->p_textvp) - vref(p2->p_textvp); - - /* - * Set up linkage for kernel based threading. - */ - if ((flags & RFTHREAD) != 0) { - mtx_lock(&ppeers_lock); - p2->p_peers = p1->p_peers; - p1->p_peers = p2; - p2->p_leader = p1->p_leader; - mtx_unlock(&ppeers_lock); - PROC_LOCK(p1->p_leader); - if ((p1->p_leader->p_flag & P_WEXIT) != 0) { - PROC_UNLOCK(p1->p_leader); - /* - * The task leader is exiting, so process p1 is - * going to be killed shortly. Since p1 obviously - * isn't dead yet, we know that the leader is either - * sending SIGKILL's to all the processes in this - * task or is sleeping waiting for all the peers to - * exit. We let p1 complete the fork, but we need - * to go ahead and kill the new process p2 since - * the task leader may not get a chance to send - * SIGKILL to it. We leave it on the list so that - * the task leader will wait for this new process - * to commit suicide. - */ - PROC_LOCK(p2); - psignal(p2, SIGKILL); - PROC_UNLOCK(p2); - } else - PROC_UNLOCK(p1->p_leader); - } else { - p2->p_peers = NULL; - p2->p_leader = p2; - } - - sx_xlock(&proctree_lock); - PGRP_LOCK(p1->p_pgrp); - PROC_LOCK(p2); - PROC_LOCK(p1); - - /* - * Preserve some more flags in subprocess. P_PROFIL has already - * been preserved. 
- */ - p2->p_flag |= p1->p_flag & P_SUGID; - td2->td_pflags |= td->td_pflags & TDP_ALTSTACK; - SESS_LOCK(p1->p_session); - if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) - p2->p_flag |= P_CONTROLT; - SESS_UNLOCK(p1->p_session); - if (flags & RFPPWAIT) - p2->p_flag |= P_PPWAIT; - - p2->p_pgrp = p1->p_pgrp; - LIST_INSERT_AFTER(p1, p2, p_pglist); - PGRP_UNLOCK(p1->p_pgrp); - LIST_INIT(&p2->p_children); - - callout_init(&p2->p_itcallout, CALLOUT_MPSAFE); - -#ifdef KTRACE - /* - * Copy traceflag and tracefile if enabled. - */ - mtx_lock(&ktrace_mtx); - KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); - if (p1->p_traceflag & KTRFAC_INHERIT) { - p2->p_traceflag = p1->p_traceflag; - if ((p2->p_tracevp = p1->p_tracevp) != NULL) { - VREF(p2->p_tracevp); - KASSERT(p1->p_tracecred != NULL, - ("ktrace vnode with no cred")); - p2->p_tracecred = crhold(p1->p_tracecred); - } - } - mtx_unlock(&ktrace_mtx); -#endif - - /* - * If PF_FORK is set, the child process inherits the - * procfs ioctl flags from its parent. - */ - if (p1->p_pfsflags & PF_FORK) { - p2->p_stops = p1->p_stops; - p2->p_pfsflags = p1->p_pfsflags; - } - - /* - * This begins the section where we must prevent the parent - * from being swapped. - */ - _PHOLD(p1); - PROC_UNLOCK(p1); - - /* - * Attach the new process to its parent. - * - * If RFNOWAIT is set, the newly created process becomes a child - * of init. This effectively disassociates the child from the - * parent. - */ - if (flags & RFNOWAIT) - pptr = initproc; - else - pptr = p1; - p2->p_pptr = pptr; - LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); - sx_xunlock(&proctree_lock); - - /* Inform accounting that we have forked. */ - p2->p_acflag = AFORK; - PROC_UNLOCK(p2); - - /* - * Finish creating the child process. It will return via a different - * execution path later. 
(ie: directly into user mode) - */ - mtx_lock(&Giant); - vm_forkproc(td, p2, td2, flags); - - if (flags == (RFFDG | RFPROC)) { - cnt.v_forks++; - cnt.v_forkpages += p2->p_vmspace->vm_dsize + - p2->p_vmspace->vm_ssize; - } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { - cnt.v_vforks++; - cnt.v_vforkpages += p2->p_vmspace->vm_dsize + - p2->p_vmspace->vm_ssize; - } else if (p1 == &proc0) { - cnt.v_kthreads++; - cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + - p2->p_vmspace->vm_ssize; - } else { - cnt.v_rforks++; - cnt.v_rforkpages += p2->p_vmspace->vm_dsize + - p2->p_vmspace->vm_ssize; - } - mtx_unlock(&Giant); - - /* - * Both processes are set up, now check if any loadable modules want - * to adjust anything. - * What if they have an error? XXX - */ - EVENTHANDLER_INVOKE(process_fork, p1, p2, flags); - - /* - * Set the child start time and mark the process as being complete. - */ - microuptime(&p2->p_stats->p_start); - mtx_lock_spin(&sched_lock); - p2->p_state = PRS_NORMAL; - - /* - * If RFSTOPPED not requested, make child runnable and add to - * run queue. - */ - if ((flags & RFSTOPPED) == 0) { - TD_SET_CAN_RUN(td2); - setrunqueue(td2, SRQ_BORING); - } - mtx_unlock_spin(&sched_lock); - - /* - * Now can be swapped. - */ - PROC_LOCK(p1); - _PRELE(p1); - - /* - * Tell any interested parties about the new process. - */ - KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid); - - PROC_UNLOCK(p1); - - /* - * Preserve synchronization semantics of vfork. If waiting for - * child to exec or exit, set P_PPWAIT on child, and sleep on our - * proc (in case of exit). - */ - PROC_LOCK(p2); - while (p2->p_flag & P_PPWAIT) - msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0); - PROC_UNLOCK(p2); - - /* - * If other threads are waiting, let them continue now. - */ - if (p1->p_flag & P_HADTHREADS) { - PROC_LOCK(p1); - thread_single_end(); - PROC_UNLOCK(p1); - } - - /* - * Return child proc pointer to parent. 
- */ - *procp = p2; - return (0); -fail: - sx_sunlock(&proctree_lock); - if (ppsratecheck(&lastfail, &curfail, 1)) - printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n", - uid); - sx_xunlock(&allproc_lock); -#ifdef MAC - mac_destroy_proc(newproc); -#endif - uma_zfree(proc_zone, newproc); - if (p1->p_flag & P_HADTHREADS) { - PROC_LOCK(p1); - thread_single_end(); - PROC_UNLOCK(p1); - } - tsleep(&forksleep, PUSER, "fork", hz / 2); - return (error); -} - -/* - * Handle the return of a child process from fork1(). This function - * is called from the MD fork_trampoline() entry point. - */ -void -fork_exit(callout, arg, frame) - void (*callout)(void *, struct trapframe *); - void *arg; - struct trapframe *frame; -{ - struct proc *p; - struct thread *td; - - /* - * Finish setting up thread glue so that it begins execution in a - * non-nested critical section with sched_lock held but not recursed. - */ - td = curthread; - p = td->td_proc; - td->td_oncpu = PCPU_GET(cpuid); - KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); - - sched_lock.mtx_lock = (uintptr_t)td; - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); - cpu_critical_fork_exit(); - CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)", - td, td->td_sched, p->p_pid, p->p_comm); - - /* - * Processes normally resume in mi_switch() after being - * cpu_switch()'ed to, but when children start up they arrive here - * instead, so we must do much the same things as mi_switch() would. - */ - - if ((td = PCPU_GET(deadthread))) { - PCPU_SET(deadthread, NULL); - thread_stash(td); - } - td = curthread; - mtx_unlock_spin(&sched_lock); - - /* - * cpu_set_fork_handler intercepts this function call to - * have this call a non-return function to stay in kernel mode. - * initproc has its own fork handler, but it does return. 
- */ - KASSERT(callout != NULL, ("NULL callout in fork_exit")); - callout(arg, frame); - - /* - * Check if a kernel thread misbehaved and returned from its main - * function. - */ - PROC_LOCK(p); - if (p->p_flag & P_KTHREAD) { - PROC_UNLOCK(p); - printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", - p->p_comm, p->p_pid); - kthread_exit(0); - } - PROC_UNLOCK(p); - mtx_assert(&Giant, MA_NOTOWNED); -} - -/* - * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. Giant is not held on entry, and must not - * be held on return. This function is passed in to fork_exit() as the - * first parameter and is called when returning to a new userland process. - */ -void -fork_return(td, frame) - struct thread *td; - struct trapframe *frame; -{ - - userret(td, frame, 0); -#ifdef KTRACE - if (KTRPOINT(td, KTR_SYSRET)) - ktrsysret(SYS_fork, 0, 0); -#endif - mtx_assert(&Giant, MA_NOTOWNED); -} diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/kern/kern_shutdown.c --- a/freebsd-5.3-xen-sparse/kern/kern_shutdown.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,635 +0,0 @@ -/*- - * Copyright (c) 1986, 1988, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_shutdown.c,v 1.163.2.2 2004/09/10 00:04:17 scottl Exp $"); - -#include "opt_kdb.h" -#include "opt_hw_wdog.h" -#include "opt_mac.h" -#include "opt_panic.h" -#include "opt_show_busybufs.h" -#include "opt_sched.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/conf.h> -#include <sys/cons.h> -#include <sys/eventhandler.h> -#include <sys/kdb.h> -#include <sys/kernel.h> -#include <sys/kthread.h> -#include <sys/mac.h> -#include <sys/malloc.h> -#include <sys/mount.h> -#include <sys/proc.h> -#include <sys/reboot.h> -#include <sys/resourcevar.h> -#include <sys/smp.h> /* smp_active */ -#include <sys/sysctl.h> -#include <sys/sysproto.h> -#include <sys/vnode.h> - -#include <machine/cpu.h> -#include <machine/pcb.h> -#include <machine/smp.h> - -#include <sys/signalvar.h> - -#ifndef PANIC_REBOOT_WAIT_TIME -#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ -#endif - -/* - * Note that stdarg.h and the ANSI style va_start macro is used for both - * ANSI and traditional C compilers. - */ -#include <machine/stdarg.h> - -#ifdef KDB -#ifdef KDB_UNATTENDED -int debugger_on_panic = 0; -#else -int debugger_on_panic = 1; -#endif -SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, - &debugger_on_panic, 0, "Run debugger on kernel panic"); - -#ifdef KDB_TRACE -int trace_on_panic = 1; -#else -int trace_on_panic = 0; -#endif -SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW, - &trace_on_panic, 0, "Print stack trace on kernel panic"); -#endif /* KDB */ - -int sync_on_panic = 0; -SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW, - &sync_on_panic, 0, "Do a sync before rebooting from a panic"); - -SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); - -#ifdef HW_WDOG -/* - * If there is a hardware watchdog, point this at the function needed to - * hold it off. 
- * It's needed when the kernel needs to do some lengthy operations. - * e.g. in wd.c when dumping core.. It's most annoying to have - * your precious core-dump only half written because the wdog kicked in. - */ -watchdog_tickle_fn wdog_tickler = NULL; -#endif /* HW_WDOG */ - -/* - * Variable panicstr contains argument to first call to panic; used as flag - * to indicate that the kernel has already called panic. - */ -const char *panicstr; - -int dumping; /* system is dumping */ -static struct dumperinfo dumper; /* our selected dumper */ - -/* Context information for dump-debuggers. */ -static struct pcb dumppcb; /* Registers. */ -static lwpid_t dumptid; /* Thread ID. */ - -static void boot(int) __dead2; -static void poweroff_wait(void *, int); -static void shutdown_halt(void *junk, int howto); -static void shutdown_panic(void *junk, int howto); -static void shutdown_reset(void *junk, int howto); - -/* register various local shutdown events */ -static void -shutdown_conf(void *unused) -{ - - EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, - SHUTDOWN_PRI_FIRST); - EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, - SHUTDOWN_PRI_LAST + 100); - EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, - SHUTDOWN_PRI_LAST + 100); - EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, - SHUTDOWN_PRI_LAST + 200); -} - -SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL) - -/* - * The system call that results in a reboot - * - * MPSAFE - */ -/* ARGSUSED */ -int -reboot(struct thread *td, struct reboot_args *uap) -{ - int error; - - error = 0; -#ifdef MAC - error = mac_check_system_reboot(td->td_ucred, uap->opt); -#endif - if (error == 0) - error = suser(td); - if (error == 0) { - mtx_lock(&Giant); - boot(uap->opt); - mtx_unlock(&Giant); - } - return (error); -} - -/* - * Called by events that want to shut down.. 
e.g <CTL><ALT><DEL> on a PC - */ -static int shutdown_howto = 0; - -void -shutdown_nice(int howto) -{ - - shutdown_howto = howto; - - /* Send a signal to init(8) and have it shutdown the world */ - if (initproc != NULL) { - PROC_LOCK(initproc); - psignal(initproc, SIGINT); - PROC_UNLOCK(initproc); - } else { - /* No init(8) running, so simply reboot */ - boot(RB_NOSYNC); - } - return; -} -static int waittime = -1; - -static void -print_uptime(void) -{ - int f; - struct timespec ts; - - getnanouptime(&ts); - printf("Uptime: "); - f = 0; - if (ts.tv_sec >= 86400) { - printf("%ldd", (long)ts.tv_sec / 86400); - ts.tv_sec %= 86400; - f = 1; - } - if (f || ts.tv_sec >= 3600) { - printf("%ldh", (long)ts.tv_sec / 3600); - ts.tv_sec %= 3600; - f = 1; - } - if (f || ts.tv_sec >= 60) { - printf("%ldm", (long)ts.tv_sec / 60); - ts.tv_sec %= 60; - f = 1; - } - printf("%lds\n", (long)ts.tv_sec); -} - -static void -doadump(void) -{ - - /* - * Sometimes people have to call this from the kernel debugger. - * (if 'panic' can not dump) - * Give them a clue as to why they can't dump. - */ - if (dumper.dumper == NULL) { - printf("Cannot dump. No dump device defined.\n"); - return; - } - - savectx(&dumppcb); - dumptid = curthread->td_tid; - dumping++; - dumpsys(&dumper); -} - -/* - * Go through the rigmarole of shutting down.. - * this used to be in machdep.c but I'll be dammned if I could see - * anything machine dependant in it. - */ -static void -boot(int howto) -{ - static int first_buf_printf = 1; - - /* collect extra flags that shutdown_nice might have set */ - howto |= shutdown_howto; - - /* We are out of the debugger now. */ - kdb_active = 0; - -#ifdef SMP - if (smp_active) - printf("boot() called on cpu#%d\n", PCPU_GET(cpuid)); -#endif - /* - * Do any callouts that should be done BEFORE syncing the filesystems. 
- */ - EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); - - /* - * Now sync filesystems - */ - if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { - register struct buf *bp; - int iter, nbusy, pbusy; -#ifndef PREEMPTION - int subiter; -#endif - - waittime = 0; - - sync(&thread0, NULL); - - /* - * With soft updates, some buffers that are - * written will be remarked as dirty until other - * buffers are written. - */ - for (iter = pbusy = 0; iter < 20; iter++) { - nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) { - if ((bp->b_flags & B_INVAL) == 0 && - BUF_REFCNT(bp) > 0) { - nbusy++; - } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) - == B_DELWRI) { - /* bawrite(bp);*/ - nbusy++; - } - } - if (nbusy == 0) { - if (first_buf_printf) - printf("No buffers busy after final sync"); - break; - } - if (first_buf_printf) { - printf("Syncing disks, buffers remaining... "); - first_buf_printf = 0; - } - printf("%d ", nbusy); - if (nbusy < pbusy) - iter = 0; - pbusy = nbusy; - sync(&thread0, NULL); - -#ifdef PREEMPTION - /* - * Drop Giant and spin for a while to allow - * interrupt threads to run. - */ - DROP_GIANT(); - DELAY(50000 * iter); - PICKUP_GIANT(); -#else - /* - * Drop Giant and context switch several times to - * allow interrupt threads to run. 
- */ - DROP_GIANT(); - for (subiter = 0; subiter < 50 * iter; subiter++) { - mtx_lock_spin(&sched_lock); - mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); - DELAY(1000); - } - PICKUP_GIANT(); -#endif - } - printf("\n"); - /* - * Count only busy local buffers to prevent forcing - * a fsck if we're just a client of a wedged NFS server - */ - nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) { - if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) || - ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) { - if (bp->b_dev == NULL) { - TAILQ_REMOVE(&mountlist, - bp->b_vp->v_mount, mnt_list); - continue; - } - nbusy++; -#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC) - printf( - "%d: dev:%s, flags:%0x, blkno:%ld, lblkno:%ld\n", - nbusy, devtoname(bp->b_dev), - bp->b_flags, (long)bp->b_blkno, - (long)bp->b_lblkno); -#endif - } - } - if (nbusy) { - /* - * Failed to sync all blocks. Indicate this and don't - * unmount filesystems (thus forcing an fsck on reboot). - */ - printf("Giving up on %d buffers\n", nbusy); - DELAY(5000000); /* 5 seconds */ - } else { - if (!first_buf_printf) - printf("Final sync complete\n"); - /* - * Unmount filesystems - */ - if (panicstr == 0) - vfs_unmountall(); - } - DELAY(100000); /* wait for console output to finish */ - } - - print_uptime(); - - /* - * Ok, now do things that assume all filesystem activity has - * been completed. - */ - EVENTHANDLER_INVOKE(shutdown_post_sync, howto); - splhigh(); - if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) - doadump(); - - /* Now that we're going to really halt the system... */ - EVENTHANDLER_INVOKE(shutdown_final, howto); - - for(;;) ; /* safety against shutdown_reset not working */ - /* NOTREACHED */ -} - -/* - * If the shutdown was a clean halt, behave accordingly. 
- */ -static void -shutdown_halt(void *junk, int howto) -{ - - if (howto & RB_HALT) { - printf("\n"); - printf("The operating system has halted.\n"); - printf("Please press any key to reboot.\n\n"); - switch (cngetc()) { - case -1: /* No console, just die */ - cpu_halt(); - /* NOTREACHED */ - default: - howto &= ~RB_HALT; - break; - } - } -} - -/* - * Check to see if the system paniced, pause and then reboot - * according to the specified delay. - */ -static void -shutdown_panic(void *junk, int howto) -{ - int loop; - - if (howto & RB_DUMP) { - if (PANIC_REBOOT_WAIT_TIME != 0) { - if (PANIC_REBOOT_WAIT_TIME != -1) { - printf("Automatic reboot in %d seconds - " - "press a key on the console to abort\n", - PANIC_REBOOT_WAIT_TIME); - for (loop = PANIC_REBOOT_WAIT_TIME * 10; - loop > 0; --loop) { - DELAY(1000 * 100); /* 1/10th second */ - /* Did user type a key? */ - if (cncheckc() != -1) - break; - } - if (!loop) - return; - } - } else { /* zero time specified - reboot NOW */ - return; - } - printf("--> Press a key on the console to reboot,\n"); - printf("--> or switch off the system now.\n"); - cngetc(); - } -} - -/* - * Everything done, now reset - */ -static void -shutdown_reset(void *junk, int howto) -{ - - printf("Rebooting...\n"); - DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ - /* cpu_boot(howto); */ /* doesn't do anything at the moment */ - cpu_reset(); - /* NOTREACHED */ /* assuming reset worked */ -} - -#ifdef SMP -static u_int panic_cpu = NOCPU; -#endif - -/* - * Panic is called on unresolvable fatal errors. It prints "panic: mesg", - * and then reboots. If we are called twice, then we avoid trying to sync - * the disks as this often leads to recursive panics. - * - * MPSAFE - */ -void -panic(const char *fmt, ...) 
-{ - struct thread *td = curthread; - int bootopt, newpanic; - va_list ap; - static char buf[256]; - -#ifdef SMP - /* - * We don't want multiple CPU's to panic at the same time, so we - * use panic_cpu as a simple spinlock. We have to keep checking - * panic_cpu if we are spinning in case the panic on the first - * CPU is canceled. - */ - if (panic_cpu != PCPU_GET(cpuid)) - while (atomic_cmpset_int(&panic_cpu, NOCPU, - PCPU_GET(cpuid)) == 0) - while (panic_cpu != NOCPU) - ; /* nothing */ -#endif - - bootopt = RB_AUTOBOOT | RB_DUMP; - newpanic = 0; - if (panicstr) - bootopt |= RB_NOSYNC; - else { - panicstr = fmt; - newpanic = 1; - } - - va_start(ap, fmt); - if (newpanic) { - (void)vsnprintf(buf, sizeof(buf), fmt, ap); - panicstr = buf; - printf("panic: %s\n", buf); - } else { - printf("panic: "); - vprintf(fmt, ap); - printf("\n"); - } - va_end(ap); -#ifdef SMP - printf("cpuid = %d\n", PCPU_GET(cpuid)); -#endif - -#ifdef KDB - if (newpanic && trace_on_panic) - kdb_backtrace(); - if (debugger_on_panic) - kdb_enter("panic"); -#ifdef RESTARTABLE_PANICS - /* See if the user aborted the panic, in which case we continue. */ - if (panicstr == NULL) { -#ifdef SMP - atomic_store_rel_int(&panic_cpu, NOCPU); -#endif - return; - } -#endif -#endif - mtx_lock_spin(&sched_lock); - td->td_flags |= TDF_INPANIC; - mtx_unlock_spin(&sched_lock); - if (!sync_on_panic) - bootopt |= RB_NOSYNC; -#ifdef XEN - HYPERVISOR_crash(); -#else - boot(bootopt); -#endif -} - -/* - * Support for poweroff delay. - */ -#ifndef POWEROFF_DELAY -# define POWEROFF_DELAY 5000 -#endif -static int poweroff_delay = POWEROFF_DELAY; - -SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, - &poweroff_delay, 0, ""); - -static void -poweroff_wait(void *junk, int howto) -{ - - if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) - return; - DELAY(poweroff_delay * 1000); -} - -/* - * Some system processes (e.g. 
syncer) need to be stopped at appropriate - * points in their main loops prior to a system shutdown, so that they - * won't interfere with the shutdown process (e.g. by holding a disk buf - * to cause sync to fail). For each of these system processes, register - * shutdown_kproc() as a handler for one of shutdown events. - */ -static int kproc_shutdown_wait = 60; -SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, - &kproc_shutdown_wait, 0, ""); - -void -kproc_shutdown(void *arg, int howto) -{ - struct proc *p; - char procname[MAXCOMLEN + 1]; - int error; - - if (panicstr) - return; - - p = (struct proc *)arg; - strlcpy(procname, p->p_comm, sizeof(procname)); - printf("Waiting (max %d seconds) for system process `%s' to stop...", - kproc_shutdown_wait, procname); - error = kthread_suspend(p, kproc_shutdown_wait * hz); - - if (error == EWOULDBLOCK) - printf("timed out\n"); - else - printf("done\n"); -} - -/* Registration of dumpers */ -int -set_dumper(struct dumperinfo *di) -{ - - if (di == NULL) { - bzero(&dumper, sizeof dumper); - return (0); - } - if (dumper.dumper != NULL) - return (EBUSY); - dumper = *di; - return (0); -} - -#if defined(__powerpc__) -void -dumpsys(struct dumperinfo *di __unused) -{ - - printf("Kernel dumps not implemented on this architecture\n"); -} -#endif diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/mkbuildtree --- a/freebsd-5.3-xen-sparse/mkbuildtree Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,119 +0,0 @@ -#!/bin/bash - -# mkbuildtree <build tree> -# -# Creates symbolic links in <build tree> for the sparse tree -# in the current directory. - -# Script to determine the relative path between two directories. -# Copyright (c) D. J. Hawkey Jr. 2002 -# Fixed for Xen project by K. Fraser in 2003. 
-abs_to_rel () -{ - local CWD SRCPATH - - if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then - SRCPATH=${1%?} - else - SRCPATH=$1 - fi - if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then - DESTPATH=${2%?} - else - DESTPATH=$2 - fi - - CWD=$PWD - [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD - [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD - [ "$CWD" != "$PWD" ] && cd $CWD - - BASEPATH=$SRCPATH - - [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return - [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return - - while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do - BASEPATH=${BASEPATH%/*} - done - - SRCPATH=${SRCPATH#$BASEPATH} - DESTPATH=${DESTPATH#$BASEPATH} - DESTPATH=${DESTPATH#?} - while [ -n "$SRCPATH" ]; do - SRCPATH=${SRCPATH%/*} - DESTPATH="../$DESTPATH" - done - - [ -z "$BASEPATH" ] && BASEPATH="/" - [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?} -} - -# relative_lndir <target_dir> -# Creates a tree of symlinks in the current working directory that mirror -# real files in <target_dir>. <target_dir> should be relative to the current -# working directory. Symlinks in <target_dir> are ignored. Source-control files -# are ignored. -relative_lndir () -{ - local SYMLINK_DIR REAL_DIR pref i j - SYMLINK_DIR=$PWD - REAL_DIR=$1 - ( - cd $REAL_DIR - for i in `find . -type d | grep -v SCCS`; do - [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i - ( - cd $i - pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` - for j in `find . -type f -o -type l -maxdepth 1`; do - ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j - done - ) - done - ) -} - -[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; } - -# Get absolute path to the destination directory -pushd . 
>/dev/null -cd ${1} -AD=$PWD -popd >/dev/null - -# Get absolute path to the source directory -AS=`pwd` - -# Get name of sparse directory -SDN=$(basename $AS) - -# Get path to source, relative to destination -abs_to_rel ${AD} ${AS} -RS=$DESTPATH - -# Remove old copies of files and directories at the destination -for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done - -# We now work from the destination directory -cd ${AD} - -# Remove old symlinks -find sys -type l | while read f -do - case $(readlink $f) in - */$SDN/*) - rm -f $f - ;; - esac -done - -if [ -f ${AD}/BUILDING ]; then - # Create symlinks of files and directories which exist in the sparse source - (cd sys && relative_lndir ../${RS}/sys) -else - # Create symlinks of files and directories which exist in the sparse source - relative_lndir ${RS} - rm -f mkbuildtree -fi - diff -r 64cd054aa143 -r 0255f48b757f freebsd-5.3-xen-sparse/xenfbsd_kernel_build --- a/freebsd-5.3-xen-sparse/xenfbsd_kernel_build Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,7 +0,0 @@ -#!/bin/csh -f -cd i386-xen/conf -config XENCONF -cd ../compile/XENCONF -make kernel-clean -ln -s ../../include/xen-public/io/ring.h -make kernel-depend; make -j4 kernel diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/Makefile --- a/netbsd-2.0-xen-sparse/Makefile Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,25 +0,0 @@ -# -# -# - -.PHONY: clean config install netbsd - -TOPDIR ?= $(shell pwd) -NETBSD_RELEASE ?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR))) -NETBSD_VER ?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR))) -NETBSD_KERNEL ?= XEN - -clean: - @mkdir -p compile/$(NETBSD_KERNEL) - cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen cleandir - -config: - @mkdir -p compile/$(NETBSD_KERNEL) - cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbconfig-xen $(NETBSD_KERNEL) - -netbsd: - cd compile/$(NETBSD_KERNEL) && 
TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen dependall - -install: - @mkdir -p $(dir $(INSTALL_PATH)/$(INSTALL_NAME)) - install -c compile/$(NETBSD_KERNEL)/netbsd $(INSTALL_PATH)/$(INSTALL_NAME) diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/mkbuildtree --- a/netbsd-2.0-xen-sparse/mkbuildtree Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,128 +0,0 @@ -#!/bin/bash - -# mkbuildtree <build tree> -# -# Creates symbolic links in <build tree> for the sparse tree -# in the current directory. - -# Script to determine the relative path between two directories. -# Copyright (c) D. J. Hawkey Jr. 2002 -# Fixed for Xen project by K. Fraser in 2003. -abs_to_rel () -{ - local CWD SRCPATH - - if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then - SRCPATH=${1%?} - else - SRCPATH=$1 - fi - if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then - DESTPATH=${2%?} - else - DESTPATH=$2 - fi - - CWD=$PWD - [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD - [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD - [ "$CWD" != "$PWD" ] && cd $CWD - - BASEPATH=$SRCPATH - - [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return - [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return - - while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do - BASEPATH=${BASEPATH%/*} - done - - SRCPATH=${SRCPATH#$BASEPATH} - DESTPATH=${DESTPATH#$BASEPATH} - DESTPATH=${DESTPATH#?} - while [ -n "$SRCPATH" ]; do - SRCPATH=${SRCPATH%/*} - DESTPATH="../$DESTPATH" - done - - [ -z "$BASEPATH" ] && BASEPATH="/" - [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?} -} - -# relative_lndir <target_dir> -# Creates a tree of symlinks in the current working directory that mirror -# real files in <target_dir>. <target_dir> should be relative to the current -# working directory. Symlinks in <target_dir> are ignored. Source-control files -# are ignored. 
-relative_lndir () -{ - local SYMLINK_DIR REAL_DIR pref i j - SYMLINK_DIR=$PWD - REAL_DIR=$1 - ( - cd $REAL_DIR - for i in `find . -type d | grep -v SCCS`; do - [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i - ( - cd $i - pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'` - for j in `find . -type f -o -type l -maxdepth 1`; do - ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j - done - ) - done - ) -} - -[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; } - -# Get absolute path to the destination directory -pushd . >/dev/null -cd ${1} -AD=$PWD -popd >/dev/null - -# Get absolute path to the source directory -AS=`pwd` - -# Get name of sparse directory -SDN=$(basename $AS) - -# Get path to source, relative to destination -abs_to_rel ${AD} ${AS} -RS=$DESTPATH - -# Remove old copies of files and directories at the destination -for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done - -# We now work from the destination directory -cd ${AD} - -# Remove old symlinks -find sys -type l | while read f -do - case $(readlink $f) in - */$SDN/*) - rm -f $f - ;; - esac -done - -if [ -f ${AD}/BUILDING ]; then - # Create symlinks of files and directories which exist in the sparse source - (cd sys && relative_lndir ../${RS}/sys) -else - # Create symlinks of files and directories which exist in the sparse source - relative_lndir ${RS} - rm -f mkbuildtree -fi - -# Create links to the shared definitions of the Xen interface -rm -rf ${AD}/sys/arch/xen/include/xen-public -mkdir ${AD}/sys/arch/xen/include/xen-public -cd ${AD}/sys/arch/xen/include/xen-public -relative_lndir ../../../../../${RS}/../xen/include/public - -# Remove files which don't exist anymore -rm -rf ${AD}/sys/arch/xen/xen/events.c -rm -rf ${AD}/sys/arch/xen/include/events.h diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/nbconfig-xen --- a/netbsd-2.0-xen-sparse/nbconfig-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,27 +0,0 @@ -#! 
/bin/sh -# - -: ${HOS:=$(uname -s | tr /A-Z/ /a-z/)} -: ${HARCH:=$(uname -m 2>/dev/null || echo i386)} -: ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')} -: ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')} -: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed 's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')} - -case "$HARCH" in -i586|i686) - HARCH=i386 - ;; -esac - -TOOLDIR="$TOPDIR/../netbsd-${NETBSD_RELEASE}-tools/$HOS-$HARCH"; export TOOLDIR - -CONF="$1" -case "$1" in - /*) - CONF="$1" - ;; - *) - CONF="$TOPDIR"/sys/arch/xen/conf/"$1" - ;; -esac -exec "${TOOLDIR}/bin/nbconfig" -b $(pwd) -s "$TOPDIR"/sys "$CONF" diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/nbmake-xen --- a/netbsd-2.0-xen-sparse/nbmake-xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,33 +0,0 @@ -#! /bin/sh -# Set proper variables to allow easy "make" building of a NetBSD subtree. -# Generated from: $NetBSD: build.sh,v 1.126 2004/02/04 11:23:40 lukem Exp $ -# - -: ${HOS:=$(uname -s | tr /A-Z/ /a-z/)} -: ${HARCH:=$(uname -m 2>/dev/null || echo i386)} -: ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')} -: ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')} -: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed 's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')} - -case "$HARCH" in -i586|i686) - HARCH=i386 - ;; -esac - -NETBSDSRCDIR="$TOPDIR"; export NETBSDSRCDIR -DESTDIR="$TOPDIR/root"; export DESTDIR -unset MAKEOBJDIRPREFIX -MAKEOBJDIR='${.CURDIR:C,^'"$TOPDIR,$TOPDIR/obj,}"; export MAKEOBJDIR -RELEASEDIR="$TOPDIR/release"; export RELEASEDIR -MKUNPRIVED='yes'; export MKUNPRIVED -MAKEVERBOSE='1'; export MAKEVERBOSE -LC_ALL='C'; export LC_ALL -TOOLDIR="$TOPDIR/../netbsd-${NETBSD_RELEASE}-tools/$HOS-$HARCH"; export TOOLDIR -MACHINE='i386'; export MACHINE 
-MACHINE_ARCH='i386'; export MACHINE_ARCH -MAKEFLAGS="-de -m $TOPDIR/share/mk MKOBJDIRS=yes"; export MAKEFLAGS -BUILDID="${NETBSD_RELEASE}"; export BUILDID -USETOOLS=yes; export USETOOLS - -exec "${TOOLDIR}/bin/nbmake" ${1+"$@"} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN --- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,175 +0,0 @@ -# $NetBSD: XEN,v 1.1.2.2 2004/07/15 20:19:34 he Exp $ - -include "arch/xen/conf/std.xen" - -options INCLUDE_CONFIG_FILE # embed config file in kernel binary - -#options UVMHIST -#options UVMHIST_PRINT -#options SYSCALL_DEBUG - -maxusers 32 # estimated number of users - -# -options XEN -#options DOM0OPS - -#options I586_CPU -options I686_CPU - -#options VM86 # virtual 8086 emulation -#options USER_LDT # user-settable LDT; used by WINE - -#options MTRR # memory-type range register syscall support - -#options CONSDEVNAME="\"xencons\"" -#options CONS_OVERRIDE - -options INSECURE # disable kernel security levels - X needs this - -options RTC_OFFSET=0 # hardware clock is this many mins. 
west of GMT -options NTP # NTP phase/frequency locked loop - -options KTRACE # system call tracing via ktrace(1) -#options SYSTRACE # system call vetting via systrace(1) - -options SYSVMSG # System V-like message queues -options SYSVSEM # System V-like semaphores -#options SEMMNI=10 # number of semaphore identifiers -#options SEMMNS=60 # number of semaphores in system -#options SEMUME=10 # max number of undo entries per process -#options SEMMNU=30 # number of undo structures in system -options SYSVSHM # System V-like memory sharing -#options SHMMAXPGS=2048 # 2048 pages is the default -options P1003_1B_SEMAPHORE # p1003.1b semaphore support - -options LKM # loadable kernel modules - -options USERCONF # userconf(4) support -options SYSCTL_INCLUDE_DESCR # Include sysctl descriptions in kernel - -# Diagnostic/debugging support options -options DIAGNOSTIC # expensive kernel consistency checks -options DEBUG # expensive debugging checks/support -options KMEMSTATS # kernel memory statistics (vmstat -m) -options DDB # in-kernel debugger -options DDB_ONPANIC=1 # see also sysctl(8): `ddb.onpanic' -options DDB_HISTORY_SIZE=512 # enable history editing in DDB -#options KGDB # remote debugger -#options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x2f8,KGDB_DEVRATE=57600 -makeoptions DEBUG="-g" # compile full symbol table - -#options COMPAT_14 # NetBSD 1.4 -#options COMPAT_15 # NetBSD 1.5 -options COMPAT_16 # NetBSD 1.6 - -##options COMPAT_LINUX # binary compatibility with Linux -#options COMPAT_FREEBSD # binary compatibility with FreeBSD -#options COMPAT_MACH # binary compatibility with Mach binaries -#options COMPAT_DARWIN # binary compatibility with Darwin binaries -#options EXEC_MACHO # exec MACH-O binaries -#options COMPAT_PECOFF # kernel support to run Win32 apps - -file-system FFS # UFS -file-system EXT2FS # second extended file system (linux) -#file-system LFS # log-structured file system -#file-system MFS # memory file system -file-system NFS # Network File System client 
-#file-system NTFS # Windows/NT file system (experimental) -#file-system CD9660 # ISO 9660 + Rock Ridge file system -#file-system MSDOSFS # MS-DOS file system -file-system FDESC # /dev/fd -file-system KERNFS # /kern -file-system NULLFS # loopback file system -#file-system OVERLAY # overlay file system -#file-system PORTAL # portal filesystem (still experimental) -file-system PROCFS # /proc -#file-system UMAPFS # NULLFS + uid and gid remapping -#file-system UNION # union file system -#file-system SMBFS # experimental - CIFS; also needs nsmb (below) - -#options QUOTA # UFS quotas -#options SOFTDEP # FFS soft updates support. -#options NFSSERVER # Network File System server - -options GATEWAY # packet forwarding -options INET # IP + ICMP + TCP + UDP -options INET6 # IPV6 -options IPSEC # IP security -options IPSEC_ESP # IP security (encryption part; define w/IPSEC) -options MROUTING # IP multicast routing -options PFIL_HOOKS # pfil(9) packet filter hooks -options IPFILTER_LOG # ipmon(8) log support - -options NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM,NFS_BOOT_BOOTSTATIC -#options NFS_BOOTSTATIC_MYIP="\"169.254.1.2\"" -#options NFS_BOOTSTATIC_GWIP="\"169.254.1.1\"" -#options NFS_BOOTSTATIC_MASK="\"255.255.255.0\"" -#options NFS_BOOTSTATIC_SERVADDR="\"169.254.1.1\"" -#options NFS_BOOTSTATIC_SERVER="\"server:/path/to/root\"" - -options WSEMUL_VT100 # VT100 / VT220 emulation -options WS_KERNEL_FG=WSCOL_GREEN -options WSDISPLAY_COMPAT_PCVT # emulate some ioctls -options WSDISPLAY_COMPAT_SYSCONS # emulate some ioctls -options WSDISPLAY_COMPAT_USL # VT handling -options WSDISPLAY_COMPAT_RAWKBD # can get raw scancodes -options WSDISPLAY_DEFAULTSCREENS=4 -options PCDISPLAY_SOFTCURSOR - -config netbsd root on ? type ? -#config netbsd root on wd0a type ffs -#config netbsd root on xennet0 type nfs - -mainbus0 at root - -cpu* at mainbus? - -hypervisor* at mainbus? # Xen hypervisor - -npx0 at hypervisor? # x86 math coprocessor - -xencons* at hypervisor? 
# Xen virtual console -xennet* at hypervisor? # Xen virtual network interface - -xbd* at hypervisor? # Xen virtual block device -wd* at hypervisor? # Xen vbd (wd identity) -sd* at hypervisor? # Xen vbd (sd identity) -cd* at hypervisor? # Xen vbd (cd identity) - -#xenkbc* at hypervisor? # Xen Keyboard/Mouse Interface -#pckbd* at xenkbc? # Keyboard -#vga* at hypervisor? # Xen VGA display -#pms* at xenkbc? # PS/2 Mouse for wsmouse - -#wskbd* at pckbd? console ? -#wsdisplay* at vga? console ? -#wsmouse* at pms? mux 0 - - -include "arch/xen/conf/GENERIC.local" - - -pseudo-device ccd 4 # concatenated/striped disk devices -#pseudo-device cgd 4 # cryptographic disk devices -#pseudo-device md 1 # memory disk device (ramdisk) -pseudo-device vnd 4 # disk-like interface to files - -pseudo-device bpfilter 8 # Berkeley packet filter -pseudo-device ipfilter # IP filter (firewall) and NAT -pseudo-device loop # network loopback -#pseudo-device tun 2 # network tunneling over tty -#pseudo-device gre 2 # generic L3 over IP tunnel -#pseudo-device gif 4 # IPv[46] over IPv[46] tunnel (RFC1933) -#pseudo-device faith 1 # IPv[46] tcp relay translation i/f -#pseudo-device stf 1 # 6to4 IPv6 over IPv4 encapsulation -#pseudo-device vlan # IEEE 802.1q encapsulation -#pseudo-device bridge # simple inter-network bridging - -pseudo-device pty # pseudo-terminals -pseudo-device rnd # /dev/random and in-kernel generator -pseudo-device clockctl # user control of clock subsystem - -pseudo-device wsmux # mouse & keyboard multiplexor -pseudo-device wsfont -pseudo-device ksyms # /dev/ksyms diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/conf/files.xen --- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/files.xen Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,232 +0,0 @@ -# $NetBSD: files.xen,v 1.3.2.1 2004/05/22 15:59:02 he Exp $ -# NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp -# NetBSD: files.i386,v 1.254 2004/03/25 23:32:10 jmc Exp - -maxpartitions 8 - 
-maxusers 2 16 128 - -# Processor type options. -defflag opt_cputype.h I686_CPU - -# delay before cpu_reset() for reboot. -defparam CPURESET_DELAY - -# No unmapped page below kernel stack -defflag NOREDZONE - -# Beep on halt -defflag opt_beep.h BEEP_ONHALT -defparam opt_beep.h BEEP_ONHALT_COUNT -defparam opt_beep.h BEEP_ONHALT_PITCH BEEP_ONHALT_PERIOD - -file arch/xen/i386/autoconf.c -file arch/i386/i386/db_dbgreg.S ddb | kstack_check_dr0 -file arch/i386/i386/db_disasm.c ddb -file arch/i386/i386/db_interface.c ddb -file arch/i386/i386/db_memrw.c ddb | kgdb -file arch/i386/i386/db_trace.c ddb -file kern/subr_disk_mbr.c disk -file arch/xen/i386/gdt.c -file arch/xen/i386/hypervisor_machdep.c -file arch/i386/i386/in_cksum.S inet | inet6 -file arch/i386/i386/ipkdb_glue.c ipkdb -file arch/i386/i386/kgdb_machdep.c kgdb -file arch/xen/i386/machdep.c -file arch/xen/i386/identcpu.c -file arch/i386/i386/math_emulate.c math_emulate -file arch/i386/i386/mem.c -file kern/kern_microtime.c i586_cpu | i686_cpu -file arch/i386/i386/mtrr_k6.c mtrr -file netns/ns_cksum.c ns -file arch/xen/i386/pmap.c -file arch/i386/i386/process_machdep.c -file arch/i386/i386/procfs_machdep.c procfs -file arch/xen/i386/sys_machdep.c -file arch/i386/i386/syscall.c -file arch/xen/i386/trap.c -file arch/i386/i386/vm_machdep.c -file arch/xen/i386/xen_machdep.c - -file arch/xen/xen/xen_debug.c - -file arch/xen/xen/clock.c -file arch/xen/xen/evtchn.c -file arch/xen/xen/ctrl_if.c - -file dev/cons.c - -file arch/i386/i386/mptramp.S multiprocessor -file arch/i386/i386/ipifuncs.c multiprocessor - -file arch/i386/i386/pmc.c perfctrs - -file crypto/des/arch/i386/des_enc.S des -file crypto/des/arch/i386/des_cbc.S des - -file crypto/blowfish/arch/i386/bf_enc.S blowfish -file crypto/blowfish/arch/i386/bf_cbc.S blowfish & !i386_cpu - -# -# Machine-independent SCSI drivers -# - -#xxx include "dev/scsipi/files.scsipi" - -# -# Machine-independent ATA drivers -# - -#xxx include "dev/ata/files.ata" - -# Memory Disk for 
install floppy -file dev/md_root.c memory_disk_hooks - -# -define mainbus { [apid = -1] } - -file arch/x86/x86/bus_dma.c -file arch/xen/x86/bus_space.c -file arch/x86/x86/cacheinfo.c -file arch/xen/x86/consinit.c -file arch/xen/x86/intr.c -file arch/x86/x86/ipi.c multiprocessor -file arch/x86/x86/lock_machdep.c lockdebug -file arch/x86/x86/softintr.c - -include "arch/xen/conf/files.compat" - -# -# System bus types -# - -device mainbus: mainbus -attach mainbus at root -file arch/xen/i386/mainbus.c mainbus - -# Xen hypervisor -device hypervisor { } -attach hypervisor at mainbus -file arch/xen/xen/hypervisor.c hypervisor needs-flag - -# Numeric Processing Extension; Math Co-processor -device npx -file arch/xen/i386/npx.c npx needs-flag - -attach npx at hypervisor with npx_hv -file arch/xen/i386/npx_hv.c npx_hv - -# Xen console support -device xencons: tty -attach xencons at hypervisor -file arch/xen/xen/xencons.c xencons needs-flag - -include "dev/wscons/files.wscons" -include "dev/wsfont/files.wsfont" - -include "dev/pckbport/files.pckbport" - -# CPUS - -define cpu { [apid = -1] } -device cpu -attach cpu at mainbus -file arch/xen/i386/cpu.c cpu - -# -# Compatibility modules -# - -# VM86 mode -file arch/i386/i386/vm86.c vm86 - -# VM86 in kernel -file arch/i386/i386/kvm86.c kvm86 -file arch/i386/i386/kvm86call.S kvm86 - -# Binary compatibility with previous NetBSD releases (COMPAT_XX) -file arch/i386/i386/compat_13_machdep.c compat_13 | compat_aout -file arch/i386/i386/compat_16_machdep.c compat_16 | compat_ibcs2 - -# SVR4 binary compatibility (COMPAT_SVR4) -include "compat/svr4/files.svr4" -file arch/i386/i386/svr4_machdep.c compat_svr4 -file arch/i386/i386/svr4_sigcode.S compat_svr4 -file arch/i386/i386/svr4_syscall.c compat_svr4 - -# MACH binary compatibility (COMPAT_MACH) -include "compat/mach/files.mach" -file arch/i386/i386/mach_machdep.c compat_mach | compat_darwin -file arch/i386/i386/mach_sigcode.S compat_mach | compat_darwin -file 
arch/i386/i386/mach_syscall.c compat_mach | compat_darwin -file arch/i386/i386/macho_machdep.c exec_macho - -# DARWIN binary compatibility (COMPAT_DARWIN) -include "compat/darwin/files.darwin" -file arch/i386/i386/darwin_machdep.c compat_darwin - -# iBCS-2 binary compatibility (COMPAT_IBCS2) -include "compat/ibcs2/files.ibcs2" -file arch/i386/i386/ibcs2_machdep.c compat_ibcs2 -file arch/i386/i386/ibcs2_sigcode.S compat_ibcs2 -file arch/i386/i386/ibcs2_syscall.c compat_ibcs2 - -# Linux binary compatibility (COMPAT_LINUX) -include "compat/linux/files.linux" -include "compat/linux/arch/i386/files.linux_i386" -file arch/i386/i386/linux_sigcode.S compat_linux -file arch/i386/i386/linux_syscall.c compat_linux -file arch/i386/i386/linux_trap.c compat_linux - -# FreeBSD binary compatibility (COMPAT_FREEBSD) -include "compat/freebsd/files.freebsd" -file arch/i386/i386/freebsd_machdep.c compat_freebsd -file arch/i386/i386/freebsd_sigcode.S compat_freebsd -file arch/i386/i386/freebsd_syscall.c compat_freebsd - -# a.out binary compatibility (COMPAT_AOUT) -include "compat/aout/files.aout" - -# Win32 binary compatibility (COMPAT_PECOFF) -include "compat/pecoff/files.pecoff" - -# OSS audio driver compatibility -include "compat/ossaudio/files.ossaudio" - -# Xen devices - -# Network driver -device xennet: arp, ether, ifnet -attach xennet at hypervisor -file arch/xen/xen/if_xennet.c xennet needs-flag - -# Block device driver and wd/sd/cd identities -device xbd: disk -attach xbd at hypervisor -file arch/xen/xen/xbd.c xbd | wd | sd | cd needs-flag - -device wd: disk -attach wd at hypervisor - -device sd: disk -attach sd at hypervisor - -device cd: disk -attach cd at hypervisor - -# Keyboard -device xenkbc: pckbport -attach xenkbc at hypervisor -file arch/xen/xen/xenkbc.c xenkbc needs-flag - -# Generic VGA -attach vga at hypervisor with vga_xen -file arch/xen/xen/vga_xen.c vga_xen needs-flag - -# Domain-0 operations -defflag opt_xen.h DOM0OPS -file arch/xen/xen/machmem.c dom0ops -file 
arch/xen/xen/privcmd.c dom0ops -file arch/xen/xen/vfr.c dom0ops - -include "arch/xen/conf/majors.i386" diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/autoconf.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/autoconf.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,630 +0,0 @@ -/* $NetBSD: autoconf.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $ */ -/* NetBSD: autoconf.c,v 1.75 2003/12/30 12:33:22 pk Exp */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)autoconf.c 7.1 (Berkeley) 5/9/91 - */ - -/* - * Setup the system to run on the current machine. - * - * Configure() is called at boot time and initializes the vba - * device tables and the memory controller monitoring. Available - * devices are determined (from possibilities mentioned in ioconf.c), - * and the drivers are initialized. - */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $"); - -#include "opt_compat_oldboot.h" -#include "opt_multiprocessor.h" -#include "opt_nfs_boot.h" -#include "xennet.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/buf.h> -#include <sys/disklabel.h> -#include <sys/conf.h> -#ifdef COMPAT_OLDBOOT -#include <sys/reboot.h> -#endif -#include <sys/device.h> -#include <sys/malloc.h> -#include <sys/vnode.h> -#include <sys/fcntl.h> -#include <sys/dkio.h> -#include <sys/proc.h> -#include <sys/user.h> - -#ifdef NFS_BOOT_BOOTSTATIC -#include <net/if.h> -#include <net/if_ether.h> -#include <netinet/in.h> -#include <nfs/rpcv2.h> -#include <nfs/nfsproto.h> -#include <nfs/nfs.h> -#include <nfs/nfsmount.h> -#include <nfs/nfsdiskless.h> -#include <machine/if_xennetvar.h> -#endif - -#include <machine/pte.h> -#include <machine/cpu.h> -#include <machine/gdt.h> -#include <machine/pcb.h> -#include <machine/bootinfo.h> - -#include "ioapic.h" -#include "lapic.h" - -#if NIOAPIC > 0 -#include <machine/i82093var.h> -#endif - -#if NLAPIC > 0 
-#include <machine/i82489var.h> -#endif - -static int match_harddisk(struct device *, struct btinfo_bootdisk *); -static void matchbiosdisks(void); -static void findroot(void); -static int is_valid_disk(struct device *); - -extern struct disklist *i386_alldisks; -extern int i386_ndisks; - -#include "bios32.h" -#if NBIOS32 > 0 -#include <machine/bios32.h> -#endif - -#include "opt_pcibios.h" -#ifdef PCIBIOS -#include <dev/pci/pcireg.h> -#include <dev/pci/pcivar.h> -#include <i386/pci/pcibios.h> -#endif - -#include "opt_kvm86.h" -#ifdef KVM86 -#include <machine/kvm86.h> -#endif - -#include "opt_xen.h" - -struct device *booted_device; -int booted_partition; - -/* - * Determine i/o configuration for a machine. - */ -void -cpu_configure(void) -{ - - startrtclock(); - -#if NBIOS32 > 0 - bios32_init(); -#endif -#ifdef PCIBIOS - pcibios_init(); -#endif - - /* kvm86 needs a TSS */ - i386_proc0_tss_ldt_init(); -#ifdef KVM86 - kvm86_init(); -#endif - - if (config_rootfound("mainbus", NULL) == NULL) - panic("configure: mainbus not configured"); - -#ifdef INTRDEBUG - intr_printconfig(); -#endif - -#if NIOAPIC > 0 - lapic_set_lvt(); - ioapic_enable(); -#endif - /* resync cr0 after FPU configuration */ - lwp0.l_addr->u_pcb.pcb_cr0 = rcr0(); -#ifdef MULTIPROCESSOR - /* propagate this to the idle pcb's. */ - cpu_init_idle_pcbs(); -#endif - - spl0(); -#if NLAPIC > 0 - lapic_tpr = 0; -#endif -} - -void -cpu_rootconf(void) -{ - findroot(); - matchbiosdisks(); - - printf("boot device: %s\n", - booted_device ? booted_device->dv_xname : "<unknown>"); - - setroot(booted_device, booted_partition); -} - -/* - * XXX ugly bit of code. But, this is the only safe time that the - * match between BIOS disks and native disks can be done. 
- */ -static void -matchbiosdisks(void) -{ - struct btinfo_biosgeom *big; - struct bi_biosgeom_entry *be; - struct device *dv; - int i, ck, error, m, n; - struct vnode *tv; - char mbr[DEV_BSIZE]; - int dklist_size; - int bmajor; - - big = lookup_bootinfo(BTINFO_BIOSGEOM); - - if (big == NULL) - return; - - /* - * First, count all native disks - */ - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) - if (is_valid_disk(dv)) - i386_ndisks++; - - if (i386_ndisks == 0) - return; - - dklist_size = sizeof (struct disklist) + (i386_ndisks - 1) * - sizeof (struct nativedisk_info); - - /* XXX M_TEMP is wrong */ - i386_alldisks = malloc(dklist_size, M_TEMP, M_NOWAIT); - if (i386_alldisks == NULL) - return; - - memset(i386_alldisks, 0, dklist_size); - - i386_alldisks->dl_nnativedisks = i386_ndisks; - i386_alldisks->dl_nbiosdisks = big->num; - for (i = 0; i < big->num; i++) { - i386_alldisks->dl_biosdisks[i].bi_dev = big->disk[i].dev; - i386_alldisks->dl_biosdisks[i].bi_sec = big->disk[i].sec; - i386_alldisks->dl_biosdisks[i].bi_head = big->disk[i].head; - i386_alldisks->dl_biosdisks[i].bi_cyl = big->disk[i].cyl; - i386_alldisks->dl_biosdisks[i].bi_lbasecs = big->disk[i].totsec; - i386_alldisks->dl_biosdisks[i].bi_flags = big->disk[i].flags; -#ifdef GEOM_DEBUG -#ifdef NOTYET - printf("disk %x: flags %x, interface %x, device %llx\n", - big->disk[i].dev, big->disk[i].flags, - big->disk[i].interface_path, big->disk[i].device_path); -#endif -#endif - } - - /* - * XXX code duplication from findroot() - */ - n = -1; - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (dv->dv_class != DV_DISK) - continue; -#ifdef GEOM_DEBUG - printf("matchbiosdisks: trying to match (%s) %s\n", - dv->dv_xname, dv->dv_cfdata->cf_name); -#endif - if (is_valid_disk(dv)) { - n++; - sprintf(i386_alldisks->dl_nativedisks[n].ni_devname, - "%s%d", dv->dv_cfdata->cf_name, - dv->dv_unit); - - bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); - if (bmajor == -1) - return; 
- - if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART), - &tv)) - panic("matchbiosdisks: can't alloc vnode"); - - error = VOP_OPEN(tv, FREAD, NOCRED, 0); - if (error) { - vput(tv); - continue; - } - error = vn_rdwr(UIO_READ, tv, mbr, DEV_BSIZE, 0, - UIO_SYSSPACE, 0, NOCRED, NULL, 0); - VOP_CLOSE(tv, FREAD, NOCRED, 0); - if (error) { -#ifdef GEOM_DEBUG - printf("matchbiosdisks: %s: MBR read failure\n", - dv->dv_xname); -#endif - continue; - } - - for (ck = i = 0; i < DEV_BSIZE; i++) - ck += mbr[i]; - for (m = i = 0; i < big->num; i++) { - be = &big->disk[i]; -#ifdef GEOM_DEBUG - printf("match %s with %d ", dv->dv_xname, i); - printf("dev ck %x bios ck %x\n", ck, be->cksum); -#endif - if (be->flags & BI_GEOM_INVALID) - continue; - if (be->cksum == ck && - !memcmp(&mbr[MBR_PART_OFFSET], be->dosparts, - MBR_PART_COUNT * - sizeof (struct mbr_partition))) { -#ifdef GEOM_DEBUG - printf("matched bios disk %x with %s\n", - be->dev, dv->dv_xname); -#endif - i386_alldisks->dl_nativedisks[n]. - ni_biosmatches[m++] = i; - } - } - i386_alldisks->dl_nativedisks[n].ni_nmatches = m; - vput(tv); - } - } -} - -#ifdef COMPAT_OLDBOOT -u_long bootdev = 0; /* should be dev_t, but not until 32 bits */ -#endif - -/* - * helper function for "findroot()": - * return nonzero if disk device matches bootinfo - */ -static int -match_harddisk(struct device *dv, struct btinfo_bootdisk *bid) -{ - struct vnode *tmpvn; - int error; - struct disklabel label; - int found = 0; - int bmajor; - - /* - * A disklabel is required here. The - * bootblocks don't refuse to boot from - * a disk without a label, but this is - * normally not wanted. - */ - if (bid->labelsector == -1) - return(0); - - /* - * lookup major number for disk block device - */ - bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); - if (bmajor == -1) - return(0); /* XXX panic() ??? */ - - /* - * Fake a temporary vnode for the disk, open - * it, and read the disklabel for comparison. 
- */ - if (bdevvp(MAKEDISKDEV(bmajor, dv->dv_unit, bid->partition), &tmpvn)) - panic("findroot can't alloc vnode"); - error = VOP_OPEN(tmpvn, FREAD, NOCRED, 0); - if (error) { -#ifndef DEBUG - /* - * Ignore errors caused by missing - * device, partition or medium. - */ - if (error != ENXIO && error != ENODEV) -#endif - printf("findroot: can't open dev %s%c (%d)\n", - dv->dv_xname, 'a' + bid->partition, error); - vput(tmpvn); - return(0); - } - error = VOP_IOCTL(tmpvn, DIOCGDINFO, &label, FREAD, NOCRED, 0); - if (error) { - /* - * XXX can't happen - open() would - * have errored out (or faked up one) - */ - printf("can't get label for dev %s%c (%d)\n", - dv->dv_xname, 'a' + bid->partition, error); - goto closeout; - } - - /* compare with our data */ - if (label.d_type == bid->label.type && - label.d_checksum == bid->label.checksum && - !strncmp(label.d_packname, bid->label.packname, 16)) - found = 1; - -closeout: - VOP_CLOSE(tmpvn, FREAD, NOCRED, 0); - vput(tmpvn); - return(found); -} - -/* - * Attempt to find the device from which we were booted. - * If we can do so, and not instructed not to do so, - * change rootdev to correspond to the load device. - */ -void -findroot(void) -{ - struct btinfo_bootdisk *bid; - struct device *dv; - union xen_cmdline_parseinfo xcp; -#ifdef COMPAT_OLDBOOT - int i, majdev, unit, part; - char buf[32]; -#endif - - if (booted_device) - return; - - if (lookup_bootinfo(BTINFO_NETIF)) { - /* - * We got netboot interface information, but - * "device_register()" couldn't match it to a configured - * device. Bootdisk information cannot be present at the - * same time, so give up. - */ - printf("findroot: netboot interface not found\n"); - return; - } - - bid = lookup_bootinfo(BTINFO_BOOTDISK); - if (bid) { - /* - * Scan all disk devices for ones that match the passed data. - * Don't break if one is found, to get possible multiple - * matches - for problem tracking. 
Use the first match anyway - * because lower device numbers are more likely to be the - * boot device. - */ - for (dv = alldevs.tqh_first; dv != NULL; - dv = dv->dv_list.tqe_next) { - if (dv->dv_class != DV_DISK) - continue; - - if (!strcmp(dv->dv_cfdata->cf_name, "fd")) { - /* - * Assume the configured unit number matches - * the BIOS device number. (This is the old - * behaviour.) Needs some ideas how to handle - * BIOS's "swap floppy drive" options. - */ - if ((bid->biosdev & 0x80) || - dv->dv_unit != bid->biosdev) - continue; - - goto found; - } - - if (is_valid_disk(dv)) { - /* - * Don't trust BIOS device numbers, try - * to match the information passed by the - * bootloader instead. - */ - if ((bid->biosdev & 0x80) == 0 || - !match_harddisk(dv, bid)) - continue; - - goto found; - } - - /* no "fd", "wd", "sd", "ld", "ed" */ - continue; - -found: - if (booted_device) { - printf("warning: double match for boot " - "device (%s, %s)\n", - booted_device->dv_xname, dv->dv_xname); - continue; - } - booted_device = dv; - booted_partition = bid->partition; - } - - if (booted_device) - return; - } - - xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp); - - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (is_valid_disk(dv) == 0) - continue; - - if (xcp.xcp_bootdev[0] == 0) { - booted_device = dv; - break; - } - - if (strncmp(xcp.xcp_bootdev, dv->dv_xname, - strlen(dv->dv_xname))) - continue; - - if (strlen(xcp.xcp_bootdev) > strlen(dv->dv_xname)) { - booted_partition = toupper( - xcp.xcp_bootdev[strlen(dv->dv_xname)]) - 'A'; - } - - booted_device = dv; - break; - } - - if (booted_device) - return; - -#ifdef COMPAT_OLDBOOT -#if 0 - printf("howto %x bootdev %x ", boothowto, bootdev); -#endif - - if ((bootdev & B_MAGICMASK) != (u_long)B_DEVMAGIC) - return; - - majdev = (bootdev >> B_TYPESHIFT) & B_TYPEMASK; - name = devsw_blk2name(majdev); - if (name == NULL) - return; - - part = (bootdev >> B_PARTITIONSHIFT) & B_PARTITIONMASK; - unit = (bootdev >> 
B_UNITSHIFT) & B_UNITMASK; - - sprintf(buf, "%s%d", name, unit); - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (strcmp(buf, dv->dv_xname) == 0) { - booted_device = dv; - booted_partition = part; - return; - } - } -#endif -} - -#include "pci.h" - -#include <dev/isa/isavar.h> -#if NPCI > 0 -#include <dev/pci/pcivar.h> -#endif - -void -device_register(struct device *dev, void *aux) -{ - /* - * Handle network interfaces here, the attachment information is - * not available driver independantly later. - * For disks, there is nothing useful available at attach time. - */ -#if NXENNET > 0 - if (dev->dv_class == DV_IFNET) { - union xen_cmdline_parseinfo xcp; - - xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp); - if (strncmp(xcp.xcp_bootdev, dev->dv_xname, 16) == 0) { -#ifdef NFS_BOOT_BOOTSTATIC - nfs_bootstatic_callback = xennet_bootstatic_callback; -#endif - goto found; - } - } -#endif - if (dev->dv_class == DV_IFNET) { - struct btinfo_netif *bin = lookup_bootinfo(BTINFO_NETIF); - if (bin == NULL) - return; - - /* - * We don't check the driver name against the device name - * passed by the boot ROM. The ROM should stay usable - * if the driver gets obsoleted. - * The physical attachment information (checked below) - * must be sufficient to identify the device. - */ - - if (bin->bus == BI_BUS_ISA && - !strcmp(dev->dv_parent->dv_cfdata->cf_name, "isa")) { - struct isa_attach_args *iaa = aux; - - /* compare IO base address */ - /* XXXJRT what about multiple I/O addrs? */ - if (iaa->ia_nio > 0 && - bin->addr.iobase == iaa->ia_io[0].ir_addr) - goto found; - } -#if NPCI > 0 - if (bin->bus == BI_BUS_PCI && - !strcmp(dev->dv_parent->dv_cfdata->cf_name, "pci")) { - struct pci_attach_args *paa = aux; - int b, d, f; - - /* - * Calculate BIOS representation of: - * - * <bus,device,function> - * - * and compare. 
- */ - pci_decompose_tag(paa->pa_pc, paa->pa_tag, &b, &d, &f); - if (bin->addr.tag == ((b << 8) | (d << 3) | f)) - goto found; - } -#endif - } - return; - -found: - if (booted_device) { - /* XXX should be a "panic()" */ - printf("warning: double match for boot device (%s, %s)\n", - booted_device->dv_xname, dev->dv_xname); - return; - } - booted_device = dev; -} - -static int -is_valid_disk(struct device *dv) -{ - const char *name; - - if (dv->dv_class != DV_DISK) - return (0); - - name = dv->dv_cfdata->cf_name; - - return (strcmp(name, "sd") == 0 || strcmp(name, "wd") == 0 || - strcmp(name, "ld") == 0 || strcmp(name, "ed") == 0 || - strcmp(name, "xbd") == 0); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/gdt.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/gdt.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,408 +0,0 @@ -/* $NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $ */ -/* NetBSD: gdt.c,v 1.32 2004/02/13 11:36:13 wiz Exp */ - -/*- - * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by John T. Kohl and Charles M. Hannum. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. 
Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.1 2004/03/11 21:44:08 cl Exp $"); - -#include "opt_multiprocessor.h" -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/lock.h> -#include <sys/user.h> - -#include <uvm/uvm.h> - -#include <machine/gdt.h> - -int gdt_size[2]; /* total number of GDT entries */ -int gdt_count[2]; /* number of GDT entries in use */ -int gdt_next[2]; /* next available slot for sweeping */ -int gdt_free[2]; /* next free slot; terminated with GNULL_SEL */ - -struct lock gdt_lock_store; - -static __inline void gdt_lock(void); -static __inline void gdt_unlock(void); -void gdt_init(void); -void gdt_grow(int); -int gdt_get_slot(void); -int gdt_get_slot1(int); -void gdt_put_slot(int); -void gdt_put_slot1(int, int); - -/* - * Lock and unlock the GDT, to avoid races in case gdt_{ge,pu}t_slot() sleep - * waiting for memory. 
- * - * Note that the locking done here is not sufficient for multiprocessor - * systems. A freshly allocated slot will still be of type SDT_SYSNULL for - * some time after the GDT is unlocked, so gdt_compact() could attempt to - * reclaim it. - */ -static __inline void -gdt_lock() -{ - - (void) lockmgr(&gdt_lock_store, LK_EXCLUSIVE, NULL); -} - -static __inline void -gdt_unlock() -{ - - (void) lockmgr(&gdt_lock_store, LK_RELEASE, NULL); -} - -void -setgdt(int sel, void *base, size_t limit, - int type, int dpl, int def32, int gran) -{ - struct segment_descriptor sd; - CPU_INFO_ITERATOR cii; - struct cpu_info *ci; - - if (type == SDT_SYS386TSS) { - /* printk("XXX TSS descriptor not supported in GDT\n"); */ - return; - } - - setsegment(&sd, base, limit, type, dpl, def32, gran); - for (CPU_INFO_FOREACH(cii, ci)) { - if (ci->ci_gdt != NULL) { -#ifndef XEN - ci->ci_gdt[sel].sd = sd; -#else - xen_update_descriptor(&ci->ci_gdt[sel], - (union descriptor *)&sd); -#endif - } - } -} - -/* - * Initialize the GDT subsystem. Called from autoconf(). 
- */ -void -gdt_init() -{ - size_t max_len, min_len; - union descriptor *old_gdt; - struct vm_page *pg; - vaddr_t va; - struct cpu_info *ci = &cpu_info_primary; - - lockinit(&gdt_lock_store, PZERO, "gdtlck", 0, 0); - - max_len = MAXGDTSIZ * sizeof(gdt[0]); - min_len = MINGDTSIZ * sizeof(gdt[0]); - - gdt_size[0] = MINGDTSIZ; - gdt_count[0] = NGDT; - gdt_next[0] = NGDT; - gdt_free[0] = GNULL_SEL; - - gdt_size[1] = 0; - gdt_count[1] = MAXGDTSIZ; - gdt_next[1] = MAXGDTSIZ; - gdt_free[1] = GNULL_SEL; - - old_gdt = gdt; - gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len + max_len); - for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + min_len; va += PAGE_SIZE) { - pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); - if (pg == NULL) { - panic("gdt_init: no pages"); - } - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - } - memcpy(gdt, old_gdt, NGDT * sizeof(gdt[0])); - ci->ci_gdt = gdt; - setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, - SDT_MEMRWA, SEL_KPL, 1, 1); - - gdt_init_cpu(ci); -} - -/* - * Allocate shadow GDT for a slave CPU. - */ -void -gdt_alloc_cpu(struct cpu_info *ci) -{ - int max_len = MAXGDTSIZ * sizeof(gdt[0]); - int min_len = MINGDTSIZ * sizeof(gdt[0]); - struct vm_page *pg; - vaddr_t va; - - ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, max_len); - for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len; - va += PAGE_SIZE) { - while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) - == NULL) { - uvm_wait("gdt_alloc_cpu"); - } - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - } - memset(ci->ci_gdt, 0, min_len); - memcpy(ci->ci_gdt, gdt, gdt_count[0] * sizeof(gdt[0])); - setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, - SDT_MEMRWA, SEL_KPL, 1, 1); -} - - -/* - * Load appropriate gdt descriptor; we better be running on *ci - * (for the most part, this is how a CPU knows who it is). 
- */ -void -gdt_init_cpu(struct cpu_info *ci) -{ -#ifndef XEN - struct region_descriptor region; - size_t max_len; - - max_len = MAXGDTSIZ * sizeof(gdt[0]); - setregion(&region, ci->ci_gdt, max_len - 1); - lgdt(&region); -#else - size_t len = gdt_size[0] * sizeof(gdt[0]); - unsigned long frames[len >> PAGE_SHIFT]; - vaddr_t va; - pt_entry_t *ptp; - pt_entry_t *maptp; - int f; - - for (va = (vaddr_t)ci->ci_gdt, f = 0; - va < (vaddr_t)ci->ci_gdt + len; - va += PAGE_SIZE, f++) { - KASSERT(va >= VM_MIN_KERNEL_ADDRESS); - ptp = kvtopte(va); - frames[f] = *ptp >> PAGE_SHIFT; - maptp = (pt_entry_t *)vtomach((vaddr_t)ptp); - PTE_CLEARBITS(ptp, maptp, PG_RW); - } - PTE_UPDATES_FLUSH(); - /* printk("loading gdt %x, %d entries, %d pages", */ - /* frames[0] << PAGE_SHIFT, gdt_size[0], len >> PAGE_SHIFT); */ - if (HYPERVISOR_set_gdt(frames, gdt_size[0])) - panic("HYPERVISOR_set_gdt failed!\n"); - lgdt_finish(); -#endif -} - -#ifdef MULTIPROCESSOR - -void -gdt_reload_cpu(struct cpu_info *ci) -{ - struct region_descriptor region; - size_t max_len; - - max_len = MAXGDTSIZ * sizeof(gdt[0]); - setregion(&region, ci->ci_gdt, max_len - 1); - lgdt(&region); -} -#endif - - -/* - * Grow the GDT. 
- */ -void -gdt_grow(int which) -{ - size_t old_len, new_len, max_len; - CPU_INFO_ITERATOR cii; - struct cpu_info *ci; - struct vm_page *pg; - vaddr_t va; - - old_len = gdt_size[which] * sizeof(gdt[0]); - gdt_size[which] <<= 1; - new_len = old_len << 1; - - if (which != 0) { - max_len = MAXGDTSIZ * sizeof(gdt[0]); - if (old_len == 0) { - gdt_size[which] = MINGDTSIZ; - new_len = gdt_size[which] * sizeof(gdt[0]); - } - for (va = (vaddr_t)(cpu_info_primary.ci_gdt) + old_len + max_len; - va < (vaddr_t)(cpu_info_primary.ci_gdt) + new_len + max_len; - va += PAGE_SIZE) { - while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == - NULL) { - uvm_wait("gdt_grow"); - } - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - } - return; - } - - for (CPU_INFO_FOREACH(cii, ci)) { - for (va = (vaddr_t)(ci->ci_gdt) + old_len; - va < (vaddr_t)(ci->ci_gdt) + new_len; - va += PAGE_SIZE) { - while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == - NULL) { - uvm_wait("gdt_grow"); - } - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - } - } -} - -/* - * Allocate a GDT slot as follows: - * 1) If there are entries on the free list, use those. - * 2) If there are fewer than gdt_size entries in use, there are free slots - * near the end that we can sweep through. - * 3) As a last resort, we increase the size of the GDT, and sweep through - * the new slots. 
- */ -int -gdt_get_slot() -{ - return gdt_get_slot1(0); -} - -int -gdt_get_slot1(int which) -{ - size_t offset; - int slot; - - gdt_lock(); - - if (gdt_free[which] != GNULL_SEL) { - slot = gdt_free[which]; - gdt_free[which] = gdt[slot].gd.gd_selector; - } else { - offset = which * MAXGDTSIZ * sizeof(gdt[0]); - if (gdt_next[which] != gdt_count[which] + offset) - panic("gdt_get_slot botch 1"); - if (gdt_next[which] - offset >= gdt_size[which]) { - if (gdt_size[which] >= MAXGDTSIZ) - panic("gdt_get_slot botch 2"); - gdt_grow(which); - } - slot = gdt_next[which]++; - } - - gdt_count[which]++; - gdt_unlock(); - return (slot); -} - -/* - * Deallocate a GDT slot, putting it on the free list. - */ -void -gdt_put_slot(int slot) -{ - gdt_put_slot1(slot, 0); -} - -void -gdt_put_slot1(int slot, int which) -{ - - gdt_lock(); - gdt_count[which]--; - - gdt[slot].gd.gd_type = SDT_SYSNULL; - gdt[slot].gd.gd_selector = gdt_free[which]; - gdt_free[which] = slot; - - gdt_unlock(); -} - -int -tss_alloc(struct pcb *pcb) -{ - int slot; - - slot = gdt_get_slot(); - setgdt(slot, &pcb->pcb_tss, sizeof(struct pcb) - 1, - SDT_SYS386TSS, SEL_KPL, 0, 0); - return GSEL(slot, SEL_KPL); -} - -void -tss_free(int sel) -{ - - gdt_put_slot(IDXSEL(sel)); -} - -/* - * Caller must have pmap locked for both of these functions. 
- */ -void -ldt_alloc(struct pmap *pmap, union descriptor *ldt, size_t len) -{ - int slot; - - slot = gdt_get_slot1(1); -#ifndef XEN - setgdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0); -#else - cpu_info_primary.ci_gdt[slot].ld.ld_base = (uint32_t)ldt; - cpu_info_primary.ci_gdt[slot].ld.ld_entries = - len / sizeof(union descriptor); -#endif - pmap->pm_ldt_sel = GSEL(slot, SEL_KPL); -} - -void -ldt_free(struct pmap *pmap) -{ - int slot; - - slot = IDXSEL(pmap->pm_ldt_sel); - - gdt_put_slot1(slot, 1); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,229 +0,0 @@ -/* $NetBSD: hypervisor_machdep.c,v 1.2.2.2 2004/06/17 09:23:13 tron Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/****************************************************************************** - * hypervisor.c - * - * Communication to/from hypervisor. - * - * Copyright (c) 2002-2004, K A Fraser - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.2.2.2 2004/06/17 09:23:13 tron Exp $"); - -#include <sys/cdefs.h> -#include <sys/param.h> -#include <sys/systm.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void -hypervisor_force_callback(void) -{ - - (void)HYPERVISOR_xen_version(0); -} - -int stipending(void); -int -stipending() -{ - uint32_t l1; - unsigned long l2; - unsigned int l1i, l2i, port; - int irq; - shared_info_t *s = HYPERVISOR_shared_info; - struct cpu_info *ci; - int ret; - - ret = 0; - ci = curcpu(); - -#if 0 - if (HYPERVISOR_shared_info->events) - printf("stipending events %08lx mask %08lx ilevel %d\n", - HYPERVISOR_shared_info->events, - HYPERVISOR_shared_info->events_mask, ci->ci_ilevel); -#endif - - /* - * we're only called after STIC, so we know that we'll have to - * STI at the end - */ - cli(); - while (s->vcpu_data[0].evtchn_upcall_pending) { - s->vcpu_data[0].evtchn_upcall_pending = 0; - /* NB. No need for a barrier here -- XCHG is a barrier - * on x86. 
*/ - l1 = x86_atomic_xchg(&s->evtchn_pending_sel, 0); - while ((l1i = ffs(l1)) != 0) { - l1i--; - l1 &= ~(1 << l1i); - - l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i]; - while ((l2i = ffs(l2)) != 0) { - l2i--; - l2 &= ~(1 << l2i); - - port = (l1i << 5) + l2i; - if ((irq = evtchn_to_irq[port]) != -1) { - hypervisor_acknowledge_irq(irq); - ci->ci_ipending |= (1 << irq); - if (ret == 0 && ci->ci_ilevel < - ci->ci_isources[irq]->is_maxlevel) - ret = 1; - } -#if 0 /* XXXcl dev/evtchn */ - else - evtchn_device_upcall(port); -#endif - } - } - } - sti(); - -#if 0 - if (ci->ci_ipending & 0x1) - printf("stipending events %08lx mask %08lx ilevel %d ipending %08x\n", - HYPERVISOR_shared_info->events, - HYPERVISOR_shared_info->events_mask, ci->ci_ilevel, - ci->ci_ipending); -#endif - - return (ret); -} - -void do_hypervisor_callback(struct intrframe *regs) -{ - uint32_t l1; - unsigned long l2; - unsigned int l1i, l2i, port; - int irq; - shared_info_t *s = HYPERVISOR_shared_info; - struct cpu_info *ci; - int level; - - ci = curcpu(); - level = ci->ci_ilevel; - - while (s->vcpu_data[0].evtchn_upcall_pending) { - s->vcpu_data[0].evtchn_upcall_pending = 0; - /* NB. No need for a barrier here -- XCHG is a barrier - * on x86. 
*/ - l1 = x86_atomic_xchg(&s->evtchn_pending_sel, 0); - while ((l1i = ffs(l1)) != 0) { - l1i--; - l1 &= ~(1 << l1i); - - l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i]; - while ((l2i = ffs(l2)) != 0) { - l2i--; - l2 &= ~(1 << l2i); - - port = (l1i << 5) + l2i; - if ((irq = evtchn_to_irq[port]) != -1) - do_event(irq, regs); -#if 0 /* XXXcl dev/evtchn */ - else - evtchn_device_upcall(port); -#endif - } - } - } - -#ifdef DIAGNOSTIC - if (level != ci->ci_ilevel) - printf("hypervisor done %08x level %d/%d ipending %08x\n", - HYPERVISOR_shared_info->evtchn_pending_sel, level, - ci->ci_ilevel, ci->ci_ipending); -#endif -} - -void hypervisor_unmask_event(unsigned int ev) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - x86_atomic_clear_bit(&s->evtchn_mask[0], ev); - /* - * The following is basically the equivalent of - * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the - * interrupt edge' if the channel is masked. - */ - if (x86_atomic_test_bit(&s->evtchn_pending[0], ev) && - !x86_atomic_test_and_set_bit(&s->evtchn_pending_sel, ev>>5)) { - s->vcpu_data[0].evtchn_upcall_pending = 1; - if (!s->vcpu_data[0].evtchn_upcall_mask) - hypervisor_force_callback(); - } -} - -void hypervisor_mask_event(unsigned int ev) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - x86_atomic_set_bit(&s->evtchn_mask[0], ev); -} - -void hypervisor_clear_event(unsigned int ev) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - x86_atomic_clear_bit(&s->evtchn_pending[0], ev); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1998 +0,0 @@ -/* $NetBSD: locore.S,v 1.2.2.1 2004/05/22 15:59:48 he Exp $ */ -/* NetBSD: locore.S,v 1.26 2004/04/12 13:17:46 yamt Exp */ - -/*- - * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. - * All rights reserved. 
- * - * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. 
- * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)locore.s 7.3 (Berkeley) 5/13/91 - */ - -#include "opt_compat_netbsd.h" -#include "opt_compat_oldboot.h" -#include "opt_cputype.h" -#include "opt_ddb.h" -#include "opt_ipkdb.h" -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" -#include "opt_realmem.h" -#include "opt_user_ldt.h" -#include "opt_vm86.h" -#include "opt_xen.h" - -#include "npx.h" -#include "assym.h" -#include "apm.h" -#include "lapic.h" -#include "ioapic.h" -#include "ksyms.h" - -#include <sys/errno.h> -#include <sys/syscall.h> - -#include <machine/cputypes.h> -#include <machine/param.h> -#include <machine/pte.h> -#include <machine/segments.h> -#include <machine/specialreg.h> -#include <machine/trap.h> -#include <machine/bootinfo.h> - -#if NLAPIC > 0 -#include <machine/i82489reg.h> -#endif - -/* LINTSTUB: include <sys/types.h> */ -/* LINTSTUB: include <machine/cpu.h> */ -/* LINTSTUB: include <sys/systm.h> */ - -#include <machine/asm.h> - -#if defined(MULTIPROCESSOR) - -#define SET_CURLWP(lwp,cpu) \ - movl CPUVAR(SELF),cpu ; \ - movl lwp,CPUVAR(CURLWP) ; \ - movl cpu,L_CPU(lwp) - -#else - -#define SET_CURLWP(lwp,tcpu) movl lwp,CPUVAR(CURLWP) -#define GET_CURLWP(reg) movl CPUVAR(CURLWP),reg - -#endif - -#define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg -#define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB) - -#define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED) - -/* XXX temporary kluge; these should not be here */ -/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */ -#include <dev/isa/isareg.h> - - -/* Disallow old names for REALBASEMEM */ -#ifdef BIOSBASEMEM -#error BIOSBASEMEM option deprecated; use REALBASEMEM only if memory size reported by latest boot block is incorrect -#endif - -/* Disallow old names for REALEXTMEM */ -#ifdef EXTMEM_SIZE -#error EXTMEM_SIZE option deprecated; use REALEXTMEM only if memory size reported by latest boot block is incorrect -#endif -#ifdef BIOSEXTMEM -#error BIOSEXTMEM option deprecated; use REALEXTMEM only if memory size reported by latest boot 
block is incorrect -#endif - -#include <machine/frameasm.h> - - -#ifdef MULTIPROCESSOR -#include <machine/i82489reg.h> -#endif - -/* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). - * - * XXX 4 == sizeof pde - */ - .set _C_LABEL(PTmap),(PDSLOT_PTE << PDSHIFT) - .set _C_LABEL(PTD),(_C_LABEL(PTmap) + PDSLOT_PTE * PAGE_SIZE) - .set _C_LABEL(PTDpde),(_C_LABEL(PTD) + PDSLOT_PTE * 4) - -/* - * APTmap, APTD is the alternate recursive pagemap. - * It's used when modifying another process's page tables. - * - * XXX 4 == sizeof pde - */ - .set _C_LABEL(APTmap),(PDSLOT_APTE << PDSHIFT) - .set _C_LABEL(APTD),(_C_LABEL(APTmap) + PDSLOT_APTE * PAGE_SIZE) - .set _C_LABEL(APTDpde),(_C_LABEL(PTD) + PDSLOT_APTE * 4) - - -/* - * Xen guest identifier and loader selection - */ -.section __xen_guest - .ascii "GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=3.0" - .ascii ",LOADER=generic" -#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE) - .ascii ",BSD_SYMTAB" -#endif - .byte 0 - - -/* - * Initialization - */ - .data - - .globl _C_LABEL(cpu) - .globl _C_LABEL(boothowto) - .globl _C_LABEL(bootinfo),_C_LABEL(atdevbase) -#ifdef COMPAT_OLDBOOT - .globl _C_LABEL(bootdev) -#endif - .globl _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr) - .globl _C_LABEL(biosbasemem),_C_LABEL(biosextmem) - .globl _C_LABEL(gdt) -#ifdef I586_CPU - .globl _C_LABEL(idt) -#endif - .globl _C_LABEL(lapic_tpr) - -#if NLAPIC > 0 -#ifdef __ELF__ - .align PAGE_SIZE -#else - .align 12 -#endif - .globl _C_LABEL(local_apic), _C_LABEL(lapic_id) -_C_LABEL(local_apic): - .space LAPIC_ID -_C_LABEL(lapic_id): - .long 0x00000000 - .space LAPIC_TPRI-(LAPIC_ID+4) -_C_LABEL(lapic_tpr): - .space LAPIC_PPRI-LAPIC_TPRI -_C_LABEL(lapic_ppr): - .space LAPIC_ISR-LAPIC_PPRI -_C_LABEL(lapic_isr): - .space PAGE_SIZE-LAPIC_ISR -#else -_C_LABEL(lapic_tpr): - .long 0 -#endif - - -_C_LABEL(cpu): .long 0 # are we 386, 386sx, or 486, - # or Pentium, or.. 
-_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual -_C_LABEL(proc0paddr): .long 0 -_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm -#ifndef REALBASEMEM -_C_LABEL(biosbasemem): .long 0 # base memory reported by BIOS -#else -_C_LABEL(biosbasemem): .long REALBASEMEM -#endif -#ifndef REALEXTMEM -_C_LABEL(biosextmem): .long 0 # extended memory reported by BIOS -#else -_C_LABEL(biosextmem): .long REALEXTMEM -#endif - -#include <machine/xen.h> -#define __HYPERVISOR_yield 8 -#define __SCHEDOP_yield 0 - - .space 512 -tmpstk: - .long tmpstk, __KERNEL_DS - - -#define _RELOC(x) ((x)) -#define RELOC(x) _RELOC(_C_LABEL(x)) - - .text - .globl _C_LABEL(kernel_text) - .set _C_LABEL(kernel_text),KERNTEXTOFF - - .globl start -start: - cld - - lss tmpstk,%esp # bootstrap stack end location - - movl %esi,%ebx # save start_info pointer - - /* Clear BSS first so that there are no surprises... */ - xorl %eax,%eax - movl $RELOC(__bss_start),%edi - movl $RELOC(_end),%ecx - subl %edi,%ecx - rep stosb - - movl %ebx,RELOC(avail_start) - - /* Copy the necessary stuff from start_info structure. */ - /* We need to copy shared_info early, so that sti/cli work */ - movl %ebx,%esi - movl $RELOC(start_info_union),%edi - movl $128,%ecx - rep movsl - - /* (howto, [bootdev], bootinfo, basemem, extmem). */ - xorl %eax,%eax - movl %eax,RELOC(boothowto) -#ifdef COMPAT_OLDBOOT - movl %eax,RELOC(bootdev) -#endif - movl $0x20000,%eax - movl %eax,RELOC(boothowto) - - /* First, reset the PSL. */ - pushl $PSL_MBO - popfl - - /* Clear segment registers; always null in proc0. 
*/ - xorl %eax,%eax - movw %ax,%fs - movw %ax,%gs - decl %eax - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL - - xorl %eax,%eax - cpuid - movl %eax,RELOC(cpu_info_primary)+CPU_INFO_LEVEL - -/* - * Virtual address space of kernel: - * - * text | data | bss | [syms] | page dir | proc0 kstack - * 0 1 2 3 - */ -#define PROC0PDIR ((0) * PAGE_SIZE) -#define PROC0STACK ((1) * PAGE_SIZE) -#define SYSMAP ((1+UPAGES) * PAGE_SIZE) -#define TABLESIZE ((1+UPAGES) * PAGE_SIZE) /* + nkpde * PAGE_SIZE */ - - /* Find end of kernel image. */ - movl RELOC(avail_start),%edi - /* Calculate where to start the bootstrap tables. */ - movl %edi,%esi - - /* - * Calculate the size of the kernel page table directory, and - * how many entries it will have. - */ - movl RELOC(nkpde),%ecx # get nkpde - cmpl $NKPTP_MIN,%ecx # larger than min? - jge 1f - movl $NKPTP_MIN,%ecx # set at min - jmp 2f -1: cmpl $NKPTP_MAX,%ecx # larger than max? - jle 2f - movl $NKPTP_MAX,%ecx -2: - - /* Clear memory for bootstrap tables. */ - shll $PGSHIFT,%ecx - addl $TABLESIZE,%ecx - addl %esi,%ecx # end of tables - movl %ecx,RELOC(gdt) - addl $PAGE_SIZE,%ecx - movl %ecx,RELOC(avail_start) - subl %edi,%ecx # size of tables - shrl $2,%ecx - xorl %eax,%eax - cld - rep - stosl - -/* - * fillkpt - * eax = pte (page frame | control | status) - * ebx = page table address - * ecx = number of pages to map - */ -#define fillkpt \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $4,%ebx ; /* next pte */ \ - loop 1b ; - -/* - * Build initial page tables. - */ - /* Calculate end of text segment, rounded to a page. */ - leal (RELOC(etext)+PGOFSET),%edx - andl $~PGOFSET,%edx - - /* Skip over the first 1MB. */ - movl $KERNTEXTOFF,%eax - movl %eax,%ecx - subl $KERNBASE_LOCORE,%ecx - shrl $PGSHIFT,%ecx - leal (SYSMAP)(%esi,%ecx,4),%ebx - - /* Map the kernel text read-only. 
*/ - movl %edx,%ecx - subl %eax,%ecx - shrl $PGSHIFT,%ecx - orl $(PG_V|PG_KR),%eax - fillkpt - - /* Map the data, BSS, and bootstrap tables read-write. */ - movl RELOC(avail_start),%ecx - # end of tables - subl %edx,%ecx # subtract end of text - shrl $PGSHIFT,%ecx - leal (PG_V|PG_KW)(%edx),%eax - fillkpt - - movl $0xffffffff,(%ebx) - addl $4,%ebx - -/* - * Construct a page table directory. - */ - /* Map kernel PDEs. */ - movl RELOC(nkpde),%ecx # for this many pde s, - leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # kernel pde offset - leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0, - fillkpt - - /* Install a PDE recursively mapping page directory as a page table! */ - leal (PROC0PDIR+PG_V/*|PG_KW*/)(%esi),%eax # pte for ptd - movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot - - /* Save phys. addr of PTD, for libkvm. */ - movl %esi,RELOC(PTDpaddr) - - call xpmap_init - - /* cr0 is 0x8005003b */ - - /* Relocate atdevbase. */ - movl _C_LABEL(avail_start),%edx - movl %edx,_C_LABEL(HYPERVISOR_shared_info) - addl $PAGE_SIZE,%edx # shared_inf - movl %edx,_C_LABEL(atdevbase) - - /* Set up bootstrap stack. */ - leal (PROC0STACK)(%esi),%eax - movl %eax,_C_LABEL(proc0paddr) - leal (USPACE-FRAMESIZE)(%eax),%esp - subl $KERNBASE_LOCORE,%esi - movl %esi,PCB_CR3(%eax) # pcb->pcb_cr3 - xorl %ebp,%ebp # mark end of frames - - movl _C_LABEL(atdevbase),%eax - pushl %eax - call _C_LABEL(init386) # wire 386 chip for unix operation - addl $4,%esp - -#ifdef SAFARI_FIFO_HACK - movb $5,%al - movw $0x37b,%dx - outb %al,%dx - movw $0x37f,%dx - inb %dx,%al - movb %al,%cl - - orb $1,%cl - - movb $5,%al - movw $0x37b,%dx - outb %al,%dx - movw $0x37f,%dx - movb %cl,%al - outb %al,%dx -#endif /* SAFARI_FIFO_HACK */ - - call _C_LABEL(main) - -/* - * void proc_trampoline(void); - * This is a trampoline function pushed onto the stack of a newly created - * process in order to do some additional setup. 
The trampoline is entered by - * cpu_switch()ing to the process, so we abuse the callee-saved registers used - * by cpu_switch() to store the information about the stub to call. - * NOTE: This function does not have a normal calling sequence! - */ -/* LINTSTUB: Func: void proc_trampoline(void) */ -NENTRY(proc_trampoline) -#ifdef MULTIPROCESSOR - call _C_LABEL(proc_trampoline_mp) -#endif - movl $IPL_NONE,CPUVAR(ILEVEL) - pushl %ebx - call *%esi - addl $4,%esp - DO_DEFERRED_SWITCH(%eax) - INTRFASTEXIT - /* NOTREACHED */ - -/*****************************************************************************/ -#ifdef COMPAT_16 -/* - * Signal trampoline; copied to top of user stack. - */ -/* LINTSTUB: Var: char sigcode[1], esigcode[1]; */ -NENTRY(sigcode) - /* - * Handler has returned here as if we called it. The sigcontext - * is on the stack after the 3 args "we" pushed. - */ - leal 12(%esp),%eax # get pointer to sigcontext - movl %eax,4(%esp) # put it in the argument slot - # fake return address already there - movl $SYS_compat_16___sigreturn14,%eax - int $0x80 # enter kernel with args on stack - movl $SYS_exit,%eax - int $0x80 # exit if sigreturn fails - .globl _C_LABEL(esigcode) -_C_LABEL(esigcode): -#endif - -/*****************************************************************************/ - -/* - * The following primitives are used to fill and copy regions of memory. - */ - -/* - * XXX No section 9 man page for fillw. - * fillw seems to be very sparsely used (only in pccons it seems.) - * One wonders if it couldn't be done without. - * -- Perry Metzger, May 7, 2001 - */ -/* - * void fillw(short pattern, void *addr, size_t len); - * Write len copies of pattern at addr. 
- */ -/* LINTSTUB: Func: void fillw(short pattern, void *addr, size_t len) */ -ENTRY(fillw) - pushl %edi - movl 8(%esp),%eax - movl 12(%esp),%edi - movw %ax,%cx - rorl $16,%eax - movw %cx,%ax - cld - movl 16(%esp),%ecx - shrl %ecx # do longwords - rep - stosl - movl 16(%esp),%ecx - andl $1,%ecx # do remainder - rep - stosw - popl %edi - ret - -/* - * int kcopy(const void *from, void *to, size_t len); - * Copy len bytes, abort on fault. - */ -/* LINTSTUB: Func: int kcopy(const void *from, void *to, size_t len) */ -ENTRY(kcopy) - pushl %esi - pushl %edi - GET_CURPCB(%eax) # load curpcb into eax and set on-fault - pushl PCB_ONFAULT(%eax) - movl $_C_LABEL(kcopy_fault), PCB_ONFAULT(%eax) - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax # overlapping? - jb 1f - cld # nope, copy forward - shrl $2,%ecx # copy by 32-bit words - rep - movsl - movl 24(%esp),%ecx - andl $3,%ecx # any bytes left? - rep - movsb - - GET_CURPCB(%edx) # XXX save curpcb? - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret - - ALIGN_TEXT -1: addl %ecx,%edi # copy backward - addl %ecx,%esi - std - andl $3,%ecx # any fractional bytes? - decl %edi - decl %esi - rep - movsb - movl 24(%esp),%ecx # copy remainder by 32-bit words - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - cld - - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret - -/*****************************************************************************/ - -/* - * The following primitives are used to copy data in and out of the user's - * address space. - */ - -/* - * Default to the lowest-common-denominator. We will improve it - * later. 
- */ -#if defined(I386_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i386_copyout) -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) -#elif defined(I486_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) -#elif defined(I586_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */ -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */ -#elif defined(I686_CPU) -#define DEFAULT_COPYOUT _C_LABEL(i486_copyout) /* XXX */ -#define DEFAULT_COPYIN _C_LABEL(i386_copyin) /* XXX */ -#endif - - .data - - .globl _C_LABEL(copyout_func) -_C_LABEL(copyout_func): - .long DEFAULT_COPYOUT - - .globl _C_LABEL(copyin_func) -_C_LABEL(copyin_func): - .long DEFAULT_COPYIN - - .text - -/* - * int copyout(const void *from, void *to, size_t len); - * Copy len bytes into the user's address space. - * see copyout(9) - */ -/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(copyout) - DO_DEFERRED_SWITCH(%eax) - jmp *_C_LABEL(copyout_func) - -#if defined(I386_CPU) -/* LINTSTUB: Func: int i386_copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(i386_copyout) - pushl %esi - pushl %edi - pushl $0 - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is writable. The 486 will do this for us; the - * 386 will not. (We assume that pages in user space that are not - * writable by the user are not writable by the kernel either.) - */ - movl %edi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - testl %eax,%eax # anything to do? - jz 3f - - /* - * We have to check each PTE for (write) permission, since the CPU - * doesn't do it for us. - */ - - /* Compute number of pages. 
*/ - movl %edi,%ecx - andl $PGOFSET,%ecx - addl %eax,%ecx - decl %ecx - shrl $PGSHIFT,%ecx - - /* Compute PTE offset for start address. */ - shrl $PGSHIFT,%edi - - GET_CURPCB(%edx) - movl $2f,PCB_ONFAULT(%edx) - -1: /* Check PTE for each page. */ - testb $PG_RW,_C_LABEL(PTmap)(,%edi,4) - jz 2f - -4: incl %edi - decl %ecx - jns 1b - - movl 20(%esp),%edi - movl 24(%esp),%eax - jmp 3f - -2: /* Simulate a trap. */ - pushl %ecx - movl %edi,%eax - shll $PGSHIFT,%eax - pushl %eax - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # pop argument - popl %ecx - testl %eax,%eax # if not ok, return EFAULT - jz 4b - jmp _C_LABEL(copy_efault) - -3: GET_CURPCB(%edx) - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I386_CPU */ - -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) -/* LINTSTUB: Func: int i486_copyout(const void *kaddr, void *uaddr, size_t len) */ -ENTRY(i486_copyout) - pushl %esi - pushl %edi - pushl $0 - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. - */ - movl %edi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - GET_CURPCB(%edx) - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I486_CPU || I586_CPU || I686_CPU */ - -/* - * int copyin(const void *from, void *to, size_t len); - * Copy len bytes from the user's address space. 
- * see copyin(9) - */ -/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */ -ENTRY(copyin) - DO_DEFERRED_SWITCH(%eax) - jmp *_C_LABEL(copyin_func) - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \ - defined(I686_CPU) -/* LINTSTUB: Func: int i386_copyin(const void *uaddr, void *kaddr, size_t len) */ -ENTRY(i386_copyin) - pushl %esi - pushl %edi - GET_CURPCB(%eax) - pushl $0 - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) - - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is readable, and the CPU will do that for us. - */ - movl %esi,%edx - addl %eax,%edx - jc _C_LABEL(copy_efault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_efault) - - /* bcopy(%esi, %edi, %eax); */ - cld - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax - ret -#endif /* I386_CPU || I486_CPU || I586_CPU || I686_CPU */ - -/* LINTSTUB: Ignore */ -NENTRY(copy_efault) - movl $EFAULT,%eax - -/* - * kcopy_fault is used by kcopy and copy_fault is used by copyin/out. - * - * they're distinguished for lazy pmap switching. see trap(). - */ -/* LINTSTUB: Ignore */ -NENTRY(kcopy_fault) - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - ret - -/* LINTSTUB: Ignore */ -NENTRY(copy_fault) - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - ret - -/* - * int copyoutstr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, into the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. 
- * see copyoutstr(9) - */ -/* LINTSTUB: Func: int copyoutstr(const void *kaddr, void *uaddr, size_t len, size_t *done) */ -ENTRY(copyoutstr) - pushl %esi - pushl %edi - - DO_DEFERRED_SWITCH(%eax) - - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 5f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - /* Compute number of bytes in first page. */ - movl %edi,%eax - andl $PGOFSET,%eax - movl $PAGE_SIZE,%ecx - subl %eax,%ecx # ecx = PAGE_SIZE - (src % PAGE_SIZE) - - GET_CURPCB(%eax) - movl $6f,PCB_ONFAULT(%eax) - -1: /* - * Once per page, check that we are still within the bounds of user - * space, and check for a write fault. - */ - cmpl $VM_MAXUSER_ADDRESS,%edi - jae _C_LABEL(copystr_efault) - - /* Compute PTE offset. */ - movl %edi,%eax - shrl $PGSHIFT,%eax # calculate pte address - - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 2f - -6: /* Simulate a trap. */ - pushl %edx - pushl %edi - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear argument from stack - popl %edx - testl %eax,%eax - jnz _C_LABEL(copystr_efault) - -2: /* Copy up to end of this page. */ - subl %ecx,%edx # predecrement total count - jnc 3f - addl %edx,%ecx # ecx += (edx - ecx) = edx - xorl %edx,%edx - -3: decl %ecx - js 4f - lodsb - stosb - testb %al,%al - jnz 3b - - /* Success -- 0 byte reached. */ - addl %ecx,%edx # add back residual for this page - xorl %eax,%eax - jmp copystr_return - -4: /* Go to next page, if any. */ - movl $PAGE_SIZE,%ecx - testl %edx,%edx - jnz 1b - - /* edx is zero -- return ENAMETOOLONG. */ - movl $ENAMETOOLONG,%eax - jmp copystr_return -#endif /* I386_CPU */ - -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) -5: GET_CURPCB(%eax) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) - /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). 
- */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %edi,%eax - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20(%esp) - -1: incl %edx - cld - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%edi - jae _C_LABEL(copystr_efault) - movl $ENAMETOOLONG,%eax - jmp copystr_return -#endif /* I486_CPU || I586_CPU || I686_CPU */ - -/* - * int copyinstr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, from the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. - * see copyinstr(9) - */ -/* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */ -ENTRY(copyinstr) - pushl %esi - pushl %edi - - DO_DEFERRED_SWITCH(%eax) - - GET_CURPCB(%ecx) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) - - movl 12(%esp),%esi # %esi = from - movl 16(%esp),%edi # %edi = to - movl 20(%esp),%edx # %edx = maxlen - - /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%esi). - */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %esi,%eax - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20(%esp) - -1: incl %edx - cld - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%esi - jae _C_LABEL(copystr_efault) - movl $ENAMETOOLONG,%eax - jmp copystr_return - -/* LINTSTUB: Ignore */ -NENTRY(copystr_efault) - movl $EFAULT,%eax - -/* LINTSTUB: Ignore */ -NENTRY(copystr_fault) -copystr_return: - /* Set *lencopied and return %eax. 
*/ - GET_CURPCB(%ecx) - movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 8f - movl %ecx,(%edx) - -8: popl %edi - popl %esi - ret - -/* - * int copystr(const void *from, void *to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long. Return the - * number of characters copied (including the NUL) in *lencopied. If the - * string is too long, return ENAMETOOLONG; else return 0. - * see copystr(9) - */ -/* LINTSTUB: Func: int copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done) */ -ENTRY(copystr) - pushl %esi - pushl %edi - - movl 12(%esp),%esi # esi = from - movl 16(%esp),%edi # edi = to - movl 20(%esp),%edx # edx = maxlen - incl %edx - cld - -1: decl %edx - jz 4f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp 6f - -4: /* edx is zero -- return ENAMETOOLONG. */ - movl $ENAMETOOLONG,%eax - -6: /* Set *lencopied and return %eax. */ - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx - jz 7f - movl %ecx,(%edx) - -7: popl %edi - popl %esi - ret - -/* - * long fuword(const void *uaddr); - * Fetch an int from the user's address space. - * see fuword(9) - */ -/* LINTSTUB: Func: long fuword(const void *base) */ -ENTRY(fuword) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-4,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fusword(const void *uaddr); - * Fetch a short from the user's address space. 
- * see fusword(9) - */ -/* LINTSTUB: Func: int fusword(const void *base) */ -ENTRY(fusword) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fuswintr(const void *uaddr); - * Fetch a short from the user's address space. Can be called during an - * interrupt. - * see fuswintr(9) - */ -/* LINTSTUB: Func: int fuswintr(const void *base) */ -ENTRY(fuswintr) - cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE) - jnz _C_LABEL(fusuaddrfault) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - movl CPUVAR(CURLWP),%ecx - movl L_ADDR(%ecx),%ecx - movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx) - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * int fubyte(const void *uaddr); - * Fetch a byte from the user's address space. - * see fubyte(9) - */ -/* LINTSTUB: Func: int fubyte(const void *base) */ -ENTRY(fubyte) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-1,%edx - ja _C_LABEL(fusuaddrfault) - GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - movzbl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) - ret - -/* - * Handle faults from [fs]u*(). Clean up and return -1. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusufault) - movl $0,PCB_ONFAULT(%ecx) - movl $-1,%eax - ret - -/* - * Handle faults from [fs]u*(). Clean up and return -1. This differs from - * fusufault() in that trap() will recognize it and return immediately rather - * than trying to page fault. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusubail) - movl $0,PCB_ONFAULT(%ecx) - movl $-1,%eax - ret - -/* - * Handle earlier faults from [fs]u*(), due to our of range addresses. - */ -/* LINTSTUB: Ignore */ -NENTRY(fusuaddrfault) - movl $-1,%eax - ret - -/* - * int suword(void *uaddr, long x); - * Store an int in the user's address space. 
- * see suword(9) - */ -/* LINTSTUB: Func: int suword(void *base, long c) */ -ENTRY(suword) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-4,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. */ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) - -1: /* XXX also need to check the following 3 bytes for validity! */ -#endif - -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movl 8(%esp),%eax - movl %eax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * int susword(void *uaddr, short x); - * Store a short in the user's address space. - * see susword(9) - */ -/* LINTSTUB: Func: int susword(void *base, short c) */ -ENTRY(susword) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. */ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) - -1: /* XXX also need to check the following byte for validity! 
*/ -#endif - -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movl 8(%esp),%eax - movw %ax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * int suswintr(void *uaddr, short x); - * Store a short in the user's address space. Can be called during an - * interrupt. - * see suswintr(9) - */ -/* LINTSTUB: Func: int suswintr(void *base, short c) */ -ENTRY(suswintr) - cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE) - jnz _C_LABEL(fusuaddrfault) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-2,%edx - ja _C_LABEL(fusuaddrfault) - movl CPUVAR(CURLWP),%ecx - movl L_ADDR(%ecx),%ecx - movl $_C_LABEL(fusubail),PCB_ONFAULT(%ecx) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - - /* Simulate a trap. */ - jmp _C_LABEL(fusubail) - -1: /* XXX also need to check the following byte for validity! */ -#endif - -2: movl 8(%esp),%eax - movw %ax,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/* - * int subyte(void *uaddr, char x); - * Store a byte in the user's address space. - * see subyte(9) - */ -/* LINTSTUB: Func: int subyte(void *base, int c) */ -ENTRY(subyte) - DO_DEFERRED_SWITCH(%eax) - movl 4(%esp),%edx - cmpl $VM_MAXUSER_ADDRESS-1,%edx - ja _C_LABEL(fusuaddrfault) - -#if defined(I386_CPU) -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_386,_C_LABEL(cpu_class) - jne 2f -#endif /* I486_CPU || I586_CPU || I686_CPU */ - - GET_CURPCB(%eax) - movl $3f,PCB_ONFAULT(%eax) - - movl %edx,%eax - shrl $PGSHIFT,%eax # calculate pte address - testb $PG_RW,_C_LABEL(PTmap)(,%eax,4) - jnz 1f - -3: /* Simulate a trap. 
*/ - pushl %edx - pushl %edx - call _C_LABEL(trapwrite) # trapwrite(addr) - addl $4,%esp # clear parameter from the stack - popl %edx - GET_CURPCB(%ecx) - testl %eax,%eax - jnz _C_LABEL(fusufault) - -1: -#endif - -2: GET_CURPCB(%ecx) - movl $_C_LABEL(fusufault),PCB_ONFAULT(%ecx) - - movb 8(%esp),%al - movb %al,(%edx) - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - ret - -/*****************************************************************************/ - -/* - * The following is i386-specific nonsense. - */ - -/* - * void lgdt_finish(void); - * Finish load a new GDT pointer (do any necessary cleanup). - * XXX It's somewhat questionable whether reloading all the segment registers - * is necessary, since the actual descriptor data is not changed except by - * process creation and exit, both of which clean up via task switches. OTOH, - * this only happens at run time when the GDT is resized. - */ -/* LINTSTUB: Func: void lgdt_finish(void) */ -NENTRY(lgdt_finish) - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%ds - movw %ax,%es - movw %ax,%gs - movw %ax,%ss - movl $GSEL(GCPU_SEL, SEL_KPL),%eax - movw %ax,%fs - /* Reload code selector by doing intersegment return. */ - popl %eax - pushl $GSEL(GCODE_SEL, SEL_KPL) - pushl %eax - lret - -/*****************************************************************************/ - -/* - * These functions are primarily used by DDB. 
- */ - -/* LINTSTUB: Func: int setjmp (label_t *l) */ -ENTRY(setjmp) - movl 4(%esp),%eax - movl %ebx,(%eax) # save ebx - movl %esp,4(%eax) # save esp - movl %ebp,8(%eax) # save ebp - movl %esi,12(%eax) # save esi - movl %edi,16(%eax) # save edi - movl (%esp),%edx # get rta - movl %edx,20(%eax) # save eip - xorl %eax,%eax # return (0); - ret - -/* LINTSTUB: Func: void longjmp (label_t *l) */ -ENTRY(longjmp) - movl 4(%esp),%eax - movl (%eax),%ebx # restore ebx - movl 4(%eax),%esp # restore esp - movl 8(%eax),%ebp # restore ebp - movl 12(%eax),%esi # restore esi - movl 16(%eax),%edi # restore edi - movl 20(%eax),%edx # get rta - movl %edx,(%esp) # put in return frame - xorl %eax,%eax # return (1); - incl %eax - ret - -/*****************************************************************************/ - - .globl _C_LABEL(sched_whichqs),_C_LABEL(sched_qs) - .globl _C_LABEL(uvmexp),_C_LABEL(panic) - -#ifdef DIAGNOSTIC -NENTRY(switch_error) - pushl $1f -3: call _C_LABEL(panic) - /* NOTREACHED */ -1: .asciz "cpu_switch" -#endif /* DIAGNOSTIC */ - -/* - * void cpu_switch(struct lwp *) - * Find a runnable process and switch to it. Wait if necessary. If the new - * process is the same as the old one, we short-circuit the context save and - * restore. - * - * Note that the stack frame layout is known to "struct switchframe" - * in <machine/frame.h> and to the code in cpu_fork() which initializes - * it for a new lwp. - */ -ENTRY(cpu_switch) - pushl %ebx - pushl %esi - pushl %edi - -#ifdef DEBUG - cmpl $IPL_SCHED,CPUVAR(ILEVEL) - jae 1f - pushl $2f - call _C_LABEL(panic) - /* NOTREACHED */ -2: .asciz "not splsched() in cpu_switch!" -1: -#endif /* DEBUG */ - - movl 16(%esp),%esi # current - - /* - * Clear curlwp so that we don't accumulate system time while idle. - * This also insures that schedcpu() will move the old lwp to - * the correct queue if it happens to get called from the spllower() - * below and changes the priority. (See corresponding comment in - * userret()). 
- */ - movl $0,CPUVAR(CURLWP) - /* - * First phase: find new lwp. - * - * Registers: - * %eax - queue head, scratch, then zero - * %ebx - queue number - * %ecx - cached value of whichqs - * %edx - next lwp in queue - * %esi - old lwp - * %edi - new lwp - */ - - /* Look for new lwp. */ - CLI(%ecx) # splhigh doesn't do a cli - movl _C_LABEL(sched_whichqs),%ecx - bsfl %ecx,%ebx # find a full q - jnz switch_dequeue - - /* - * idling: save old context. - * - * Registers: - * %eax, %ecx - scratch - * %esi - old lwp, then old pcb - * %edi - idle pcb - */ - - pushl %esi - call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc) - addl $4,%esp - - movl L_ADDR(%esi),%esi - - /* Save stack pointers. */ - movl %esp,PCB_ESP(%esi) - movl %ebp,PCB_EBP(%esi) - - /* Find idle PCB for this CPU */ -#ifndef MULTIPROCESSOR - movl $_C_LABEL(lwp0),%ebx - movl L_ADDR(%ebx),%edi - movl L_MD_TSS_SEL(%ebx),%edx -#else - movl CPUVAR(IDLE_PCB),%edi - movl CPUVAR(IDLE_TSS_SEL),%edx -#endif - movl $0,CPUVAR(CURLWP) /* In case we fault... */ - - /* Restore the idle context (avoid interrupts) */ - CLI(%ecx) - - /* Restore stack pointers. */ - movl PCB_ESP(%edi),%esp - movl PCB_EBP(%edi),%ebp - - pushl %edi - call _C_LABEL(i386_switch_context) - addl $4,%esp - - /* Record new pcb. */ - SET_CURPCB(%edi) - - xorl %esi,%esi - STI(%eax) -idle_unlock: -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_unlock_idle) -#endif - /* Interrupts are okay again. */ - pushl $IPL_NONE # spl0() - call _C_LABEL(Xspllower) # process pending interrupts - addl $4,%esp - jmp idle_start -idle_zero: - STIC(%eax) - jz 4f - call _C_LABEL(stipending) - testl %eax,%eax - jz 4f - pushl $IPL_NONE - call _C_LABEL(Xspllower) - addl $4,%esp - jmp idle_start -4: - call _C_LABEL(uvm_pageidlezero) - CLI(%eax) - cmpl $0,_C_LABEL(sched_whichqs) - jnz idle_exit -idle_loop: - /* Try to zero some pages. 
*/ - movl _C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx - testl %ecx,%ecx - jnz idle_zero - call _C_LABEL(idle_block) - cmpl $0,_C_LABEL(sched_whichqs) - jnz idle_exit - STIC(%eax) - jz 4f - call _C_LABEL(stipending) - testl %eax,%eax - jz 4f - pushl $IPL_NONE - call _C_LABEL(Xspllower) - addl $4,%esp - jmp idle_start -4: - movl $__HYPERVISOR_yield,%eax - movl $__SCHEDOP_yield,%ebx - TRAP_INSTR -NENTRY(mpidle) -idle_start: - CLI(%eax) - cmpl $0,_C_LABEL(sched_whichqs) - jz idle_loop -idle_exit: - movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh - STI(%eax) -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_lock_idle) -#endif - movl _C_LABEL(sched_whichqs),%ecx - bsfl %ecx,%ebx - jz idle_unlock - -#ifdef XENDEBUG_LOW - pushl %ecx - call _C_LABEL(xen_dbg1) - xorl %ecx,%ecx - movl %ecx,_C_LABEL(xen_once) - popl %ecx -#endif -switch_dequeue: - /* - * we're running at splhigh(), but it's otherwise okay to take - * interrupts here. - */ - STI(%edi) - leal _C_LABEL(sched_qs)(,%ebx,8),%eax # select q - - movl L_FORW(%eax),%edi # unlink from front of process q -#ifdef DIAGNOSTIC - cmpl %edi,%eax # linked to self (i.e. nothing queued)? - je _C_LABEL(switch_error) # not possible -#endif /* DIAGNOSTIC */ - movl L_FORW(%edi),%edx - movl %edx,L_FORW(%eax) - movl %eax,L_BACK(%edx) - - cmpl %edx,%eax # q empty? - jne 3f - - btrl %ebx,%ecx # yes, clear to indicate empty - movl %ecx,_C_LABEL(sched_whichqs) # update q status - -3: /* We just did it. */ - xorl %eax,%eax - CLEAR_RESCHED(%eax) - -switch_resume: -#ifdef DIAGNOSTIC - cmpl %eax,L_WCHAN(%edi) # Waiting for something? - jne _C_LABEL(switch_error) # Yes; shouldn't be queued. - cmpb $LSRUN,L_STAT(%edi) # In run state? - jne _C_LABEL(switch_error) # No; shouldn't be queued. -#endif /* DIAGNOSTIC */ - - /* Isolate lwp. XXX Is this necessary? */ - movl %eax,L_BACK(%edi) - - /* Record new lwp. */ - movb $LSONPROC,L_STAT(%edi) # l->l_stat = LSONPROC - SET_CURLWP(%edi,%ecx) - - /* Skip context switch if same lwp. 
*/ - xorl %ebx,%ebx - cmpl %edi,%esi - je switch_return - - /* If old lwp exited, don't bother. */ - testl %esi,%esi - jz switch_exited - - /* - * Second phase: save old context. - * - * Registers: - * %eax, %ecx - scratch - * %esi - old lwp, then old pcb - * %edi - new lwp - */ - - pushl %esi - call _C_LABEL(pmap_deactivate2) # pmap_deactivate(oldproc) - addl $4,%esp - - movl L_ADDR(%esi),%esi - - /* Save stack pointers. */ - movl %esp,PCB_ESP(%esi) - movl %ebp,PCB_EBP(%esi) - -switch_exited: - /* - * Third phase: restore saved context. - * - * Registers: - * %eax, %ebx, %ecx, %edx - scratch - * %esi - new pcb - * %edi - new lwp - */ - - /* No interrupts while loading new state. */ - CLI(%eax) - movl L_ADDR(%edi),%esi - - /* Restore stack pointers. */ - movl PCB_ESP(%esi),%esp - movl PCB_EBP(%esi),%ebp - -#if 0 - /* Don't bother with the rest if switching to a system process. */ - testl $P_SYSTEM,L_FLAG(%edi); XXX NJWLWP lwp's don't have P_SYSTEM! - jnz switch_restored ; XXX skip stack_switch+pmap_activate -#endif - - pushl %edi - call _C_LABEL(pmap_activate) # pmap_activate(p) - addl $4,%esp - - pushl %esi - call _C_LABEL(i386_switch_context) - addl $4,%esp - - /* Record new pcb. */ - SET_CURPCB(%esi) - - /* Interrupts are okay again. 
*/ - STI(%edi) - -/* - * Check for restartable atomic sequences (RAS) - */ - movl CPUVAR(CURLWP),%edi - movl L_PROC(%edi),%esi - cmpl $0,P_RASLIST(%esi) - jne 2f -1: - movl $1,%ebx - -switch_return: -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - call _C_LABEL(sched_unlock_idle) -#endif - pushl $IPL_NONE # spl0() - call _C_LABEL(Xspllower) # process pending interrupts - addl $4,%esp - movl $IPL_HIGH,CPUVAR(ILEVEL) # splhigh() - - movl %ebx,%eax - - popl %edi - popl %esi - popl %ebx - ret - -2: # check RAS list - movl L_MD_REGS(%edi),%ebx - movl TF_EIP(%ebx),%eax - pushl %eax - pushl %esi - call _C_LABEL(ras_lookup) - addl $8,%esp - cmpl $-1,%eax - je 1b - movl %eax,TF_EIP(%ebx) - jmp 1b - -/* - * void cpu_switchto(struct lwp *current, struct lwp *next) - * Switch to the specified next LWP. - */ -ENTRY(cpu_switchto) - pushl %ebx - pushl %esi - pushl %edi - -#ifdef DEBUG - cmpl $IPL_SCHED,CPUVAR(ILEVEL) - jae 1f - pushl $2f - call _C_LABEL(panic) - /* NOTREACHED */ -2: .asciz "not splsched() in cpu_switchto!" -1: -#endif /* DEBUG */ - - movl 16(%esp),%esi # current - movl 20(%esp),%edi # next - - /* - * Clear curlwp so that we don't accumulate system time while idle. - * This also insures that schedcpu() will move the old process to - * the correct queue if it happens to get called from the spllower() - * below and changes the priority. (See corresponding comment in - * usrret()). - * - * XXX Is this necessary? We know we won't go idle. - */ - movl $0,CPUVAR(CURLWP) - - /* - * We're running at splhigh(), but it's otherwise okay to take - * interrupts here. - */ - STI(%eax) - - /* Jump into the middle of cpu_switch */ - xorl %eax,%eax - jmp switch_resume - -/* - * void cpu_exit(struct lwp *l) - * Switch to the appropriate idle context (lwp0's if uniprocessor; the CPU's - * if multiprocessor) and deallocate the address space and kernel stack for p. - * Then jump into cpu_switch(), as if we were in the idle proc all along. 
- */ -#ifndef MULTIPROCESSOR - .globl _C_LABEL(lwp0) -#endif - .globl _C_LABEL(uvmspace_free),_C_LABEL(kernel_map) - .globl _C_LABEL(uvm_km_free),_C_LABEL(tss_free) -/* LINTSTUB: Func: void cpu_exit(struct lwp *l) */ -ENTRY(cpu_exit) - movl 4(%esp),%edi # old process -#ifndef MULTIPROCESSOR - movl $_C_LABEL(lwp0),%ebx - movl L_ADDR(%ebx),%esi - movl L_MD_TSS_SEL(%ebx),%edx -#else - movl CPUVAR(IDLE_PCB),%esi - movl CPUVAR(IDLE_TSS_SEL),%edx -#endif - /* In case we fault... */ - movl $0,CPUVAR(CURLWP) - - /* Restore the idle context. */ - CLI(%eax) - - /* Restore stack pointers. */ - movl PCB_ESP(%esi),%esp - movl PCB_EBP(%esi),%ebp - - pushl %esi - call _C_LABEL(i386_switch_context) - addl $4,%esp - - /* Record new pcb. */ - SET_CURPCB(%esi) - - /* Interrupts are okay again. */ - STI(%eax) - - /* - * Schedule the dead LWP's stack to be freed. - */ - pushl %edi - call _C_LABEL(lwp_exit2) - addl $4,%esp - - /* Jump into cpu_switch() with the right state. */ - xorl %esi,%esi - movl %esi,CPUVAR(CURLWP) - jmp idle_start - -/* - * void savectx(struct pcb *pcb); - * Update pcb, saving current processor state. - */ -/* LINTSTUB: Func: void savectx(struct pcb *pcb) */ -ENTRY(savectx) - movl 4(%esp),%edx # edx = p->p_addr - - /* Save stack pointers. */ - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - - ret - -/* - * Old call gate entry for syscall - */ -/* LINTSTUB: Var: char Xosyscall[1]; */ -IDTVEC(osyscall) - /* Set eflags in trap frame. 
*/ - pushfl - popl 8(%esp) - pushl $7 # size of instruction for restart - jmp syscall1 - -/* - * Trap gate entry for syscall - */ -/* LINTSTUB: Var: char Xsyscall[1]; */ -IDTVEC(syscall) - pushl $2 # size of instruction for restart -syscall1: - pushl $T_ASTFLT # trap # for doing ASTs - INTRENTRY - -#ifdef DIAGNOSTIC - cmpl $0, CPUVAR(WANT_PMAPLOAD) - jz 1f - pushl $6f - call _C_LABEL(printf) - addl $4, %esp -1: - movl CPUVAR(ILEVEL),%ebx - testl %ebx,%ebx - jz 1f - pushl $5f - call _C_LABEL(printf) - addl $4,%esp -#ifdef DDB - int $3 -#endif -1: -#endif /* DIAGNOSTIC */ - movl CPUVAR(CURLWP),%edx - movl %esp,L_MD_REGS(%edx) # save pointer to frame - movl L_PROC(%edx),%edx - pushl %esp - call *P_MD_SYSCALL(%edx) # get pointer to syscall() function - addl $4,%esp -syscall_checkast: - /* Check for ASTs on exit to user mode. */ - CLI(%eax) - CHECK_ASTPENDING(%eax) - je 1f - /* Always returning to user mode here. */ - CLEAR_ASTPENDING(%eax) - STI(%eax) - /* Pushed T_ASTFLT into tf_trapno on entry. */ - pushl %esp - call _C_LABEL(trap) - addl $4,%esp - jmp syscall_checkast -1: STI(%eax) - CHECK_DEFERRED_SWITCH(%eax) - jnz 9f -#ifndef DIAGNOSTIC - INTRFASTEXIT -#else /* DIAGNOSTIC */ - cmpl $IPL_NONE,CPUVAR(ILEVEL) - jne 3f - INTRFASTEXIT -3: pushl $4f - call _C_LABEL(printf) - addl $4,%esp -#ifdef DDB - int $3 -#endif /* DDB */ - movl $IPL_NONE,CPUVAR(ILEVEL) - jmp 2b -4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n" -5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n" -6: .asciz "WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n" -#endif /* DIAGNOSTIC */ -9: call _C_LABEL(pmap_load) - jmp syscall_checkast /* re-check ASTs */ - -#if NNPX > 0 -/* - * Special interrupt handlers. Someday intr0-intr15 will be used to count - * interrupts. We'll still need a special exception 16 handler. The busy - * latch stuff in probintr() can be moved to npxprobe(). 
- */ - -/* LINTSTUB: Func: void probeintr(void) */ -NENTRY(probeintr) - ss - incl _C_LABEL(npx_intrs_while_probing) - pushl %eax - movb $0x20,%al # EOI (asm in strings loses cpp features) - outb %al,$0xa0 # IO_ICU2 - outb %al,$0x20 # IO_ICU1 - movb $0,%al - outb %al,$0xf0 # clear BUSY# latch - popl %eax - iret - -/* LINTSTUB: Func: void probetrap(void) */ -NENTRY(probetrap) - ss - incl _C_LABEL(npx_traps_while_probing) - fnclex - iret - -/* LINTSTUB: Func: int npx586bug1(int a, int b) */ -NENTRY(npx586bug1) - fildl 4(%esp) # x - fildl 8(%esp) # y - fld %st(1) - fdiv %st(1),%st # x/y - fmulp %st,%st(1) # (x/y)*y - fsubrp %st,%st(1) # x-(x/y)*y - pushl $0 - fistpl (%esp) - popl %eax - ret -#endif /* NNPX > 0 */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,2567 +0,0 @@ -/* $NetBSD: machdep.c,v 1.2.2.1 2004/05/22 15:58:02 he Exp $ */ -/* NetBSD: machdep.c,v 1.552 2004/03/24 15:34:49 atatat Exp */ - -/*- - * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace - * Simulation Facility, NASA Ames Research Center. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/*- - * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)machdep.c 7.4 (Berkeley) 6/3/91 - */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.2.2.1 2004/05/22 15:58:02 he Exp $"); - -#include "opt_beep.h" -#include "opt_compat_ibcs2.h" -#include "opt_compat_mach.h" /* need to get the right segment def */ -#include "opt_compat_netbsd.h" -#include "opt_compat_svr4.h" -#include "opt_cpureset_delay.h" -#include "opt_cputype.h" -#include "opt_ddb.h" -#include "opt_ipkdb.h" -#include "opt_kgdb.h" -#include "opt_mtrr.h" -#include "opt_multiprocessor.h" -#include "opt_realmem.h" -#include "opt_user_ldt.h" -#include "opt_vm86.h" -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/signal.h> -#include <sys/signalvar.h> -#include <sys/kernel.h> -#include <sys/proc.h> -#include <sys/user.h> -#include <sys/exec.h> -#include <sys/buf.h> -#include <sys/reboot.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/msgbuf.h> -#include <sys/mount.h> -#include 
<sys/vnode.h> -#include <sys/extent.h> -#include <sys/syscallargs.h> -#include <sys/core.h> -#include <sys/kcore.h> -#include <sys/ucontext.h> -#include <machine/kcore.h> -#include <sys/ras.h> -#include <sys/sa.h> -#include <sys/savar.h> -#include <sys/ksyms.h> - -#ifdef IPKDB -#include <ipkdb/ipkdb.h> -#endif - -#ifdef KGDB -#include <sys/kgdb.h> -#endif - -#include <dev/cons.h> - -#include <uvm/uvm_extern.h> -#include <uvm/uvm_page.h> - -#include <sys/sysctl.h> - -#include <machine/cpu.h> -#include <machine/cpufunc.h> -#include <machine/cpuvar.h> -#include <machine/gdt.h> -#include <machine/pio.h> -#include <machine/psl.h> -#include <machine/reg.h> -#include <machine/specialreg.h> -#include <machine/bootinfo.h> -#include <machine/mtrr.h> -#include <machine/evtchn.h> - -#include <dev/isa/isareg.h> -#include <machine/isa_machdep.h> -#include <dev/ic/i8042reg.h> - -#ifdef DDB -#include <machine/db_machdep.h> -#include <ddb/db_extern.h> -#endif - -#ifdef VM86 -#include <machine/vm86.h> -#endif - -#include "acpi.h" -#include "apm.h" -#include "bioscall.h" - -#if NBIOSCALL > 0 -#include <machine/bioscall.h> -#endif - -#if NACPI > 0 -#include <dev/acpi/acpivar.h> -#define ACPI_MACHDEP_PRIVATE -#include <machine/acpi_machdep.h> -#endif - -#if NAPM > 0 -#include <machine/apmvar.h> -#endif - -#include "isa.h" -#include "isadma.h" -#include "npx.h" -#include "ksyms.h" - -#include "mca.h" -#if NMCA > 0 -#include <machine/mca_machdep.h> /* for mca_busprobe() */ -#endif - -#ifdef MULTIPROCESSOR /* XXX */ -#include <machine/mpbiosvar.h> /* XXX */ -#endif /* XXX */ - -#include <machine/xen.h> -#include <machine/hypervisor.h> - -#if defined(DDB) || defined(KGDB) -#include <ddb/db_interface.h> -#include <ddb/db_output.h> - -void ddb_trap_hook(int); -#endif - -/* #define XENDEBUG */ -/* #define XENDEBUG_LOW */ - -#ifdef XENDEBUG -extern void printk(char *, ...); -#define XENPRINTF(x) printf x -#define XENPRINTK(x) printk x -#else -#define XENPRINTF(x) -#define XENPRINTK(x) -#endif 
-#define PRINTK(x) printf x - -#ifdef XENDEBUG_LOW -void xen_dbglow_init(void); -#endif - -#ifndef BEEP_ONHALT_COUNT -#define BEEP_ONHALT_COUNT 3 -#endif -#ifndef BEEP_ONHALT_PITCH -#define BEEP_ONHALT_PITCH 1500 -#endif -#ifndef BEEP_ONHALT_PERIOD -#define BEEP_ONHALT_PERIOD 250 -#endif - -/* the following is used externally (sysctl_hw) */ -char machine[] = "i386"; /* CPU "architecture" */ -char machine_arch[] = "i386"; /* machine == machine_arch */ - -char bootinfo[BOOTINFO_MAXSIZE]; - -struct bi_devmatch *i386_alldisks = NULL; -int i386_ndisks = 0; - -#ifdef CPURESET_DELAY -int cpureset_delay = CPURESET_DELAY; -#else -int cpureset_delay = 2000; /* default to 2s */ -#endif - -#ifdef MTRR -struct mtrr_funcs *mtrr_funcs; -#endif - -#ifdef COMPAT_NOMID -static int exec_nomid(struct proc *, struct exec_package *); -#endif - -int physmem; -int dumpmem_low; -int dumpmem_high; -unsigned int cpu_feature; -int cpu_class; -int i386_fpu_present; -int i386_fpu_exception; -int i386_fpu_fdivbug; - -int i386_use_fxsave; -int i386_has_sse; -int i386_has_sse2; - -int tmx86_has_longrun; - -vaddr_t msgbuf_vaddr; -paddr_t msgbuf_paddr; - -vaddr_t idt_vaddr; -paddr_t idt_paddr; - -#ifdef I586_CPU -vaddr_t pentium_idt_vaddr; -#endif - -struct vm_map *exec_map = NULL; -struct vm_map *mb_map = NULL; -struct vm_map *phys_map = NULL; - -extern paddr_t avail_start, avail_end; -extern paddr_t pmap_pa_start, pmap_pa_end; - -#ifdef ISA_CLOCK -void (*delay_func)(int) = i8254_delay; -void (*microtime_func)(struct timeval *) = i8254_microtime; -void (*initclock_func)(void) = i8254_initclocks; -#else -void (*delay_func)(int) = xen_delay; -void (*microtime_func)(struct timeval *) = xen_microtime; -void (*initclock_func)(void) = xen_initclocks; -#endif - -void hypervisor_callback(void); -void failsafe_callback(void); - -/* - * Size of memory segments, before any memory is stolen. 
- */ -phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; -int mem_cluster_cnt; - -int cpu_dump(void); -int cpu_dumpsize(void); -u_long cpu_dump_mempagecnt(void); -void dumpsys(void); -void init386(paddr_t); -void initgdt(void); - -#if !defined(REALBASEMEM) && !defined(REALEXTMEM) -void add_mem_cluster(u_int64_t, u_int64_t, u_int32_t); -#endif /* !defnied(REALBASEMEM) && !defined(REALEXTMEM) */ - -extern int time_adjusted; - -/* - * Machine-dependent startup code - */ -void -cpu_startup() -{ - int x; - vaddr_t minaddr, maxaddr; - char pbuf[9]; - - /* - * Initialize error message buffer (et end of core). - */ - msgbuf_vaddr = uvm_km_valloc(kernel_map, x86_round_page(MSGBUFSIZE)); - if (msgbuf_vaddr == 0) - panic("failed to valloc msgbuf_vaddr"); - - /* msgbuf_paddr was init'd in pmap */ - for (x = 0; x < btoc(MSGBUFSIZE); x++) - pmap_kenter_pa((vaddr_t)msgbuf_vaddr + x * PAGE_SIZE, - msgbuf_paddr + x * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - - initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE)); - - printf("%s", version); - -#ifdef TRAPLOG - /* - * Enable recording of branch from/to in MSR's - */ - wrmsr(MSR_DEBUGCTLMSR, 0x1); -#endif - - format_bytes(pbuf, sizeof(pbuf), ptoa(physmem)); - printf("total memory = %s\n", pbuf); - - minaddr = 0; - - /* - * Allocate a submap for exec arguments. This map effectively - * limits the number of processes exec'ing at any time. - */ - exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL); - - /* - * Allocate a submap for physio - */ - phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - VM_PHYS_SIZE, 0, FALSE, NULL); - - /* - * Finally, allocate mbuf cluster submap. 
- */ - mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL); - - format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free)); - printf("avail memory = %s\n", pbuf); - - /* Safe for i/o port / memory space allocation to use malloc now. */ - x86_bus_space_mallocok(); -} - -/* - * Set up proc0's TSS and LDT. - */ -void -i386_proc0_tss_ldt_init() -{ - struct pcb *pcb; - int x; - - gdt_init(); - - cpu_info_primary.ci_curpcb = pcb = &lwp0.l_addr->u_pcb; - - pcb->pcb_tss.tss_ioopt = - ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16 - | SEL_KPL; /* i/o pl */ - - for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++) - pcb->pcb_iomap[x] = 0xffffffff; - - pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - pcb->pcb_cr0 = rcr0(); - pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - pcb->pcb_tss.tss_esp0 = (int)lwp0.l_addr + USPACE - 16; - lwp0.l_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_esp0 - 1; - lwp0.l_md.md_tss_sel = tss_alloc(pcb); - -#ifndef XEN - ltr(lwp0.l_md.md_tss_sel); - lldt(pcb->pcb_ldt_sel); -#else - HYPERVISOR_fpu_taskswitch(1); - XENPRINTF(("lwp tss sp %p ss %04x/%04x\n", - (void *)pcb->pcb_tss.tss_esp0, - pcb->pcb_tss.tss_ss0, IDXSEL(pcb->pcb_tss.tss_ss0))); - HYPERVISOR_stack_switch(pcb->pcb_tss.tss_ss0, pcb->pcb_tss.tss_esp0); -#endif -} - -/* - * Set up TSS and LDT for a new PCB. 
- */ - -void -i386_init_pcb_tss_ldt(struct cpu_info *ci) -{ - int x; - struct pcb *pcb = ci->ci_idle_pcb; - - pcb->pcb_tss.tss_ioopt = - ((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss) << 16 - | SEL_KPL; /* i/o pl */ - for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++) - pcb->pcb_iomap[x] = 0xffffffff; - - pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - pcb->pcb_cr0 = rcr0(); - - ci->ci_idle_tss_sel = tss_alloc(pcb); -} - -/* - * Switch context: - * - honor CR0_TS in saved CR0 and request DNA exception on FPU use - * - switch stack pointer for user->kernel transition - */ -void -i386_switch_context(struct pcb *new) -{ - dom0_op_t op; - struct cpu_info *ci; - - ci = curcpu(); - if (ci->ci_fpused) { - HYPERVISOR_fpu_taskswitch(1); - ci->ci_fpused = 0; - } - - HYPERVISOR_stack_switch(new->pcb_tss.tss_ss0, new->pcb_tss.tss_esp0); - - if (xen_start_info.flags & SIF_PRIVILEGED) { - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = new->pcb_tss.tss_ioopt & SEL_RPL; /* i/o pl */ - HYPERVISOR_dom0_op(&op); - } -} - -/* - * sysctl helper routine for machdep.tm* nodes. 
- */ -static int -sysctl_machdep_tm_longrun(SYSCTLFN_ARGS) -{ - struct sysctlnode node; - int io, error; - - if (!tmx86_has_longrun) - return (EOPNOTSUPP); - - node = *rnode; - node.sysctl_data = &io; - - switch (rnode->sysctl_num) { - case CPU_TMLR_MODE: - io = (int)(crusoe_longrun = tmx86_get_longrun_mode()); - break; - case CPU_TMLR_FREQUENCY: - tmx86_get_longrun_status_all(); - io = crusoe_frequency; - break; - case CPU_TMLR_VOLTAGE: - tmx86_get_longrun_status_all(); - io = crusoe_voltage; - break; - case CPU_TMLR_PERCENTAGE: - tmx86_get_longrun_status_all(); - io = crusoe_percentage; - break; - default: - return (EOPNOTSUPP); - } - - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error || newp == NULL) - return (error); - - if (rnode->sysctl_num == CPU_TMLR_MODE) { - if (tmx86_set_longrun_mode(io)) - crusoe_longrun = (u_int)io; - else - return (EINVAL); - } - - return (0); -} - -/* - * sysctl helper routine for machdep.booted_kernel - */ -static int -sysctl_machdep_booted_kernel(SYSCTLFN_ARGS) -{ - struct btinfo_bootpath *bibp; - struct sysctlnode node; - - bibp = lookup_bootinfo(BTINFO_BOOTPATH); - if(!bibp) - return(ENOENT); /* ??? */ - - node = *rnode; - node.sysctl_data = bibp->bootpath; - node.sysctl_size = sizeof(bibp->bootpath); - return (sysctl_lookup(SYSCTLFN_CALL(&node))); -} - -/* - * sysctl helper routine for machdep.diskinfo - */ -static int -sysctl_machdep_diskinfo(SYSCTLFN_ARGS) -{ - struct sysctlnode node; - - node = *rnode; - node.sysctl_data = i386_alldisks; - node.sysctl_size = sizeof(struct disklist) + - (i386_ndisks - 1) * sizeof(struct nativedisk_info); - return (sysctl_lookup(SYSCTLFN_CALL(&node))); -} - -/* - * machine dependent system variables. 
- */ -SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup") -{ - - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_NODE, "machdep", NULL, - NULL, 0, NULL, 0, - CTL_MACHDEP, CTL_EOL); - - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_STRUCT, "console_device", NULL, - sysctl_consdev, 0, NULL, sizeof(dev_t), - CTL_MACHDEP, CPU_CONSDEV, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "biosbasemem", NULL, - NULL, 0, &biosbasemem, 0, - CTL_MACHDEP, CPU_BIOSBASEMEM, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "biosextmem", NULL, - NULL, 0, &biosextmem, 0, - CTL_MACHDEP, CPU_BIOSEXTMEM, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "nkpde", NULL, - NULL, 0, &nkpde, 0, - CTL_MACHDEP, CPU_NKPDE, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_STRING, "booted_kernel", NULL, - sysctl_machdep_booted_kernel, 0, NULL, 0, - CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_STRUCT, "diskinfo", NULL, - sysctl_machdep_diskinfo, 0, NULL, 0, - CTL_MACHDEP, CPU_DISKINFO, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "fpu_present", NULL, - NULL, 0, &i386_fpu_present, 0, - CTL_MACHDEP, CPU_FPU_PRESENT, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "osfxsr", NULL, - NULL, 0, &i386_use_fxsave, 0, - CTL_MACHDEP, CPU_OSFXSR, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "sse", NULL, - NULL, 0, &i386_has_sse, 0, - CTL_MACHDEP, CPU_SSE, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "sse2", NULL, - NULL, 0, &i386_has_sse2, 0, - CTL_MACHDEP, CPU_SSE2, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "tm_longrun_mode", NULL, - 
sysctl_machdep_tm_longrun, 0, NULL, 0, - CTL_MACHDEP, CPU_TMLR_MODE, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "tm_longrun_frequency", NULL, - sysctl_machdep_tm_longrun, 0, NULL, 0, - CTL_MACHDEP, CPU_TMLR_FREQUENCY, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "tm_longrun_voltage", NULL, - sysctl_machdep_tm_longrun, 0, NULL, 0, - CTL_MACHDEP, CPU_TMLR_VOLTAGE, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_INT, "tm_longrun_percentage", NULL, - sysctl_machdep_tm_longrun, 0, NULL, 0, - CTL_MACHDEP, CPU_TMLR_PERCENTAGE, CTL_EOL); -} - -void * -getframe(struct lwp *l, int sig, int *onstack) -{ - struct proc *p = l->l_proc; - struct sigctx *ctx = &p->p_sigctx; - struct trapframe *tf = l->l_md.md_regs; - - /* Do we need to jump onto the signal stack? */ - *onstack = (ctx->ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 - && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; - if (*onstack) - return (char *)ctx->ps_sigstk.ss_sp + ctx->ps_sigstk.ss_size; -#ifdef VM86 - if (tf->tf_eflags & PSL_VM) - return (void *)(tf->tf_esp + (tf->tf_ss << 4)); - else -#endif - return (void *)tf->tf_esp; -} - -/* - * Build context to run handler in. We invoke the handler - * directly, only returning via the trampoline. Note the - * trampoline version numbers are coordinated with machine- - * dependent code in libc. 
- */ -void -buildcontext(struct lwp *l, int sel, void *catcher, void *fp) -{ - struct trapframe *tf = l->l_md.md_regs; - - tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_eip = (int)catcher; - tf->tf_cs = GSEL(sel, SEL_UPL); - tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC); - tf->tf_esp = (int)fp; - tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); -} - -static void -sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) -{ - struct lwp *l = curlwp; - struct proc *p = l->l_proc; - struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map); - int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? - GUCODEBIG_SEL : GUCODE_SEL; - struct sigacts *ps = p->p_sigacts; - int onstack; - int sig = ksi->ksi_signo; - struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame; - sig_t catcher = SIGACTION(p, sig).sa_handler; - struct trapframe *tf = l->l_md.md_regs; - - fp--; - - /* Build stack frame for signal trampoline. */ - switch (ps->sa_sigdesc[sig].sd_vers) { - case 0: /* handled by sendsig_sigcontext */ - case 1: /* handled by sendsig_sigcontext */ - default: /* unknown version */ - printf("nsendsig: bad version %d\n", - ps->sa_sigdesc[sig].sd_vers); - sigexit(l, SIGILL); - case 2: - break; - } - - frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp; - frame.sf_signum = sig; - frame.sf_sip = &fp->sf_si; - frame.sf_ucp = &fp->sf_uc; - frame.sf_si._info = ksi->ksi_info; - frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM; - frame.sf_uc.uc_sigmask = *mask; - frame.sf_uc.uc_link = NULL; - frame.sf_uc.uc_flags |= (p->p_sigctx.ps_sigstk.ss_flags & SS_ONSTACK) - ? 
_UC_SETSTACK : _UC_CLRSTACK; - memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); - cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); - - if (tf->tf_eflags & PSL_VM) - (*p->p_emul->e_syscall_intern)(p); - - if (copyout(&frame, fp, sizeof(frame)) != 0) { - /* - * Process has trashed its stack; give it an illegal - * instruction to halt it in its tracks. - */ - sigexit(l, SIGILL); - /* NOTREACHED */ - } - - buildcontext(l, sel, catcher, fp); - - /* Remember that we're now on the signal stack. */ - if (onstack) - p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK; -} - -void -sendsig(const ksiginfo_t *ksi, const sigset_t *mask) -{ -#ifdef COMPAT_16 - if (curproc->p_sigacts->sa_sigdesc[ksi->ksi_signo].sd_vers < 2) - sendsig_sigcontext(ksi, mask); - else -#endif - sendsig_siginfo(ksi, mask); -} - -void -cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted, void *sas, - void *ap, void *sp, sa_upcall_t upcall) -{ - struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - struct saframe *sf, frame; - struct trapframe *tf; - - tf = l->l_md.md_regs; - - /* Finally, copy out the rest of the frame. */ - frame.sa_type = type; - frame.sa_sas = sas; - frame.sa_events = nevents; - frame.sa_interrupted = ninterrupted; - frame.sa_arg = ap; - frame.sa_ra = 0; - - sf = (struct saframe *)sp - 1; - if (copyout(&frame, sf, sizeof(frame)) != 0) { - /* Copying onto the stack didn't work. Die. */ - sigexit(l, SIGILL); - /* NOTREACHED */ - } - - tf->tf_eip = (int) upcall; - tf->tf_esp = (int) sf; - tf->tf_ebp = 0; /* indicate call-frame-top to debuggers */ - tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? 
- GSEL(GUCODEBIG_SEL, SEL_UPL) : GSEL(GUCODE_SEL, SEL_UPL); - tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); - tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC); -} - -int waittime = -1; -struct pcb dumppcb; - -void -cpu_reboot(int howto, char *bootstr) -{ - - if (cold) { - howto |= RB_HALT; - goto haltsys; - } - - boothowto = howto; - if ((howto & RB_NOSYNC) == 0 && waittime < 0) { - waittime = 0; - vfs_shutdown(); - /* - * If we've been adjusting the clock, the todr - * will be out of synch; adjust it now. - */ - if (time_adjusted != 0) - resettodr(); - } - - /* Disable interrupts. */ - splhigh(); - - /* Do a dump if requested. */ - if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) - dumpsys(); - -haltsys: - doshutdownhooks(); - -#ifdef MULTIPROCESSOR - x86_broadcast_ipi(X86_IPI_HALT); -#endif - - if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { -#if NACPI > 0 - if (acpi_softc != NULL) { - delay(500000); - acpi_enter_sleep_state(acpi_softc, ACPI_STATE_S5); - printf("WARNING: ACPI powerdown failed!\n"); - } -#endif -#if NAPM > 0 && !defined(APM_NO_POWEROFF) - /* turn off, if we can. But try to turn disk off and - * wait a bit first--some disk drives are slow to clean up - * and users have reported disk corruption. - */ - delay(500000); - apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF); - delay(500000); - apm_set_powstate(APM_DEV_ALLDEVS, APM_SYS_OFF); - printf("WARNING: APM powerdown failed!\n"); - /* - * RB_POWERDOWN implies RB_HALT... fall into it... 
- */ -#endif - HYPERVISOR_shutdown(); - } - - if (howto & RB_HALT) { - printf("\n"); - printf("The operating system has halted.\n"); - - /* XXX cngetc() below doesn't work, shutdown machine for now */ - HYPERVISOR_shutdown(); - - printf("Please press any key to reboot.\n\n"); - -#ifdef BEEP_ONHALT - { - int c; - for (c = BEEP_ONHALT_COUNT; c > 0; c--) { - sysbeep(BEEP_ONHALT_PITCH, - BEEP_ONHALT_PERIOD * hz / 1000); - delay(BEEP_ONHALT_PERIOD * 1000); - sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000); - delay(BEEP_ONHALT_PERIOD * 1000); - } - } -#endif - - cnpollc(1); /* for proper keyboard command handling */ - if (cngetc() == 0) { - /* no console attached, so just hlt */ - for(;;) { - __asm __volatile("hlt"); - } - } - cnpollc(0); - } - - printf("rebooting...\n"); - if (cpureset_delay > 0) - delay(cpureset_delay * 1000); - cpu_reset(); - for(;;) ; - /*NOTREACHED*/ -} - -/* - * These variables are needed by /sbin/savecore - */ -u_int32_t dumpmag = 0x8fca0101; /* magic number */ -int dumpsize = 0; /* pages */ -long dumplo = 0; /* blocks */ - -/* - * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers. - */ -int -cpu_dumpsize() -{ - int size; - - size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + - ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); - if (roundup(size, dbtob(1)) != dbtob(1)) - return (-1); - - return (1); -} - -/* - * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped. - */ -u_long -cpu_dump_mempagecnt() -{ - u_long i, n; - - n = 0; - for (i = 0; i < mem_cluster_cnt; i++) - n += atop(mem_clusters[i].size); - return (n); -} - -/* - * cpu_dump: dump the machine-dependent kernel core dump headers. 
- */ -int -cpu_dump() -{ - int (*dump)(dev_t, daddr_t, caddr_t, size_t); - char buf[dbtob(1)]; - kcore_seg_t *segp; - cpu_kcore_hdr_t *cpuhdrp; - phys_ram_seg_t *memsegp; - const struct bdevsw *bdev; - int i; - - bdev = bdevsw_lookup(dumpdev); - if (bdev == NULL) - return (ENXIO); - dump = bdev->d_dump; - - memset(buf, 0, sizeof buf); - segp = (kcore_seg_t *)buf; - cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))]; - memsegp = (phys_ram_seg_t *)&buf[ ALIGN(sizeof(*segp)) + - ALIGN(sizeof(*cpuhdrp))]; - - /* - * Generate a segment header. - */ - CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU); - segp->c_size = dbtob(1) - ALIGN(sizeof(*segp)); - - /* - * Add the machine-dependent header info. - */ - cpuhdrp->ptdpaddr = PTDpaddr; - cpuhdrp->nmemsegs = mem_cluster_cnt; - - /* - * Fill in the memory segment descriptors. - */ - for (i = 0; i < mem_cluster_cnt; i++) { - memsegp[i].start = mem_clusters[i].start; - memsegp[i].size = mem_clusters[i].size; - } - - return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1))); -} - -/* - * This is called by main to set dumplo and dumpsize. - * Dumps always skip the first PAGE_SIZE of disk space - * in case there might be a disk label stored there. - * If there is extra space, put dump at the end to - * reduce the chance that swapping trashes it. - */ -void -cpu_dumpconf() -{ - const struct bdevsw *bdev; - int nblks, dumpblks; /* size of dump area */ - - if (dumpdev == NODEV) - goto bad; - bdev = bdevsw_lookup(dumpdev); - if (bdev == NULL) - panic("dumpconf: bad dumpdev=0x%x", dumpdev); - if (bdev->d_psize == NULL) - goto bad; - nblks = (*bdev->d_psize)(dumpdev); - if (nblks <= ctod(1)) - goto bad; - - dumpblks = cpu_dumpsize(); - if (dumpblks < 0) - goto bad; - dumpblks += ctod(cpu_dump_mempagecnt()); - - /* If dump won't fit (incl. room for possible label), punt. 
*/ - if (dumpblks > (nblks - ctod(1))) - goto bad; - - /* Put dump at end of partition */ - dumplo = nblks - dumpblks; - - /* dumpsize is in page units, and doesn't include headers. */ - dumpsize = cpu_dump_mempagecnt(); - return; - - bad: - dumpsize = 0; -} - -/* - * Doadump comes here after turning off memory management and - * getting on the dump stack, either when called above, or by - * the auto-restart code. - */ -#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */ -static vaddr_t dumpspace; - -vaddr_t -reserve_dumppages(vaddr_t p) -{ - - dumpspace = p; - return (p + BYTES_PER_DUMP); -} - -void -dumpsys() -{ - u_long totalbytesleft, bytes, i, n, memseg; - u_long maddr; - int psize; - daddr_t blkno; - const struct bdevsw *bdev; - int (*dump)(dev_t, daddr_t, caddr_t, size_t); - int error; - - /* Save registers. */ - savectx(&dumppcb); - - if (dumpdev == NODEV) - return; - - bdev = bdevsw_lookup(dumpdev); - if (bdev == NULL || bdev->d_psize == NULL) - return; - - /* - * For dumps during autoconfiguration, - * if dump device has already configured... - */ - if (dumpsize == 0) - cpu_dumpconf(); - if (dumplo <= 0 || dumpsize == 0) { - printf("\ndump to dev %u,%u not possible\n", major(dumpdev), - minor(dumpdev)); - return; - } - printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev), - minor(dumpdev), dumplo); - - psize = (*bdev->d_psize)(dumpdev); - printf("dump "); - if (psize == -1) { - printf("area unavailable\n"); - return; - } - -#if 0 /* XXX this doesn't work. grr. 
*/ - /* toss any characters present prior to dump */ - while (sget() != NULL); /*syscons and pccons differ */ -#endif - - if ((error = cpu_dump()) != 0) - goto err; - - totalbytesleft = ptoa(cpu_dump_mempagecnt()); - blkno = dumplo + cpu_dumpsize(); - dump = bdev->d_dump; - error = 0; - - for (memseg = 0; memseg < mem_cluster_cnt; memseg++) { - maddr = mem_clusters[memseg].start; - bytes = mem_clusters[memseg].size; - - for (i = 0; i < bytes; i += n, totalbytesleft -= n) { - /* Print out how many MBs we have left to go. */ - if ((totalbytesleft % (1024*1024)) == 0) - printf("%ld ", totalbytesleft / (1024 * 1024)); - - /* Limit size for next transfer. */ - n = bytes - i; - if (n > BYTES_PER_DUMP) - n = BYTES_PER_DUMP; - - (void) pmap_map(dumpspace, maddr, maddr + n, - VM_PROT_READ); - - error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, n); - if (error) - goto err; - maddr += n; - blkno += btodb(n); /* XXX? */ - -#if 0 /* XXX this doesn't work. grr. */ - /* operator aborting dump? */ - if (sget() != NULL) { - error = EINTR; - break; - } -#endif - } - } - - err: - switch (error) { - - case ENXIO: - printf("device bad\n"); - break; - - case EFAULT: - printf("device not ready\n"); - break; - - case EINVAL: - printf("area improper\n"); - break; - - case EIO: - printf("i/o error\n"); - break; - - case EINTR: - printf("aborted from console\n"); - break; - - case 0: - printf("succeeded\n"); - break; - - default: - printf("error %d\n", error); - break; - } - printf("\n\n"); - delay(5000000); /* 5 seconds */ -} - -/* - * Clear registers on exec - */ -void -setregs(struct lwp *l, struct exec_package *pack, u_long stack) -{ - struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - struct pcb *pcb = &l->l_addr->u_pcb; - struct trapframe *tf; - -#if NNPX > 0 - /* If we were using the FPU, forget about it. 
*/ - if (l->l_addr->u_pcb.pcb_fpcpu != NULL) - npxsave_lwp(l, 0); -#endif - -#ifdef USER_LDT - pmap_ldt_cleanup(l); -#endif - - l->l_md.md_flags &= ~MDL_USEDFPU; - if (i386_use_fxsave) { - pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__; - pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__; - } else - pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__; - - tf = l->l_md.md_regs; - tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_edi = 0; - tf->tf_esi = 0; - tf->tf_ebp = 0; - tf->tf_ebx = (int)l->l_proc->p_psstr; - tf->tf_edx = 0; - tf->tf_ecx = 0; - tf->tf_eax = 0; - tf->tf_eip = pack->ep_entry; - tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? - LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL); - tf->tf_eflags = PSL_USERSET; - tf->tf_esp = stack; - tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); -} - -/* - * Initialize segments and descriptor tables - */ - -union descriptor *gdt, *ldt; -struct gate_descriptor *idt; -char idt_allocmap[NIDT]; -struct simplelock idt_lock = SIMPLELOCK_INITIALIZER; -#ifdef I586_CPU -union descriptor *pentium_idt; -#endif -extern struct user *proc0paddr; - -void -setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl, - int sel) -{ - - gd->gd_looffset = (int)func; - gd->gd_selector = sel; - gd->gd_stkcpy = args; - gd->gd_xx = 0; - gd->gd_type = type; - gd->gd_dpl = dpl; - gd->gd_p = 1; - gd->gd_hioffset = (int)func >> 16; -} - -void -unsetgate(struct gate_descriptor *gd) -{ - gd->gd_p = 0; - gd->gd_hioffset = 0; - gd->gd_looffset = 0; - gd->gd_selector = 0; - gd->gd_xx = 0; - gd->gd_stkcpy = 0; - gd->gd_type = 0; - gd->gd_dpl = 0; -} - - -void -setregion(struct region_descriptor *rd, void *base, size_t limit) -{ - - rd->rd_limit = (int)limit; - rd->rd_base = (int)base; -} - -void -setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type, - int dpl, int 
def32, int gran) -{ - - sd->sd_lolimit = (int)limit; - sd->sd_lobase = (int)base; - sd->sd_type = type; - sd->sd_dpl = dpl; - sd->sd_p = 1; - sd->sd_hilimit = (int)limit >> 16; - sd->sd_xx = 0; - sd->sd_def32 = def32; - sd->sd_gran = gran; - sd->sd_hibase = (int)base >> 24; -} - -#define IDTVEC(name) __CONCAT(X, name) -typedef void (vector)(void); -extern vector IDTVEC(syscall); -extern vector IDTVEC(osyscall); -extern vector *IDTVEC(exceptions)[]; -#ifdef COMPAT_SVR4 -extern vector IDTVEC(svr4_fasttrap); -#endif /* COMPAT_SVR4 */ -#ifdef COMPAT_MACH -extern vector IDTVEC(mach_trap); -#endif -#define MAX_XEN_IDT 128 -trap_info_t xen_idt[MAX_XEN_IDT]; -int xen_idt_idx; - -#define KBTOB(x) ((size_t)(x) * 1024UL) - -void cpu_init_idt() -{ - struct region_descriptor region; - - panic("cpu_init_idt"); -#ifdef I586_CPU - setregion(®ion, pentium_idt, NIDT * sizeof(idt[0]) - 1); -#else - setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); -#endif - lidt(®ion); -} - -#if !defined(REALBASEMEM) && !defined(REALEXTMEM) -void -add_mem_cluster(u_int64_t seg_start, u_int64_t seg_end, u_int32_t type) -{ - extern struct extent *iomem_ex; - int i; - - if (seg_end > 0x100000000ULL) { - printf("WARNING: skipping large " - "memory map entry: " - "0x%qx/0x%qx/0x%x\n", - seg_start, - (seg_end - seg_start), - type); - return; - } - - /* - * XXX Chop the last page off the size so that - * XXX it can fit in avail_end. - */ - if (seg_end == 0x100000000ULL) - seg_end -= PAGE_SIZE; - - if (seg_end <= seg_start) - return; - - for (i = 0; i < mem_cluster_cnt; i++) { - if ((mem_clusters[i].start == round_page(seg_start)) - && (mem_clusters[i].size - == trunc_page(seg_end) - mem_clusters[i].start)) { -#ifdef DEBUG_MEMLOAD - printf("WARNING: skipping duplicate segment entry\n"); -#endif - return; - } - } - - /* - * Allocate the physical addresses used by RAM - * from the iomem extent map. This is done before - * the addresses are page rounded just to make - * sure we get them all. 
- */ - if (extent_alloc_region(iomem_ex, seg_start, - seg_end - seg_start, EX_NOWAIT)) { - /* XXX What should we do? */ - printf("WARNING: CAN'T ALLOCATE " - "MEMORY SEGMENT " - "(0x%qx/0x%qx/0x%x) FROM " - "IOMEM EXTENT MAP!\n", - seg_start, seg_end - seg_start, type); - return; - } - - /* - * If it's not free memory, skip it. - */ - if (type != BIM_Memory) - return; - - /* XXX XXX XXX */ - if (mem_cluster_cnt >= VM_PHYSSEG_MAX) - panic("init386: too many memory segments"); - - seg_start = round_page(seg_start); - seg_end = trunc_page(seg_end); - - if (seg_start == seg_end) - return; - - mem_clusters[mem_cluster_cnt].start = seg_start; - mem_clusters[mem_cluster_cnt].size = - seg_end - seg_start; - - if (avail_end < seg_end) - avail_end = seg_end; - physmem += atop(mem_clusters[mem_cluster_cnt].size); - mem_cluster_cnt++; -} -#endif /* !defined(REALBASEMEM) && !defined(REALEXTMEM) */ - -void -initgdt() -{ -#if !defined(XEN) - struct region_descriptor region; -#else - paddr_t frames[16]; -#endif - -#if !defined(XEN) - gdt = tgdt; - memset(gdt, 0, NGDT*sizeof(*gdt)); -#endif - /* make gdt gates and memory segments */ - setsegment(&gdt[GCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 1, 1); - setsegment(&gdt[GDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 1, 1); - setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1, - SDT_MEMERA, SEL_UPL, 1, 1); - setsegment(&gdt[GUCODEBIG_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1, - SDT_MEMERA, SEL_UPL, 1, 1); - setsegment(&gdt[GUDATA_SEL].sd, 0, x86_btop(VM_MAXUSER_ADDRESS) - 1, - SDT_MEMRWA, SEL_UPL, 1, 1); -#ifdef COMPAT_MACH - setgate(&gdt[GMACHCALLS_SEL].gd, &IDTVEC(mach_trap), 1, - SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif -#if NBIOSCALL > 0 - /* bios trampoline GDT entries */ - setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfc3ff, SDT_MEMERA, SEL_KPL, 0, - 0); - setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfc3ff, SDT_MEMRWA, SEL_KPL, 0, - 0); -#endif - setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, - 
sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 1, 1); - -#if !defined(XEN) - setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); - lgdt(®ion); -#else - frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT; - /* pmap_kremove((vaddr_t)gdt, PAGE_SIZE); */ - pmap_kenter_pa((vaddr_t)gdt, (uint32_t)gdt - KERNBASE, - VM_PROT_READ); - XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT, - NGDT)); - if (HYPERVISOR_set_gdt(frames, NGDT)) - panic("HYPERVISOR_set_gdt failed!\n"); - lgdt_finish(); -#endif -} - -void -init386(paddr_t first_avail) -{ -#if !defined(XEN) - union descriptor *tgdt; -#endif - extern void consinit(void); -#if !defined(XEN) - extern struct extent *iomem_ex; -#if !defined(REALBASEMEM) && !defined(REALEXTMEM) - struct btinfo_memmap *bim; -#endif - struct region_descriptor region; -#endif - int x; -#if !defined(XEN) - int first16q; - u_int64_t seg_start, seg_end; - u_int64_t seg_start1, seg_end1; -#endif - paddr_t realmode_reserved_start; - psize_t realmode_reserved_size; - int needs_earlier_install_pte0; -#if NBIOSCALL > 0 - extern int biostramp_image_size; - extern u_char biostramp_image[]; -#endif - - XENPRINTK(("HYPERVISOR_shared_info %p\n", HYPERVISOR_shared_info)); -#ifdef XENDEBUG_LOW - xen_dbglow_init(); -#endif - - cpu_probe_features(&cpu_info_primary); - cpu_feature = cpu_info_primary.ci_feature_flags; - - /* not on Xen... */ - cpu_feature &= ~(CPUID_PGE|CPUID_PSE|CPUID_MTRR|CPUID_FXSR); - - lwp0.l_addr = proc0paddr; - cpu_info_primary.ci_curpcb = &lwp0.l_addr->u_pcb; - - XENPRINTK(("proc0paddr %p pcb %p first_avail %p\n", - proc0paddr, cpu_info_primary.ci_curpcb, (void *)first_avail)); - XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PTDpaddr, - (void *)atdevbase)); - - x86_bus_space_init(); - consinit(); /* XXX SHOULD NOT BE DONE HERE */ - /* - * Initailize PAGE_SIZE-dependent variables. - */ - uvm_setpagesize(); - - /* - * Saving SSE registers won't work if the save area isn't - * 16-byte aligned. 
- */ - if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf) - panic("init386: pcb_savefpu not 16-byte aligned"); - - /* - * Start with 2 color bins -- this is just a guess to get us - * started. We'll recolor when we determine the largest cache - * sizes on the system. - */ - uvmexp.ncolors = 2; - -#if !defined(XEN) - /* - * BIOS leaves data in physical page 0 - * Even if it didn't, our VM system doesn't like using zero as a - * physical page number. - * We may also need pages in low memory (one each) for secondary CPU - * startup, for BIOS calls, and for ACPI, plus a page table page to map - * them into the first few pages of the kernel's pmap. - */ - avail_start = PAGE_SIZE; -#else - /* Make sure the end of the space used by the kernel is rounded. */ - first_avail = round_page(first_avail); - avail_start = first_avail - KERNBASE; - avail_end = ptoa(xen_start_info.nr_pages) + - (KERNTEXTOFF - KERNBASE_LOCORE); - pmap_pa_start = (KERNTEXTOFF - KERNBASE_LOCORE); - pmap_pa_end = avail_end; - mem_clusters[0].start = avail_start; - mem_clusters[0].size = avail_end - avail_start; - mem_cluster_cnt++; - physmem += atop(mem_clusters[0].size); -#endif - - /* - * reserve memory for real-mode call - */ - needs_earlier_install_pte0 = 0; - realmode_reserved_start = 0; - realmode_reserved_size = 0; -#if NBIOSCALL > 0 - /* save us a page for trampoline code */ - realmode_reserved_size += PAGE_SIZE; - needs_earlier_install_pte0 = 1; -#endif -#ifdef MULTIPROCESSOR /* XXX */ -#if !defined(XEN) - KASSERT(avail_start == PAGE_SIZE); /* XXX */ -#endif - if (realmode_reserved_size < MP_TRAMPOLINE) /* XXX */ - realmode_reserved_size = MP_TRAMPOLINE; /* XXX */ - needs_earlier_install_pte0 = 1; /* XXX */ -#endif /* XXX */ -#if NACPI > 0 - /* trampoline code for wake handler */ - realmode_reserved_size += ptoa(acpi_md_get_npages_of_wakecode()+1); - needs_earlier_install_pte0 = 1; -#endif - if (needs_earlier_install_pte0) { - /* page table for directory entry 0 */ - realmode_reserved_size += 
PAGE_SIZE; - } - if (realmode_reserved_size>0) { - realmode_reserved_start = avail_start; - avail_start += realmode_reserved_size; - } - -#ifdef DEBUG_MEMLOAD - printf("mem_cluster_count: %d\n", mem_cluster_cnt); -#endif - - /* - * Call pmap initialization to make new kernel address space. - * We must do this before loading pages into the VM system. - */ - pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE); - -#if !defined(XEN) -#if !defined(REALBASEMEM) && !defined(REALEXTMEM) - /* - * Check to see if we have a memory map from the BIOS (passed - * to us by the boot program. - */ - bim = lookup_bootinfo(BTINFO_MEMMAP); - if (bim != NULL && bim->num > 0) { -#ifdef DEBUG_MEMLOAD - printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num); -#endif - for (x = 0; x < bim->num; x++) { -#ifdef DEBUG_MEMLOAD - printf(" addr 0x%qx size 0x%qx type 0x%x\n", - bim->entry[x].addr, - bim->entry[x].size, - bim->entry[x].type); -#endif - - /* - * If the segment is not memory, skip it. - */ - switch (bim->entry[x].type) { - case BIM_Memory: - case BIM_ACPI: - case BIM_NVS: - break; - default: - continue; - } - - /* - * Sanity check the entry. - * XXX Need to handle uint64_t in extent code - * XXX and 64-bit physical addresses in i386 - * XXX port. - */ - seg_start = bim->entry[x].addr; - seg_end = bim->entry[x].addr + bim->entry[x].size; - - /* - * Avoid Compatibility Holes. - * XXX Holes within memory space that allow access - * XXX to be directed to the PC-compatible frame buffer - * XXX (0xa0000-0xbffff),to adapter ROM space - * XXX (0xc0000-0xdffff), and to system BIOS space - * XXX (0xe0000-0xfffff). - * XXX Some laptop(for example,Toshiba Satellite2550X) - * XXX report this area and occurred problems, - * XXX so we avoid this area. 
- */ - if (seg_start < 0x100000 && seg_end > 0xa0000) { - printf("WARNING: memory map entry overlaps " - "with ``Compatibility Holes'': " - "0x%qx/0x%qx/0x%x\n", seg_start, - seg_end - seg_start, bim->entry[x].type); - add_mem_cluster(seg_start, 0xa0000, - bim->entry[x].type); - add_mem_cluster(0x100000, seg_end, - bim->entry[x].type); - } else - add_mem_cluster(seg_start, seg_end, - bim->entry[x].type); - } - } -#endif /* ! REALBASEMEM && ! REALEXTMEM */ - /* - * If the loop above didn't find any valid segment, fall back to - * former code. - */ - if (mem_cluster_cnt == 0) { - /* - * Allocate the physical addresses used by RAM from the iomem - * extent map. This is done before the addresses are - * page rounded just to make sure we get them all. - */ - if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem), - EX_NOWAIT)) { - /* XXX What should we do? */ - printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM " - "IOMEM EXTENT MAP!\n"); - } - mem_clusters[0].start = 0; - mem_clusters[0].size = trunc_page(KBTOB(biosbasemem)); - physmem += atop(mem_clusters[0].size); - if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem), - EX_NOWAIT)) { - /* XXX What should we do? */ - printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM " - "IOMEM EXTENT MAP!\n"); - } -#if NISADMA > 0 - /* - * Some motherboards/BIOSes remap the 384K of RAM that would - * normally be covered by the ISA hole to the end of memory - * so that it can be used. However, on a 16M system, this - * would cause bounce buffers to be allocated and used. - * This is not desirable behaviour, as more than 384K of - * bounce buffers might be allocated. As a work-around, - * we round memory down to the nearest 1M boundary if - * we're using any isadma devices and the remapped memory - * is what puts us over 16M. 
- */ - if (biosextmem > (15*1024) && biosextmem < (16*1024)) { - char pbuf[9]; - - format_bytes(pbuf, sizeof(pbuf), - biosextmem - (15*1024)); - printf("Warning: ignoring %s of remapped memory\n", - pbuf); - biosextmem = (15*1024); - } -#endif - mem_clusters[1].start = IOM_END; - mem_clusters[1].size = trunc_page(KBTOB(biosextmem)); - physmem += atop(mem_clusters[1].size); - - mem_cluster_cnt = 2; - - avail_end = IOM_END + trunc_page(KBTOB(biosextmem)); - } - /* - * If we have 16M of RAM or less, just put it all on - * the default free list. Otherwise, put the first - * 16M of RAM on a lower priority free list (so that - * all of the ISA DMA'able memory won't be eaten up - * first-off). - */ - if (avail_end <= (16 * 1024 * 1024)) - first16q = VM_FREELIST_DEFAULT; - else - first16q = VM_FREELIST_FIRST16; - - /* Make sure the end of the space used by the kernel is rounded. */ - first_avail = round_page(first_avail); -#endif - - XENPRINTK(("load the memory cluster %p(%d) - %p(%ld)\n", - (void *)avail_start, (int)atop(avail_start), - (void *)avail_end, (int)atop(avail_end))); - uvm_page_physload(atop(avail_start), atop(avail_end), - atop(avail_start), atop(avail_end), - VM_FREELIST_DEFAULT); - -#if !defined(XEN) - - /* - * Now, load the memory clusters (which have already been - * rounded and truncated) into the VM system. - * - * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL - * IS LOADED AT IOM_END (1M). - */ - for (x = 0; x < mem_cluster_cnt; x++) { - seg_start = mem_clusters[x].start; - seg_end = mem_clusters[x].start + mem_clusters[x].size; - seg_start1 = 0; - seg_end1 = 0; - - /* - * Skip memory before our available starting point. 
- */ - if (seg_end <= avail_start) - continue; - - if (avail_start >= seg_start && avail_start < seg_end) { - if (seg_start != 0) - panic("init386: memory doesn't start at 0"); - seg_start = avail_start; - if (seg_start == seg_end) - continue; - } - - /* - * If this segment contains the kernel, split it - * in two, around the kernel. - */ - if (seg_start <= IOM_END && first_avail <= seg_end) { - seg_start1 = first_avail; - seg_end1 = seg_end; - seg_end = IOM_END; - } - - /* First hunk */ - if (seg_start != seg_end) { - if (seg_start < (16 * 1024 * 1024) && - first16q != VM_FREELIST_DEFAULT) { - u_int64_t tmp; - - if (seg_end > (16 * 1024 * 1024)) - tmp = (16 * 1024 * 1024); - else - tmp = seg_end; - - if (tmp != seg_start) { -#ifdef DEBUG_MEMLOAD - printf("loading 0x%qx-0x%qx " - "(0x%lx-0x%lx)\n", - seg_start, tmp, - atop(seg_start), atop(tmp)); -#endif - uvm_page_physload(atop(seg_start), - atop(tmp), atop(seg_start), - atop(tmp), first16q); - } - seg_start = tmp; - } - - if (seg_start != seg_end) { -#ifdef DEBUG_MEMLOAD - printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n", - seg_start, seg_end, - atop(seg_start), atop(seg_end)); -#endif - uvm_page_physload(atop(seg_start), - atop(seg_end), atop(seg_start), - atop(seg_end), VM_FREELIST_DEFAULT); - } - } - - /* Second hunk */ - if (seg_start1 != seg_end1) { - if (seg_start1 < (16 * 1024 * 1024) && - first16q != VM_FREELIST_DEFAULT) { - u_int64_t tmp; - - if (seg_end1 > (16 * 1024 * 1024)) - tmp = (16 * 1024 * 1024); - else - tmp = seg_end1; - - if (tmp != seg_start1) { -#ifdef DEBUG_MEMLOAD - printf("loading 0x%qx-0x%qx " - "(0x%lx-0x%lx)\n", - seg_start1, tmp, - atop(seg_start1), atop(tmp)); -#endif - uvm_page_physload(atop(seg_start1), - atop(tmp), atop(seg_start1), - atop(tmp), first16q); - } - seg_start1 = tmp; - } - - if (seg_start1 != seg_end1) { -#ifdef DEBUG_MEMLOAD - printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n", - seg_start1, seg_end1, - atop(seg_start1), atop(seg_end1)); -#endif - 
uvm_page_physload(atop(seg_start1), - atop(seg_end1), atop(seg_start1), - atop(seg_end1), VM_FREELIST_DEFAULT); - } - } - } -#endif - - /* - * Steal memory for the message buffer (at end of core). - */ - { - struct vm_physseg *vps; - psize_t sz = round_page(MSGBUFSIZE); - psize_t reqsz = sz; - - for (x = 0; x < vm_nphysseg; x++) { - vps = &vm_physmem[x]; - if (ptoa(vps->avail_end) == avail_end) - goto found; - } - panic("init386: can't find end of memory"); - - found: - /* Shrink so it'll fit in the last segment. */ - if ((vps->avail_end - vps->avail_start) < atop(sz)) - sz = ptoa(vps->avail_end - vps->avail_start); - - vps->avail_end -= atop(sz); - vps->end -= atop(sz); - msgbuf_paddr = ptoa(vps->avail_end); - - /* Remove the last segment if it now has no pages. */ - if (vps->start == vps->end) { - for (vm_nphysseg--; x < vm_nphysseg; x++) - vm_physmem[x] = vm_physmem[x + 1]; - } - - /* Now find where the new avail_end is. */ - for (avail_end = 0, x = 0; x < vm_nphysseg; x++) - if (vm_physmem[x].avail_end > avail_end) - avail_end = vm_physmem[x].avail_end; - avail_end = ptoa(avail_end); - - /* Warn if the message buffer had to be shrunk. */ - if (sz != reqsz) - printf("WARNING: %ld bytes not available for msgbuf " - "in last cluster (%ld used)\n", reqsz, sz); - } - - /* - * install PT page for the first 4M if needed. 
- */ - if (needs_earlier_install_pte0) { - paddr_t paddr; -#ifdef DIAGNOSTIC - if (realmode_reserved_size < PAGE_SIZE) { - panic("cannot steal memory for first 4M PT page."); - } -#endif - paddr=realmode_reserved_start+realmode_reserved_size-PAGE_SIZE; - pmap_enter(pmap_kernel(), (vaddr_t)vtopte(0), paddr, - VM_PROT_READ|VM_PROT_WRITE, - PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - /* make sure it is clean before using */ - memset(vtopte(0), 0, PAGE_SIZE); - realmode_reserved_size -= PAGE_SIZE; - } - -#if NBIOSCALL > 0 - /* - * this should be caught at kernel build time, but put it here - * in case someone tries to fake it out... - */ -#ifdef DIAGNOSTIC - if (realmode_reserved_start > BIOSTRAMP_BASE || - (realmode_reserved_start+realmode_reserved_size) < (BIOSTRAMP_BASE+ - PAGE_SIZE)) { - panic("cannot steal memory for PT page of bioscall."); - } - if (biostramp_image_size > PAGE_SIZE) - panic("biostramp_image_size too big: %x vs. %x", - biostramp_image_size, PAGE_SIZE); -#endif - pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */ - (paddr_t)BIOSTRAMP_BASE, /* physical */ - VM_PROT_ALL); /* protection */ - pmap_update(pmap_kernel()); - memcpy((caddr_t)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size); -#ifdef DEBUG_BIOSCALL - printf("biostramp installed @ %x\n", BIOSTRAMP_BASE); -#endif - realmode_reserved_size -= PAGE_SIZE; - realmode_reserved_start += PAGE_SIZE; -#endif - -#if NACPI > 0 - /* - * Steal memory for the acpi wake code - */ - { - paddr_t paddr, p; - psize_t sz; - int npg; - - paddr = realmode_reserved_start; - npg = acpi_md_get_npages_of_wakecode(); - sz = ptoa(npg); -#ifdef DIAGNOSTIC - if (realmode_reserved_size < sz) { - panic("cannot steal memory for ACPI wake code."); - } -#endif - - /* identical mapping */ - p = paddr; - for (x=0; x<npg; x++) { - printf("kenter: 0x%08X\n", (unsigned)p); - pmap_kenter_pa((vaddr_t)p, p, VM_PROT_ALL); - p += PAGE_SIZE; - } - pmap_update(pmap_kernel()); - - 
acpi_md_install_wakecode(paddr); - - realmode_reserved_size -= sz; - realmode_reserved_start += sz; - } -#endif - - pmap_enter(pmap_kernel(), idt_vaddr, idt_paddr, - VM_PROT_READ|VM_PROT_WRITE, PMAP_WIRED|VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - memset((void *)idt_vaddr, 0, PAGE_SIZE); - -#if !defined(XEN) - idt = (struct gate_descriptor *)idt_vaddr; -#ifdef I586_CPU - pmap_enter(pmap_kernel(), pentium_idt_vaddr, idt_paddr, - VM_PROT_READ, PMAP_WIRED|VM_PROT_READ); - pentium_idt = (union descriptor *)pentium_idt_vaddr; -#endif -#endif - pmap_update(pmap_kernel()); - - initgdt(); - - HYPERVISOR_set_callbacks( - GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback, - GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); - -#if !defined(XEN) - tgdt = gdt; - gdt = (union descriptor *) - ((char *)idt + NIDT * sizeof (struct gate_descriptor)); - ldt = gdt + NGDT; - - memcpy(gdt, tgdt, NGDT*sizeof(*gdt)); - - setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1, - SDT_SYSLDT, SEL_KPL, 0, 0); -#else - ldt = (union descriptor *)idt_vaddr; -#endif - - /* make ldt gates and memory segments */ - setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, - SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); - - ldt[LUCODE_SEL] = gdt[GUCODE_SEL]; - ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL]; - ldt[LUDATA_SEL] = gdt[GUDATA_SEL]; - ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; - -#if !defined(XEN) - /* exceptions */ - for (x = 0; x < 32; x++) { - setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386TGT, - (x == 3 || x == 4) ? 
SEL_UPL : SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - idt_allocmap[x] = 1; - } - - /* new-style interrupt gate for syscalls */ - setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL, - GSEL(GCODE_SEL, SEL_KPL)); - idt_allocmap[128] = 1; -#ifdef COMPAT_SVR4 - setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386TGT, - SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); - idt_allocmap[0xd2] = 1; -#endif /* COMPAT_SVR4 */ -#endif - - memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT); - xen_idt_idx = 0; - for (x = 0; x < 32; x++) { - KASSERT(xen_idt_idx < MAX_XEN_IDT); - xen_idt[xen_idt_idx].vector = x; - xen_idt[xen_idt_idx].flags = - (x == 3 || x == 4) ? SEL_UPL : SEL_XEN; - xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); - xen_idt[xen_idt_idx].address = - (uint32_t)IDTVEC(exceptions)[x]; - xen_idt_idx++; - } - KASSERT(xen_idt_idx < MAX_XEN_IDT); - xen_idt[xen_idt_idx].vector = 128; - xen_idt[xen_idt_idx].flags = SEL_UPL; - xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); - xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall); - xen_idt_idx++; -#ifdef COMPAT_SVR4 - KASSERT(xen_idt_idx < MAX_XEN_IDT); - xen_idt[xen_idt_idx].vector = 0xd2; - xen_idt[xen_idt_idx].flags = SEL_UPL; - xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); - xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap); - xen_idt_idx++; -#endif /* COMPAT_SVR4 */ - -#if !defined(XEN) - setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); - lgdt(®ion); -#else - lldt(GSEL(GLDT_SEL, SEL_KPL)); -#endif - -#if !defined(XEN) - cpu_init_idt(); -#else - db_trap_callback = ddb_trap_hook; - - XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt)); - if (HYPERVISOR_set_trap_table(xen_idt)) - panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt); -#endif - -#if NKSYMS || defined(DDB) || defined(LKM) - { - extern int end; - struct btinfo_symtab *symtab; - -#ifdef DDB - db_machine_init(); -#endif - - symtab = lookup_bootinfo(BTINFO_SYMTAB); - - if (symtab) { - symtab->ssym += KERNBASE; - 
symtab->esym += KERNBASE; - ksyms_init(symtab->nsym, (int *)symtab->ssym, - (int *)symtab->esym); - } - else - ksyms_init(*(int *)&end, ((int *)&end) + 1, - xen_start_info.mod_start ? - (void *)xen_start_info.mod_start : - (void *)xen_start_info.mfn_list); - } -#endif -#ifdef DDB - if (boothowto & RB_KDB) - Debugger(); -#endif -#ifdef IPKDB - ipkdb_init(); - if (boothowto & RB_KDB) - ipkdb_connect(0); -#endif -#ifdef KGDB - kgdb_port_init(); - if (boothowto & RB_KDB) { - kgdb_debug_init = 1; - kgdb_connect(1); - } -#endif - -#if NMCA > 0 - /* check for MCA bus, needed to be done before ISA stuff - if - * MCA is detected, ISA needs to use level triggered interrupts - * by default */ - mca_busprobe(); -#endif - -#if defined(XEN) - events_default_setup(); -#else - intr_default_setup(); -#endif - - /* Initialize software interrupts. */ - softintr_init(); - - splraise(IPL_IPI); - enable_intr(); - - if (physmem < btoc(2 * 1024 * 1024)) { - printf("warning: too little memory available; " - "have %lu bytes, want %lu bytes\n" - "running in degraded mode\n" - "press a key to confirm\n\n", - ptoa(physmem), 2*1024*1024UL); - cngetc(); - } - -#ifdef __HAVE_CPU_MAXPROC - /* Make sure maxproc is sane */ - if (maxproc > cpu_maxproc()) - maxproc = cpu_maxproc(); -#endif -} - -#ifdef COMPAT_NOMID -static int -exec_nomid(struct proc *p, struct exec_package *epp) -{ - int error; - u_long midmag, magic; - u_short mid; - struct exec *execp = epp->ep_hdr; - - /* check on validity of epp->ep_hdr performed by exec_out_makecmds */ - - midmag = ntohl(execp->a_midmag); - mid = (midmag >> 16) & 0xffff; - magic = midmag & 0xffff; - - if (magic == 0) { - magic = (execp->a_midmag & 0xffff); - mid = MID_ZERO; - } - - midmag = mid << 16 | magic; - - switch (midmag) { - case (MID_ZERO << 16) | ZMAGIC: - /* - * 386BSD's ZMAGIC format: - */ - error = exec_aout_prep_oldzmagic(p, epp); - break; - - case (MID_ZERO << 16) | QMAGIC: - /* - * BSDI's QMAGIC format: - * same as new ZMAGIC format, but with 
different magic number - */ - error = exec_aout_prep_zmagic(p, epp); - break; - - case (MID_ZERO << 16) | NMAGIC: - /* - * BSDI's NMAGIC format: - * same as NMAGIC format, but with different magic number - * and with text starting at 0. - */ - error = exec_aout_prep_oldnmagic(p, epp); - break; - - case (MID_ZERO << 16) | OMAGIC: - /* - * BSDI's OMAGIC format: - * same as OMAGIC format, but with different magic number - * and with text starting at 0. - */ - error = exec_aout_prep_oldomagic(p, epp); - break; - - default: - error = ENOEXEC; - } - - return error; -} -#endif - -/* - * cpu_exec_aout_makecmds(): - * CPU-dependent a.out format hook for execve(). - * - * Determine of the given exec package refers to something which we - * understand and, if so, set up the vmcmds for it. - * - * On the i386, old (386bsd) ZMAGIC binaries and BSDI QMAGIC binaries - * if COMPAT_NOMID is given as a kernel option. - */ -int -cpu_exec_aout_makecmds(struct proc *p, struct exec_package *epp) -{ - int error = ENOEXEC; - -#ifdef COMPAT_NOMID - if ((error = exec_nomid(p, epp)) == 0) - return error; -#endif /* ! COMPAT_NOMID */ - - return error; -} - -void * -lookup_bootinfo(int type) -{ - struct btinfo_common *help; - int n = *(int*)bootinfo; - help = (struct btinfo_common *)(bootinfo + sizeof(int)); - while(n--) { - if(help->type == type) - return(help); - help = (struct btinfo_common *)((char*)help + help->len); - } - return(0); -} - -#include <dev/ic/mc146818reg.h> /* for NVRAM POST */ -#include <i386/isa/nvram.h> /* for NVRAM POST */ - -void -cpu_reset() -{ - - disable_intr(); - -#if 0 - /* - * Ensure the NVRAM reset byte contains something vaguely sane. - */ - - outb(IO_RTC, NVRAM_RESET); - outb(IO_RTC+1, NVRAM_RESET_RST); - - /* - * The keyboard controller has 4 random output pins, one of which is - * connected to the RESET pin on the CPU in many PCs. We tell the - * keyboard controller to pulse this line a couple of times. 
- */ - outb(IO_KBD + KBCMDP, KBC_PULSE0); - delay(100000); - outb(IO_KBD + KBCMDP, KBC_PULSE0); - delay(100000); -#endif - - HYPERVISOR_reboot(); - - for (;;); -} - -void -cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) -{ - const struct trapframe *tf = l->l_md.md_regs; - __greg_t *gr = mcp->__gregs; - __greg_t ras_eip; - - /* Save register context. */ -#ifdef VM86 - if (tf->tf_eflags & PSL_VM) { - gr[_REG_GS] = tf->tf_vm86_gs; - gr[_REG_FS] = tf->tf_vm86_fs; - gr[_REG_ES] = tf->tf_vm86_es; - gr[_REG_DS] = tf->tf_vm86_ds; - gr[_REG_EFL] = get_vflags(l); - } else -#endif - { - gr[_REG_GS] = tf->tf_gs; - gr[_REG_FS] = tf->tf_fs; - gr[_REG_ES] = tf->tf_es; - gr[_REG_DS] = tf->tf_ds; - gr[_REG_EFL] = tf->tf_eflags; - } - gr[_REG_EDI] = tf->tf_edi; - gr[_REG_ESI] = tf->tf_esi; - gr[_REG_EBP] = tf->tf_ebp; - gr[_REG_EBX] = tf->tf_ebx; - gr[_REG_EDX] = tf->tf_edx; - gr[_REG_ECX] = tf->tf_ecx; - gr[_REG_EAX] = tf->tf_eax; - gr[_REG_EIP] = tf->tf_eip; - gr[_REG_CS] = tf->tf_cs; - gr[_REG_ESP] = tf->tf_esp; - gr[_REG_UESP] = tf->tf_esp; - gr[_REG_SS] = tf->tf_ss; - gr[_REG_TRAPNO] = tf->tf_trapno; - gr[_REG_ERR] = tf->tf_err; - - if ((ras_eip = (__greg_t)ras_lookup(l->l_proc, - (caddr_t) gr[_REG_EIP])) != -1) - gr[_REG_EIP] = ras_eip; - - *flags |= _UC_CPU; - - /* Save floating point register context, if any. */ - if ((l->l_md.md_flags & MDL_USEDFPU) != 0) { -#if NNPX > 0 - /* - * If this process is the current FP owner, dump its - * context to the PCB first. - * XXX npxsave() also clears the FPU state; depending on the - * XXX application this might be a penalty. 
- */ - if (l->l_addr->u_pcb.pcb_fpcpu) { - npxsave_lwp(l, 1); - } -#endif - if (i386_use_fxsave) { - memcpy(&mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm, - &l->l_addr->u_pcb.pcb_savefpu.sv_xmm, - sizeof (mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm)); - *flags |= _UC_FXSAVE; - } else { - memcpy(&mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state, - &l->l_addr->u_pcb.pcb_savefpu.sv_87, - sizeof (mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state)); - } -#if 0 - /* Apparently nothing ever touches this. */ - ucp->mcp.mc_fp.fp_emcsts = l->l_addr->u_pcb.pcb_saveemc; -#endif - *flags |= _UC_FPU; - } -} - -int -cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) -{ - struct trapframe *tf = l->l_md.md_regs; - __greg_t *gr = mcp->__gregs; - - /* Restore register context, if any. */ - if ((flags & _UC_CPU) != 0) { -#ifdef VM86 - if (gr[_REG_EFL] & PSL_VM) { - tf->tf_vm86_gs = gr[_REG_GS]; - tf->tf_vm86_fs = gr[_REG_FS]; - tf->tf_vm86_es = gr[_REG_ES]; - tf->tf_vm86_ds = gr[_REG_DS]; - set_vflags(l, gr[_REG_EFL]); - if (flags & _UC_VM) { - void syscall_vm86(struct trapframe *); - l->l_proc->p_md.md_syscall = syscall_vm86; - } - } else -#endif - { - /* - * Check for security violations. If we're returning - * to protected mode, the CPU will validate the segment - * registers automatically and generate a trap on - * violations. We handle the trap, rather than doing - * all of the checking here. 
- */ - if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) || - !USERMODE(gr[_REG_CS], gr[_REG_EFL])) { - printf("cpu_setmcontext error: uc EFL: 0x%08x" - " tf EFL: 0x%08x uc CS: 0x%x\n", - gr[_REG_EFL], tf->tf_eflags, gr[_REG_CS]); - return (EINVAL); - } - tf->tf_gs = gr[_REG_GS]; - tf->tf_fs = gr[_REG_FS]; - tf->tf_es = gr[_REG_ES]; - tf->tf_ds = gr[_REG_DS]; - /* Only change the user-alterable part of eflags */ - tf->tf_eflags &= ~PSL_USER; - tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER); - } - tf->tf_edi = gr[_REG_EDI]; - tf->tf_esi = gr[_REG_ESI]; - tf->tf_ebp = gr[_REG_EBP]; - tf->tf_ebx = gr[_REG_EBX]; - tf->tf_edx = gr[_REG_EDX]; - tf->tf_ecx = gr[_REG_ECX]; - tf->tf_eax = gr[_REG_EAX]; - tf->tf_eip = gr[_REG_EIP]; - tf->tf_cs = gr[_REG_CS]; - tf->tf_esp = gr[_REG_UESP]; - tf->tf_ss = gr[_REG_SS]; - } - - /* Restore floating point register context, if any. */ - if ((flags & _UC_FPU) != 0) { -#if NNPX > 0 - /* - * If we were using the FPU, forget that we were. - */ - if (l->l_addr->u_pcb.pcb_fpcpu != NULL) - npxsave_lwp(l, 0); -#endif - if (flags & _UC_FXSAVE) { - if (i386_use_fxsave) { - memcpy( - &l->l_addr->u_pcb.pcb_savefpu.sv_xmm, - &mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm, - sizeof (&l->l_addr->u_pcb.pcb_savefpu.sv_xmm)); - } else { - /* This is a weird corner case */ - process_xmm_to_s87((struct savexmm *) - &mcp->__fpregs.__fp_reg_set.__fp_xmm_state.__fp_xmm, - &l->l_addr->u_pcb.pcb_savefpu.sv_87); - } - } else { - if (i386_use_fxsave) { - process_s87_to_xmm((struct save87 *) - &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state, - &l->l_addr->u_pcb.pcb_savefpu.sv_xmm); - } else { - memcpy(&l->l_addr->u_pcb.pcb_savefpu.sv_87, - &mcp->__fpregs.__fp_reg_set.__fpchip_state.__fp_state, - sizeof (l->l_addr->u_pcb.pcb_savefpu.sv_87)); - } - } - /* If not set already. */ - l->l_md.md_flags |= MDL_USEDFPU; -#if 0 - /* Apparently unused. 
*/ - l->l_addr->u_pcb.pcb_saveemc = mcp->mc_fp.fp_emcsts; -#endif - } - if (flags & _UC_SETSTACK) - l->l_proc->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK; - if (flags & _UC_CLRSTACK) - l->l_proc->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK; - return (0); -} - -void -cpu_initclocks() -{ - (*initclock_func)(); -} - -#ifdef MULTIPROCESSOR -void -need_resched(struct cpu_info *ci) -{ - - if (ci->ci_want_resched) - return; - - ci->ci_want_resched = 1; - if ((ci)->ci_curlwp != NULL) - aston((ci)->ci_curlwp->l_proc); - else if (ci != curcpu()) - x86_send_ipi(ci, 0); -} -#endif - -/* - * Allocate an IDT vector slot within the given range. - * XXX needs locking to avoid MP allocation races. - */ - -int -idt_vec_alloc(int low, int high) -{ - int vec; - - simple_lock(&idt_lock); - for (vec = low; vec <= high; vec++) { - if (idt_allocmap[vec] == 0) { - idt_allocmap[vec] = 1; - simple_unlock(&idt_lock); - return vec; - } - } - simple_unlock(&idt_lock); - return 0; -} - -void -idt_vec_set(int vec, void (*function)(void)) -{ - /* - * Vector should be allocated, so no locking needed. - */ - KASSERT(idt_allocmap[vec] == 1); - setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); -} - -void -idt_vec_free(int vec) -{ - simple_lock(&idt_lock); - unsetgate(&idt[vec]); - idt_allocmap[vec] = 0; - simple_unlock(&idt_lock); -} - -/* - * Number of processes is limited by number of available GDT slots. - */ -int -cpu_maxproc(void) -{ -#ifdef USER_LDT - return ((MAXGDTSIZ - NGDT) / 2); -#else - return (MAXGDTSIZ - NGDT); -#endif -} - -#if defined(DDB) || defined(KGDB) - -/* - * Callback to output a backtrace when entering ddb. 
- */ -void -ddb_trap_hook(int where) -{ - static int once = 0; - db_addr_t db_dot; - - if (once != 0 || where != 1) - return; - once = 1; - - if (curlwp != NULL) { - db_printf("Stopped"); - if (curproc == NULL) - db_printf("; curlwp = %p," - " curproc is NULL at\t", curlwp); - else - db_printf(" in pid %d.%d (%s) at\t", - curproc->p_pid, curlwp->l_lid, - curproc->p_comm); - } else - db_printf("Stopped at\t"); - db_dot = PC_REGS(DDB_REGS); - db_print_loc_and_inst(db_dot); - - db_stack_trace_print((db_expr_t) db_dot, FALSE, 65535, - "", db_printf); -#ifdef DEBUG - db_show_regs((db_expr_t) db_dot, FALSE, 65535, ""); -#endif -} - -#endif /* DDB || KGDB */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/pmap.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/pmap.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,4522 +0,0 @@ -/* $NetBSD: pmap.c,v 1.1.2.1 2004/05/22 15:57:52 he Exp $ */ -/* NetBSD: pmap.c,v 1.172 2004/04/12 13:17:46 yamt Exp */ - -/* - * - * Copyright (c) 1997 Charles D. Cranor and Washington University. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Charles D. Cranor and - * Washington University. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * pmap.c: i386 pmap module rewrite - * Chuck Cranor <chuck@xxxxxxxxxxxxxx> - * 11-Aug-97 - * - * history of this pmap module: in addition to my own input, i used - * the following references for this rewrite of the i386 pmap: - * - * [1] the NetBSD i386 pmap. this pmap appears to be based on the - * BSD hp300 pmap done by Mike Hibler at University of Utah. - * it was then ported to the i386 by William Jolitz of UUNET - * Technologies, Inc. Then Charles M. Hannum of the NetBSD - * project fixed some bugs and provided some speed ups. - * - * [2] the FreeBSD i386 pmap. this pmap seems to be the - * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson - * and David Greenman. - * - * [3] the Mach pmap. this pmap, from CMU, seems to have migrated - * between several processors. the VAX version was done by - * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 - * version was done by Lance Berc, Mike Kupfer, Bob Baron, - * David Golub, and Richard Draves. the alpha version was - * done by Alessandro Forin (CMU/Mach) and Chris Demetriou - * (NetBSD/alpha). 
- */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1.2.1 2004/05/22 15:57:52 he Exp $"); - -#include "opt_cputype.h" -#include "opt_user_ldt.h" -#include "opt_largepages.h" -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" -#include "opt_kstack_dr0.h" -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/malloc.h> -#include <sys/pool.h> -#include <sys/user.h> -#include <sys/kernel.h> - -#include <uvm/uvm.h> - -#include <machine/atomic.h> -#include <machine/cpu.h> -#include <machine/specialreg.h> -#include <machine/gdt.h> - -#include <dev/isa/isareg.h> -#include <machine/isa_machdep.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/xenpmap.h> - -void xpmap_find_pte(paddr_t); - -/* #define XENDEBUG */ - -#ifdef XENDEBUG -#define XENPRINTF(x) printf x -#define XENPRINTK(x) printf x -#else -#define XENPRINTF(x) -#define XENPRINTK(x) -#endif -#define PRINTF(x) printf x -#define PRINTK(x) printf x - - -/* - * general info: - * - * - for an explanation of how the i386 MMU hardware works see - * the comments in <machine/pte.h>. - * - * - for an explanation of the general memory structure used by - * this pmap (including the recursive mapping), see the comments - * in <machine/pmap.h>. - * - * this file contains the code for the "pmap module." the module's - * job is to manage the hardware's virtual to physical address mappings. - * note that there are two levels of mapping in the VM system: - * - * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's - * to map ranges of virtual address space to objects/files. for - * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only - * to the file /bin/ls starting at offset zero." note that - * the upper layer mapping is not concerned with how individual - * vm_pages are mapped. - * - * [2] the lower layer of the VM system (the pmap) maintains the mappings - * from virtual addresses. 
it is concerned with which vm_page is - * mapped where. for example, when you run /bin/ls and start - * at page 0x1000 the fault routine may lookup the correct page - * of the /bin/ls file and then ask the pmap layer to establish - * a mapping for it. - * - * note that information in the lower layer of the VM system can be - * thrown away since it can easily be reconstructed from the info - * in the upper layer. - * - * data structures we use include: - * - * - struct pmap: describes the address space of one thread - * - struct pv_entry: describes one <PMAP,VA> mapping of a PA - * - struct pv_head: there is one pv_head per managed page of - * physical memory. the pv_head points to a list of pv_entry - * structures which describe all the <PMAP,VA> pairs that this - * page is mapped in. this is critical for page based operations - * such as pmap_page_protect() [change protection on _all_ mappings - * of a page] - * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. - * if we run out of pv_entry's we allocate a new pv_page and free - * its pv_entrys. - * - pmap_remove_record: a list of virtual addresses whose mappings - * have been changed. used for TLB flushing. - */ - -/* - * memory allocation - * - * - there are three data structures that we must dynamically allocate: - * - * [A] new process' page directory page (PDP) - * - plan 1: done at pmap_create() we use - * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this - * allocation. - * - * if we are low in free physical memory then we sleep in - * uvm_km_alloc -- in this case this is ok since we are creating - * a new pmap and should not be holding any locks. - * - * if the kernel is totally out of virtual space - * (i.e. uvm_km_alloc returns NULL), then we panic. - * - * XXX: the fork code currently has no way to return an "out of - * memory, try again" error code since uvm_fork [fka vm_fork] - * is a void function. 
- * - * [B] new page tables pages (PTP) - * - call uvm_pagealloc() - * => success: zero page, add to pm_pdir - * => failure: we are out of free vm_pages, let pmap_enter() - * tell UVM about it. - * - * note: for kernel PTPs, we start with NKPTP of them. as we map - * kernel memory (at uvm_map time) we check to see if we've grown - * the kernel pmap. if so, we call the optional function - * pmap_growkernel() to grow the kernel PTPs in advance. - * - * [C] pv_entry structures - * - plan 1: try to allocate one off the free list - * => success: done! - * => failure: no more free pv_entrys on the list - * - plan 2: try to allocate a new pv_page to add a chunk of - * pv_entrys to the free list - * [a] obtain a free, unmapped, VA in kmem_map. either - * we have one saved from a previous call, or we allocate - * one now using a "vm_map_lock_try" in uvm_map - * => success: we have an unmapped VA, continue to [b] - * => failure: unable to lock kmem_map or out of VA in it. - * move on to plan 3. - * [b] allocate a page in kmem_object for the VA - * => success: map it in, free the pv_entry's, DONE! - * => failure: kmem_object locked, no free vm_pages, etc. - * save VA for later call to [a], go to plan 3. - * If we fail, we simply let pmap_enter() tell UVM about it. - */ - -/* - * locking - * - * we have the following locks that we must contend with: - * - * "normal" locks: - * - * - pmap_main_lock - * this lock is used to prevent deadlock and/or provide mutex - * access to the pmap system. most operations lock the pmap - * structure first, then they lock the pv_lists (if needed). - * however, some operations such as pmap_page_protect lock - * the pv_lists and then lock pmaps. in order to prevent a - * cycle, we require a mutex lock when locking the pv_lists - * first. thus, the "pmap = >pv_list" lockers must gain a - * read-lock on pmap_main_lock before locking the pmap. and - * the "pv_list => pmap" lockers must gain a write-lock on - * pmap_main_lock before locking. 
since only one thread - * can write-lock a lock at a time, this provides mutex. - * - * "simple" locks: - * - * - pmap lock (per pmap, part of uvm_object) - * this lock protects the fields in the pmap structure including - * the non-kernel PDEs in the PDP, and the PTEs. it also locks - * in the alternate PTE space (since that is determined by the - * entry in the PDP). - * - * - pvh_lock (per pv_head) - * this lock protects the pv_entry list which is chained off the - * pv_head structure for a specific managed PA. it is locked - * when traversing the list (e.g. adding/removing mappings, - * syncing R/M bits, etc.) - * - * - pvalloc_lock - * this lock protects the data structures which are used to manage - * the free list of pv_entry structures. - * - * - pmaps_lock - * this lock protects the list of active pmaps (headed by "pmaps"). - * we lock it when adding or removing pmaps from this list. - * - */ - -/* - * locking data structures - */ - -static struct simplelock pvalloc_lock; -static struct simplelock pmaps_lock; - -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) -static struct lock pmap_main_lock; - -#define PMAP_MAP_TO_HEAD_LOCK() \ - (void) spinlockmgr(&pmap_main_lock, LK_SHARED, NULL) -#define PMAP_MAP_TO_HEAD_UNLOCK() \ - (void) spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL) - -#define PMAP_HEAD_TO_MAP_LOCK() \ - (void) spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL) -#define PMAP_HEAD_TO_MAP_UNLOCK() \ - spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) - -#else - -#define PMAP_MAP_TO_HEAD_LOCK() /* null */ -#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ - -#define PMAP_HEAD_TO_MAP_LOCK() /* null */ -#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ - -#endif - -#define COUNT(x) /* nothing */ - -/* - * TLB Shootdown: - * - * When a mapping is changed in a pmap, the TLB entry corresponding to - * the virtual address must be invalidated on all processors. 
In order - * to accomplish this on systems with multiple processors, messages are - * sent from the processor which performs the mapping change to all - * processors on which the pmap is active. For other processors, the - * ASN generation numbers for that processor is invalidated, so that - * the next time the pmap is activated on that processor, a new ASN - * will be allocated (which implicitly invalidates all TLB entries). - * - * Shootdown job queue entries are allocated using a simple special- - * purpose allocator for speed. - */ -struct pmap_tlb_shootdown_job { - TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; - vaddr_t pj_va; /* virtual address */ - pmap_t pj_pmap; /* the pmap which maps the address */ - pt_entry_t pj_pte; /* the PTE bits */ - struct pmap_tlb_shootdown_job *pj_nextfree; -}; - -#define PMAP_TLB_SHOOTDOWN_JOB_ALIGN 32 -union pmap_tlb_shootdown_job_al { - struct pmap_tlb_shootdown_job pja_job; - char pja_align[PMAP_TLB_SHOOTDOWN_JOB_ALIGN]; -}; - -struct pmap_tlb_shootdown_q { - TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; - int pq_pte; /* aggregate PTE bits */ - int pq_count; /* number of pending requests */ - __cpu_simple_lock_t pq_slock; /* spin lock on queue */ - int pq_flushg; /* pending flush global */ - int pq_flushu; /* pending flush user */ -} pmap_tlb_shootdown_q[X86_MAXPROCS]; - -#define PMAP_TLB_MAXJOBS 16 - -void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *); -struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get - (struct pmap_tlb_shootdown_q *); -void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *, - struct pmap_tlb_shootdown_job *); - -__cpu_simple_lock_t pmap_tlb_shootdown_job_lock; -union pmap_tlb_shootdown_job_al *pj_page, *pj_free; - -/* - * global data structures - */ - -struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ - -/* - * nkpde is the number of kernel PTPs allocated for the kernel at - * boot time (NKPTP is a compile time override). 
this number can - * grow dynamically as needed (but once allocated, we never free - * kernel PTPs). - */ - -int nkpde = NKPTP; -#ifdef NKPDE -#error "obsolete NKPDE: use NKPTP" -#endif - -/* - * pmap_pg_g: if our processor supports PG_G in the PTE then we - * set pmap_pg_g to PG_G (otherwise it is zero). - */ - -int pmap_pg_g = 0; - -#ifdef LARGEPAGES -/* - * pmap_largepages: if our processor supports PG_PS and we are - * using it, this is set to TRUE. - */ - -int pmap_largepages; -#endif - -/* - * i386 physical memory comes in a big contig chunk with a small - * hole toward the front of it... the following two paddr_t's - * (shared with machdep.c) describe the physical address space - * of this machine. - */ -paddr_t avail_start; /* PA of first available physical page */ -paddr_t avail_end; /* PA of last available physical page */ - -paddr_t pmap_pa_start; /* PA of first physical page for this domain */ -paddr_t pmap_pa_end; /* PA of last physical page for this domain */ - - /* MA of last physical page of the machine */ -paddr_t pmap_mem_end = HYPERVISOR_VIRT_START; /* updated for domain-0 */ - -/* - * other data structures - */ - -static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ -static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ - -/* - * the following two vaddr_t's are used during system startup - * to keep track of how much of the kernel's VM space we have used. - * once the system is started, the management of the remaining kernel - * VM space is turned over to the kernel_map vm_map. 
- */ - -static vaddr_t virtual_avail; /* VA of first free KVA */ -static vaddr_t virtual_end; /* VA of last free KVA */ - - -/* - * pv_page management structures: locked by pvalloc_lock - */ - -TAILQ_HEAD(pv_pagelist, pv_page); -static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */ -static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */ -static int pv_nfpvents; /* # of free pv entries */ -static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */ -static vaddr_t pv_cachedva; /* cached VA for later use */ - -#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ -#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) - /* high water mark */ - -static __inline int -pv_compare(struct pv_entry *a, struct pv_entry *b) -{ - if (a->pv_pmap < b->pv_pmap) - return (-1); - else if (a->pv_pmap > b->pv_pmap) - return (1); - else if (a->pv_va < b->pv_va) - return (-1); - else if (a->pv_va > b->pv_va) - return (1); - else - return (0); -} - -SPLAY_PROTOTYPE(pvtree, pv_entry, pv_node, pv_compare); -SPLAY_GENERATE(pvtree, pv_entry, pv_node, pv_compare); - -/* - * linked list of all non-kernel pmaps - */ - -static struct pmap_head pmaps; - -/* - * pool that pmap structures are allocated from - */ - -struct pool pmap_pmap_pool; - -/* - * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a - * X86_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing - * due to false sharing. 
- */ - -#ifdef MULTIPROCESSOR -#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) -#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) -#else -#define PTESLEW(pte, id) (pte) -#define VASLEW(va,id) (va) -#endif - -/* - * special VAs and the PTEs that map them - */ -static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; -static caddr_t csrcp, cdstp, zerop, ptpp; - -/* - * pool and cache that PDPs are allocated from - */ - -struct pool pmap_pdp_pool; -struct pool_cache pmap_pdp_cache; -u_int pmap_pdp_cache_generation; - -int pmap_pdp_ctor(void *, void *, int); -void pmap_pdp_dtor(void *, void *); - -caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ - -extern vaddr_t msgbuf_vaddr; -extern paddr_t msgbuf_paddr; - -extern vaddr_t idt_vaddr; /* we allocate IDT early */ -extern paddr_t idt_paddr; - -#if defined(I586_CPU) -/* stuff to fix the pentium f00f bug */ -extern vaddr_t pentium_idt_vaddr; -#endif - - -/* - * local prototypes - */ - -static struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); -static struct vm_page *pmap_alloc_ptp(struct pmap *, int); -static struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ -#define ALLOCPV_NEED 0 /* need PV now */ -#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ -#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ -static struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); -static void pmap_enter_pv(struct pv_head *, - struct pv_entry *, struct pmap *, - vaddr_t, struct vm_page *); -static void pmap_free_pv(struct pmap *, struct pv_entry *); -static void pmap_free_pvs(struct pmap *, struct pv_entry *); -static void pmap_free_pv_doit(struct pv_entry *); -static void pmap_free_pvpage(void); -static struct vm_page *pmap_get_ptp(struct pmap *, int); -static boolean_t pmap_is_curpmap(struct pmap *); -static boolean_t pmap_is_active(struct pmap *, int); -static pt_entry_t *pmap_map_ptes(struct pmap *); -static struct pv_entry 
*pmap_remove_pv(struct pv_head *, struct pmap *, - vaddr_t); -static void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int); -static boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, - pt_entry_t *, vaddr_t, int32_t *, int); -static void pmap_remove_ptes(struct pmap *, struct vm_page *, - vaddr_t, vaddr_t, vaddr_t, int32_t *, - int); -#define PMAP_REMOVE_ALL 0 /* remove all mappings */ -#define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */ - -static vaddr_t pmap_tmpmap_pa(paddr_t); -static pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); -static void pmap_tmpunmap_pa(void); -static void pmap_tmpunmap_pvepte(struct pv_entry *); -static void pmap_unmap_ptes(struct pmap *); - -static boolean_t pmap_reactivate(struct pmap *); - -#ifdef DEBUG -u_int curapdp; -#endif - -/* - * p m a p i n l i n e h e l p e r f u n c t i o n s - */ - -/* - * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? - * of course the kernel is always loaded - */ - -__inline static boolean_t -pmap_is_curpmap(pmap) - struct pmap *pmap; -{ - - return((pmap == pmap_kernel()) || - (pmap == curcpu()->ci_pmap)); -} - -/* - * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 
- */ - -__inline static boolean_t -pmap_is_active(pmap, cpu_id) - struct pmap *pmap; - int cpu_id; -{ - - return (pmap == pmap_kernel() || - (pmap->pm_cpus & (1U << cpu_id)) != 0); -} - -/* - * pmap_tmpmap_pa: map a page in for tmp usage - */ - -__inline static vaddr_t -pmap_tmpmap_pa(pa) - paddr_t pa; -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - pt_entry_t *maptp; - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (*ptpte) - panic("pmap_tmpmap_pa: ptp_pte in use?"); -#endif - maptp = (pt_entry_t *)vtomach((vaddr_t)ptpte); - PTE_SET(ptpte, maptp, PG_V | PG_RW | pa); /* always a new mapping */ - return((vaddr_t)ptpva); -} - -/* - * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) - */ - -__inline static void -pmap_tmpunmap_pa() -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - pt_entry_t *maptp; - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (!pmap_valid_entry(*ptp_pte)) - panic("pmap_tmpunmap_pa: our pte invalid?"); -#endif - maptp = (pt_entry_t *)vtomach((vaddr_t)ptpte); - PTE_CLEAR(ptpte, maptp); /* zap! */ - pmap_update_pg((vaddr_t)ptpva); -#ifdef MULTIPROCESSOR - /* - * No need for tlb shootdown here, since ptp_pte is per-CPU. - */ -#endif -} - -/* - * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry - * - * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] - */ - -__inline static pt_entry_t * -pmap_tmpmap_pvepte(pve) - struct pv_entry *pve; -{ -#ifdef DIAGNOSTIC - if (pve->pv_pmap == pmap_kernel()) - panic("pmap_tmpmap_pvepte: attempt to map kernel"); -#endif - - /* is it current pmap? use direct mapping... 
*/ - if (pmap_is_curpmap(pve->pv_pmap)) - return(vtopte(pve->pv_va)); - - return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) - + ptei((unsigned)pve->pv_va)); -} - -/* - * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte - */ - -__inline static void -pmap_tmpunmap_pvepte(pve) - struct pv_entry *pve; -{ - /* was it current pmap? if so, return */ - if (pmap_is_curpmap(pve->pv_pmap)) - return; - - pmap_tmpunmap_pa(); -} - -__inline static void -pmap_apte_flush(struct pmap *pmap) -{ -#if defined(MULTIPROCESSOR) - struct pmap_tlb_shootdown_q *pq; - struct cpu_info *ci, *self = curcpu(); - CPU_INFO_ITERATOR cii; - int s; -#endif - - tlbflush(); /* flush TLB on current processor */ -#if defined(MULTIPROCESSOR) - /* - * Flush the APTE mapping from all other CPUs that - * are using the pmap we are using (who's APTE space - * is the one we've just modified). - * - * XXXthorpej -- find a way to defer the IPI. - */ - for (CPU_INFO_FOREACH(cii, ci)) { - if (ci == self) - continue; - if (pmap_is_active(pmap, ci->ci_cpuid)) { - pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; - s = splipi(); - __cpu_simple_lock(&pq->pq_slock); - pq->pq_flushu++; - __cpu_simple_unlock(&pq->pq_slock); - splx(s); - x86_send_ipi(ci, X86_IPI_TLB); - } - } -#endif -} - -/* - * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in - * - * => we lock enough pmaps to keep things locked in - * => must be undone with pmap_unmap_ptes before returning - */ - -__inline static pt_entry_t * -pmap_map_ptes(pmap) - struct pmap *pmap; -{ - pd_entry_t opde; - pd_entry_t *mapdp; - struct pmap *ourpmap; - struct cpu_info *ci; - - /* the kernel's pmap is always accessible */ - if (pmap == pmap_kernel()) { - return(PTE_BASE); - } - - ci = curcpu(); - if (ci->ci_want_pmapload && - vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap) - pmap_load(); - - /* if curpmap then we are always mapped */ - if (pmap_is_curpmap(pmap)) { - simple_lock(&pmap->pm_obj.vmobjlock); - 
return(PTE_BASE); - } - - ourpmap = ci->ci_pmap; - - /* need to lock both curpmap and pmap: use ordered locking */ - if ((unsigned) pmap < (unsigned) ourpmap) { - simple_lock(&pmap->pm_obj.vmobjlock); - simple_lock(&ourpmap->pm_obj.vmobjlock); - } else { - simple_lock(&ourpmap->pm_obj.vmobjlock); - simple_lock(&pmap->pm_obj.vmobjlock); - } - - /* need to load a new alternate pt space into curpmap? */ - COUNT(apdp_pde_map); - opde = PDE_GET(APDP_PDE); - if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { - XENPRINTF(("APDP_PDE %p %p/%p set %p/%p\n", - pmap, - (void *)vtophys((vaddr_t)APDP_PDE), - (void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)), - (void *)pmap->pm_pdirpa, - (void *)xpmap_ptom(pmap->pm_pdirpa))); - mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE); - PDE_SET(APDP_PDE, mapdp, pmap->pm_pdirpa /* | PG_RW */ | PG_V); -#ifdef DEBUG - curapdp = pmap->pm_pdirpa; -#endif - if (pmap_valid_entry(opde)) - pmap_apte_flush(ourpmap); - XENPRINTF(("APDP_PDE set done\n")); - } - return(APTE_BASE); -} - -/* - * pmap_unmap_ptes: unlock the PTE mapping of "pmap" - */ - -__inline static void -pmap_unmap_ptes(pmap) - struct pmap *pmap; -{ -#if defined(MULTIPROCESSOR) - pd_entry_t *mapdp; -#endif - - if (pmap == pmap_kernel()) { - return; - } - if (pmap_is_curpmap(pmap)) { - simple_unlock(&pmap->pm_obj.vmobjlock); - } else { - struct pmap *ourpmap = curcpu()->ci_pmap; - -#if defined(MULTIPROCESSOR) - mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE); - PDE_CLEAR(APDP_PDE, mapdp); - pmap_apte_flush(ourpmap); -#endif -#ifdef DEBUG - curapdp = 0; -#endif - XENPRINTF(("APDP_PDE clear %p/%p set %p/%p\n", - (void *)vtophys((vaddr_t)APDP_PDE), - (void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)), - (void *)pmap->pm_pdirpa, - (void *)xpmap_ptom(pmap->pm_pdirpa))); - COUNT(apdp_pde_unmap); - simple_unlock(&pmap->pm_obj.vmobjlock); - simple_unlock(&ourpmap->pm_obj.vmobjlock); - } -} - -__inline static void -pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, 
pt_entry_t npte) -{ - if (curproc == NULL || curproc->p_vmspace == NULL || - pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) - return; - - if ((opte ^ npte) & PG_X) - pmap_update_pg(va); - - /* - * Executability was removed on the last executable change. - * Reset the code segment to something conservative and - * let the trap handler deal with setting the right limit. - * We can't do that because of locking constraints on the vm map. - */ - - if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { - struct trapframe *tf = curlwp->l_md.md_regs; - struct pcb *pcb = &curlwp->l_addr->u_pcb; - - pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); - pm->pm_hiexec = I386_MAX_EXE_ADDR; - } -} - -__inline static pt_entry_t -pte_mtop(pt_entry_t pte) -{ - pt_entry_t ppte; - - KDASSERT(pmap_valid_entry(pte)); - ppte = xpmap_mtop(pte); - if ((ppte & PG_FRAME) == XPMAP_OFFSET) { - XENPRINTF(("pte_mtop: null page %08x -> %08x\n", - ppte, pte)); - ppte = pte; - } - - return ppte; -} - -__inline static pt_entry_t -pte_get_ma(pt_entry_t *pte) -{ - - return *pte; -} - -__inline static pt_entry_t -pte_get(pt_entry_t *pte) -{ - - if (pmap_valid_entry(*pte)) - return pte_mtop(*pte); - return *pte; -} - -__inline static pt_entry_t -pte_atomic_update_ma(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte) -{ - pt_entry_t opte; - - XENPRINTK(("pte_atomic_update_ma pte %p mapte %p npte %08x\n", - pte, mapte, npte)); - opte = PTE_GET_MA(pte); - if (opte > pmap_mem_end) { - /* must remove opte unchecked */ - if (npte > pmap_mem_end) - /* must set npte unchecked */ - xpq_queue_unchecked_pte_update(mapte, npte); - else { - /* must set npte checked */ - xpq_queue_unchecked_pte_update(mapte, 0); - xpq_queue_pte_update(mapte, npte); - } - } else { - /* must remove opte checked */ - if (npte > pmap_mem_end) { - /* must set npte unchecked */ - xpq_queue_pte_update(mapte, 0); - xpq_queue_unchecked_pte_update(mapte, npte); - } else - /* must set npte checked */ - 
xpq_queue_pte_update(mapte, npte); - } - xpq_flush_queue(); - - return opte; -} - -__inline static pt_entry_t -pte_atomic_update(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte) -{ - pt_entry_t opte; - - opte = pte_atomic_update_ma(pte, mapte, npte); - - return pte_mtop(opte); -} - -/* - * Fixup the code segment to cover all potential executable mappings. - * returns 0 if no changes to the code segment were made. - */ - -int -pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) -{ - struct vm_map_entry *ent; - struct pmap *pm = vm_map_pmap(map); - vaddr_t va = 0; - - vm_map_lock_read(map); - for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { - - /* - * This entry has greater va than the entries before. - * We need to make it point to the last page, not past it. - */ - - if (ent->protection & VM_PROT_EXECUTE) - va = trunc_page(ent->end) - PAGE_SIZE; - } - vm_map_unlock_read(map); - if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) - return (0); - - pm->pm_hiexec = va; - if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { - pcb->pcb_cs = tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); - } else { - pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); - return (0); - } - return (1); -} - -/* - * p m a p k e n t e r f u n c t i o n s - * - * functions to quickly enter/remove pages from the kernel address - * space. pmap_kremove is exported to MI kernel. we make use of - * the recursive PTE mappings. - */ - -/* - * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking - * - * => no need to lock anything, assume va is already allocated - * => should be faster than normal pmap enter function - */ - -void -pmap_kenter_pa(va, pa, prot) - vaddr_t va; - paddr_t pa; - vm_prot_t prot; -{ - pt_entry_t *pte, opte, npte; - pt_entry_t *maptp; - - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); - - npte = ((prot & VM_PROT_WRITE) ? 
PG_RW : PG_RO) | - PG_V | pmap_pg_g; - - if (pa >= pmap_pa_start && pa < pmap_pa_end) { - npte |= xpmap_ptom(pa); - } else { - XENPRINTF(("pmap_kenter: va %08lx outside pa range %08lx\n", - va, pa)); - npte |= pa; - } - - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */ - XENPRINTK(("pmap_kenter_pa(%p,%p) %p, was %08x now %08x\n", (void *)va, - (void *)pa, pte, opte, npte)); -#ifdef LARGEPAGES - /* XXX For now... */ - if (opte & PG_PS) - panic("pmap_kenter_pa: PG_PS"); -#endif - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { -#if defined(MULTIPROCESSOR) - int32_t cpumask = 0; - - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. */ - pmap_update_pg(va); -#endif - } -} - -/* - * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking - * - * => no need to lock anything, assume va is already allocated - * => should be faster than normal pmap enter function - */ - -void pmap_kenter_ma __P((vaddr_t, paddr_t, vm_prot_t)); - -void -pmap_kenter_ma(va, ma, prot) - vaddr_t va; - paddr_t ma; - vm_prot_t prot; -{ - pt_entry_t *pte, opte, npte; - pt_entry_t *maptp; - - KASSERT (va >= VM_MIN_KERNEL_ADDRESS); - pte = kvtopte(va); - - npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) | - PG_V | pmap_pg_g; - - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */ - XENPRINTK(("pmap_kenter_ma(%p,%p) %p, was %08x\n", (void *)va, - (void *)ma, pte, opte)); -#ifdef LARGEPAGES - /* XXX For now... */ - if (opte & PG_PS) - panic("pmap_kenter_ma: PG_PS"); -#endif - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { -#if defined(MULTIPROCESSOR) - int32_t cpumask = 0; - - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. 
*/ - pmap_update_pg(va); -#endif - } -} - -/* - * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking - * - * => no need to lock anything - * => caller must dispose of any vm_page mapped in the va range - * => note: not an inline function - * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE - * => we assume kernel only unmaps valid addresses and thus don't bother - * checking the valid bit before doing TLB flushing - */ - -void -pmap_kremove(va, len) - vaddr_t va; - vsize_t len; -{ - pt_entry_t *pte, opte; - pt_entry_t *maptp; - int32_t cpumask = 0; - - XENPRINTK(("pmap_kremove va %p, len %08lx\n", (void *)va, len)); - len >>= PAGE_SHIFT; - for ( /* null */ ; len ; len--, va += PAGE_SIZE) { - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - opte = pte_atomic_update_ma(pte, maptp, 0); /* zap! */ - XENPRINTK(("pmap_kremove pte %p, was %08x\n", pte, opte)); -#ifdef LARGEPAGES - /* XXX For now... */ - if (opte & PG_PS) - panic("pmap_kremove: PG_PS"); -#endif -#ifdef DIAGNOSTIC - if (opte & PG_PVLIST) - panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", - va); -#endif - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); - } - pmap_tlb_shootnow(cpumask); -} - -/* - * p m a p i n i t f u n c t i o n s - * - * pmap_bootstrap and pmap_init are called during system startup - * to init the pmap module. pmap_bootstrap() does a low level - * init just to get things rolling. pmap_init() finishes the job. - */ - -/* - * pmap_bootstrap: get the system in a state where it can run with VM - * properly enabled (called before main()). the VM system is - * fully init'd later... - * - * => on i386, locore.s has already enabled the MMU by allocating - * a PDP for the kernel, and nkpde PTP's for the kernel. 
- * => kva_start is the first free virtual address in kernel space - */ - -void -pmap_bootstrap(kva_start) - vaddr_t kva_start; -{ - struct pmap *kpm; - vaddr_t kva; - pt_entry_t *pte; - pt_entry_t *maptp; - int i; - - /* - * set up our local static global vars that keep track of the - * usage of KVM before kernel_map is set up - */ - - virtual_avail = kva_start; /* first free KVA */ - virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ - - /* - * find out where physical memory ends on the real hardware. - */ - - if (xen_start_info.flags & SIF_PRIVILEGED) - pmap_mem_end = find_pmap_mem_end(kva_start); - - /* - * set up protection_codes: we need to be able to convert from - * a MI protection code (some combo of VM_PROT...) to something - * we can jam into a i386 PTE. - */ - - protection_codes[VM_PROT_NONE] = 0; /* --- */ - protection_codes[VM_PROT_EXECUTE] = PG_X; /* --x */ - protection_codes[VM_PROT_READ] = PG_RO; /* -r- */ - protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO|PG_X;/* -rx */ - protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */ - protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW|PG_X;/* w-x */ - protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */ - protection_codes[VM_PROT_ALL] = PG_RW|PG_X; /* wrx */ - - /* - * now we init the kernel's pmap - * - * the kernel pmap's pm_obj is not used for much. however, in - * user pmaps the pm_obj contains the list of active PTPs. - * the pm_obj currently does not have a pager. it might be possible - * to add a pager that would allow a process to read-only mmap its - * own page tables (fast user level vtophys?). this may or may not - * be useful. 
- */ - - kpm = pmap_kernel(); - simple_lock_init(&kpm->pm_obj.vmobjlock); - kpm->pm_obj.pgops = NULL; - TAILQ_INIT(&kpm->pm_obj.memq); - kpm->pm_obj.uo_npages = 0; - kpm->pm_obj.uo_refs = 1; - memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ - kpm->pm_pdir = (pd_entry_t *)(lwp0.l_addr->u_pcb.pcb_cr3 + KERNBASE); - XENPRINTF(("pm_pdirpa %p PTDpaddr %p\n", - (void *)lwp0.l_addr->u_pcb.pcb_cr3, (void *)PTDpaddr)); - kpm->pm_pdirpa = (u_int32_t) lwp0.l_addr->u_pcb.pcb_cr3; - kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = - x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS); - - /* - * the above is just a rough estimate and not critical to the proper - * operation of the system. - */ - - /* - * Begin to enable global TLB entries if they are supported. - * The G bit has no effect until the CR4_PGE bit is set in CR4, - * which happens in cpu_init(), which is run on each cpu - * (and happens later) - */ - - if (cpu_feature & CPUID_PGE) { - pmap_pg_g = PG_G; /* enable software */ - - /* add PG_G attribute to already mapped kernel pages */ - for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; - kva += PAGE_SIZE) - if (pmap_valid_entry(PTE_BASE[x86_btop(kva)])) { -#if !defined(XEN) - PTE_BASE[x86_btop(kva)] |= PG_G; -#else - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&PTE_BASE[x86_btop(kva)]); - PTE_SETBITS(&PTE_BASE[x86_btop(kva)], maptp, - PG_G); - } - PTE_UPDATES_FLUSH(); -#endif - } - -#ifdef LARGEPAGES - /* - * enable large pages if they are supported. - */ - - if (cpu_feature & CPUID_PSE) { - paddr_t pa; - vaddr_t kva_end; - pd_entry_t *pde; - pd_entry_t *mapdp; - extern char _etext; - - lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ - pmap_largepages = 1; /* enable software */ - - /* - * the TLB must be flushed after enabling large pages - * on Pentium CPUs, according to section 3.6.2.2 of - * "Intel Architecture Software Developer's Manual, - * Volume 3: System Programming". 
- */ - tlbflush(); - - /* - * now, remap the kernel text using large pages. we - * assume that the linker has properly aligned the - * .data segment to a 4MB boundary. - */ - kva_end = roundup((vaddr_t)&_etext, NBPD); - for (pa = 0, kva = KERNBASE; kva < kva_end; - kva += NBPD, pa += NBPD) { - pde = &kpm->pm_pdir[pdei(kva)]; - mapdp = (pt_entry_t *)vtomach((vaddr_t)pde); - PDE_SET(pde, mapdp, pa | pmap_pg_g | PG_PS | - PG_KR | PG_V); /* zap! */ - tlbflush(); - } - } -#endif /* LARGEPAGES */ - - /* - * now we allocate the "special" VAs which are used for tmp mappings - * by the pmap (and other modules). we allocate the VAs by advancing - * virtual_avail (note that there are no pages mapped at these VAs). - * we find the PTE that maps the allocated VA via the linear PTE - * mapping. - */ - - pte = PTE_BASE + x86_btop(virtual_avail); - -#ifdef MULTIPROCESSOR - /* - * Waste some VA space to avoid false sharing of cache lines - * for page table pages: Give each possible CPU a cache line - * of PTE's (8) to play with, though we only need 4. We could - * recycle some of this waste by putting the idle stacks here - * as well; we could waste less space if we knew the largest - * CPU ID beforehand. 
- */ - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; - - cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; - - zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; - - ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; - - virtual_avail += PAGE_SIZE * X86_MAXPROCS * NPTECL; - pte += X86_MAXPROCS * NPTECL; -#else - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ - virtual_avail += PAGE_SIZE; pte++; /* advance */ - - cdstp = (caddr_t) virtual_avail; cdst_pte = pte; - virtual_avail += PAGE_SIZE; pte++; - - zerop = (caddr_t) virtual_avail; zero_pte = pte; - virtual_avail += PAGE_SIZE; pte++; - - ptpp = (caddr_t) virtual_avail; ptp_pte = pte; - virtual_avail += PAGE_SIZE; pte++; -#endif - - XENPRINTK(("pmap_bootstrap csrcp %p cdstp %p zerop %p ptpp %p\n", - csrc_pte, cdst_pte, zero_pte, ptp_pte)); - /* - * Nothing after this point actually needs pte; - */ - pte = (void *)0xdeadbeef; - - /* XXX: vmmap used by mem.c... should be uvm_map_reserve */ - vmmap = (char *)virtual_avail; /* don't need pte */ - virtual_avail += PAGE_SIZE; - - msgbuf_vaddr = virtual_avail; /* don't need pte */ - virtual_avail += round_page(MSGBUFSIZE); - - idt_vaddr = virtual_avail; /* don't need pte */ - virtual_avail += PAGE_SIZE; - idt_paddr = avail_start; /* steal a page */ - avail_start += PAGE_SIZE; - -#if defined(I586_CPU) - /* pentium f00f bug stuff */ - pentium_idt_vaddr = virtual_avail; /* don't need pte */ - virtual_avail += PAGE_SIZE; -#endif - - /* - * now we reserve some VM for mapping pages when doing a crash dump - */ - - virtual_avail = reserve_dumppages(virtual_avail); - - /* - * init the static-global locks and global lists. - */ - -#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) - spinlockinit(&pmap_main_lock, "pmaplk", 0); -#endif - simple_lock_init(&pvalloc_lock); - simple_lock_init(&pmaps_lock); - LIST_INIT(&pmaps); - TAILQ_INIT(&pv_freepages); - TAILQ_INIT(&pv_unusedpgs); - - /* - * initialize the pmap pool. 
- */ - - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", - &pool_allocator_nointr); - - /* - * Initialize the TLB shootdown queues. - */ - - __cpu_simple_lock_init(&pmap_tlb_shootdown_job_lock); - - for (i = 0; i < X86_MAXPROCS; i++) { - TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); - __cpu_simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock); - } - - /* - * initialize the PDE pool and cache. - */ - pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", - &pool_allocator_nointr); - pool_cache_init(&pmap_pdp_cache, &pmap_pdp_pool, - pmap_pdp_ctor, pmap_pdp_dtor, NULL); - - /* - * ensure the TLB is sync'd with reality by flushing it... - */ - - tlbflush(); -} - -/* - * pmap_init: called from uvm_init, our job is to get the pmap - * system ready to manage mappings... this mainly means initing - * the pv_entry stuff. - */ - -void -pmap_init() -{ - int i; - - /* - * now we need to free enough pv_entry structures to allow us to get - * the kmem_map/kmem_object allocated and inited (done after this - * function is finished). to do this we allocate one bootstrap page out - * of kernel_map and use it to provide an initial pool of pv_entry - * structures. we never free this page. 
- */ - - pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE); - if (pv_initpage == NULL) - panic("pmap_init: pv_initpage"); - pv_cachedva = 0; /* a VA we have allocated but not used yet */ - pv_nfpvents = 0; - (void) pmap_add_pvpage(pv_initpage, FALSE); - - pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE); - if (pj_page == NULL) - panic("pmap_init: pj_page"); - - for (i = 0; - i < (PAGE_SIZE / sizeof (union pmap_tlb_shootdown_job_al) - 1); - i++) - pj_page[i].pja_job.pj_nextfree = &pj_page[i + 1].pja_job; - pj_page[i].pja_job.pj_nextfree = NULL; - pj_free = &pj_page[0]; - - /* - * done: pmap module is up (and ready for business) - */ - - pmap_initialized = TRUE; -} - -/* - * p v _ e n t r y f u n c t i o n s - */ - -/* - * pv_entry allocation functions: - * the main pv_entry allocation functions are: - * pmap_alloc_pv: allocate a pv_entry structure - * pmap_free_pv: free one pv_entry - * pmap_free_pvs: free a list of pv_entrys - * - * the rest are helper functions - */ - -/* - * pmap_alloc_pv: inline function to allocate a pv_entry structure - * => we lock pvalloc_lock - * => if we fail, we call out to pmap_alloc_pvpage - * => 3 modes: - * ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it - * ALLOCPV_TRY = we want a pv_entry, but not enough to steal - * ALLOCPV_NONEED = we are trying to grow our free list, don't really need - * one now - * - * "try" is for optional functions like pmap_copy(). - */ - -__inline static struct pv_entry * -pmap_alloc_pv(pmap, mode) - struct pmap *pmap; - int mode; -{ - struct pv_page *pvpage; - struct pv_entry *pv; - - simple_lock(&pvalloc_lock); - - pvpage = TAILQ_FIRST(&pv_freepages); - if (pvpage != NULL) { - pvpage->pvinfo.pvpi_nfree--; - if (pvpage->pvinfo.pvpi_nfree == 0) { - /* nothing left in this one? 
*/ - TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list); - } - pv = pvpage->pvinfo.pvpi_pvfree; - KASSERT(pv); - pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node); - pv_nfpvents--; /* took one from pool */ - } else { - pv = NULL; /* need more of them */ - } - - /* - * if below low water mark or we didn't get a pv_entry we try and - * create more pv_entrys ... - */ - - if (pv_nfpvents < PVE_LOWAT || pv == NULL) { - if (pv == NULL) - pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ? - mode : ALLOCPV_NEED); - else - (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED); - } - simple_unlock(&pvalloc_lock); - return(pv); -} - -/* - * pmap_alloc_pvpage: maybe allocate a new pvpage - * - * if need_entry is false: try and allocate a new pv_page - * if need_entry is true: try and allocate a new pv_page and return a - * new pv_entry from it. if we are unable to allocate a pv_page - * we make a last ditch effort to steal a pv_page from some other - * mapping. if that fails, we panic... - * - * => we assume that the caller holds pvalloc_lock - */ - -static struct pv_entry * -pmap_alloc_pvpage(pmap, mode) - struct pmap *pmap; - int mode; -{ - struct vm_page *pg; - struct pv_page *pvpage; - struct pv_entry *pv; - int s; - - /* - * if we need_entry and we've got unused pv_pages, allocate from there - */ - - pvpage = TAILQ_FIRST(&pv_unusedpgs); - if (mode != ALLOCPV_NONEED && pvpage != NULL) { - - /* move it to pv_freepages list */ - TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); - TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); - - /* allocate a pv_entry */ - pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */ - pv = pvpage->pvinfo.pvpi_pvfree; - KASSERT(pv); - pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node); - pv_nfpvents--; /* took one from pool */ - return(pv); - } - - /* - * see if we've got a cached unmapped VA that we can map a page in. - * if not, try to allocate one. 
- */ - - if (pv_cachedva == 0) { - s = splvm(); /* must protect kmem_map with splvm! */ - pv_cachedva = uvm_km_kmemalloc(kmem_map, NULL, PAGE_SIZE, - UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); - splx(s); - if (pv_cachedva == 0) { - return (NULL); - } - } - - pg = uvm_pagealloc(NULL, pv_cachedva - vm_map_min(kernel_map), NULL, - UVM_PGA_USERESERVE); - if (pg == NULL) - return (NULL); - pg->flags &= ~PG_BUSY; /* never busy */ - - /* - * add a mapping for our new pv_page and free its entrys (save one!) - * - * NOTE: If we are allocating a PV page for the kernel pmap, the - * pmap is already locked! (...but entering the mapping is safe...) - */ - - pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - pmap_update(pmap_kernel()); - pvpage = (struct pv_page *) pv_cachedva; - pv_cachedva = 0; - return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); -} - -/* - * pmap_add_pvpage: add a pv_page's pv_entrys to the free list - * - * => caller must hold pvalloc_lock - * => if need_entry is true, we allocate and return one pv_entry - */ - -static struct pv_entry * -pmap_add_pvpage(pvp, need_entry) - struct pv_page *pvp; - boolean_t need_entry; -{ - int tofree, lcv; - - /* do we need to return one? */ - tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE; - - pvp->pvinfo.pvpi_pvfree = NULL; - pvp->pvinfo.pvpi_nfree = tofree; - for (lcv = 0 ; lcv < tofree ; lcv++) { - SPLAY_RIGHT(&pvp->pvents[lcv], pv_node) = - pvp->pvinfo.pvpi_pvfree; - pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv]; - } - if (need_entry) - TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list); - else - TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - pv_nfpvents += tofree; - return((need_entry) ? &pvp->pvents[lcv] : NULL); -} - -/* - * pmap_free_pv_doit: actually free a pv_entry - * - * => do not call this directly! instead use either - * 1. pmap_free_pv ==> free a single pv_entry - * 2. 
pmap_free_pvs => free a list of pv_entrys - * => we must be holding pvalloc_lock - */ - -__inline static void -pmap_free_pv_doit(pv) - struct pv_entry *pv; -{ - struct pv_page *pvp; - - pvp = (struct pv_page *) x86_trunc_page(pv); - pv_nfpvents++; - pvp->pvinfo.pvpi_nfree++; - - /* nfree == 1 => fully allocated page just became partly allocated */ - if (pvp->pvinfo.pvpi_nfree == 1) { - TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list); - } - - /* free it */ - SPLAY_RIGHT(pv, pv_node) = pvp->pvinfo.pvpi_pvfree; - pvp->pvinfo.pvpi_pvfree = pv; - - /* - * are all pv_page's pv_entry's free? move it to unused queue. - */ - - if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) { - TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list); - TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - } -} - -/* - * pmap_free_pv: free a single pv_entry - * - * => we gain the pvalloc_lock - */ - -__inline static void -pmap_free_pv(pmap, pv) - struct pmap *pmap; - struct pv_entry *pv; -{ - simple_lock(&pvalloc_lock); - pmap_free_pv_doit(pv); - - /* - * Can't free the PV page if the PV entries were associated with - * the kernel pmap; the pmap is already locked. - */ - if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL && - pmap != pmap_kernel()) - pmap_free_pvpage(); - - simple_unlock(&pvalloc_lock); -} - -/* - * pmap_free_pvs: free a list of pv_entrys - * - * => we gain the pvalloc_lock - */ - -__inline static void -pmap_free_pvs(pmap, pvs) - struct pmap *pmap; - struct pv_entry *pvs; -{ - struct pv_entry *nextpv; - - simple_lock(&pvalloc_lock); - - for ( /* null */ ; pvs != NULL ; pvs = nextpv) { - nextpv = SPLAY_RIGHT(pvs, pv_node); - pmap_free_pv_doit(pvs); - } - - /* - * Can't free the PV page if the PV entries were associated with - * the kernel pmap; the pmap is already locked. 
- */ - if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL && - pmap != pmap_kernel()) - pmap_free_pvpage(); - - simple_unlock(&pvalloc_lock); -} - - -/* - * pmap_free_pvpage: try and free an unused pv_page structure - * - * => assume caller is holding the pvalloc_lock and that - * there is a page on the pv_unusedpgs list - * => if we can't get a lock on the kmem_map we try again later - */ - -static void -pmap_free_pvpage() -{ - int s; - struct vm_map *map; - struct vm_map_entry *dead_entries; - struct pv_page *pvp; - - s = splvm(); /* protect kmem_map */ - - pvp = TAILQ_FIRST(&pv_unusedpgs); - - /* - * note: watch out for pv_initpage which is allocated out of - * kernel_map rather than kmem_map. - */ - - if (pvp == pv_initpage) - map = kernel_map; - else - map = kmem_map; - if (vm_map_lock_try(map)) { - - /* remove pvp from pv_unusedpgs */ - TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); - - /* unmap the page */ - dead_entries = NULL; - uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE, - &dead_entries); - vm_map_unlock(map); - - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); - - pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ - } - if (pvp == pv_initpage) - /* no more initpage, we've freed it */ - pv_initpage = NULL; - - splx(s); -} - -/* - * pmap_lock_pvhs: Lock pvh1 and optional pvh2 - * Observe locking order when locking both pvhs - */ - -__inline static void -pmap_lock_pvhs(struct pv_head *pvh1, struct pv_head *pvh2) -{ - - if (pvh2 == NULL) { - simple_lock(&pvh1->pvh_lock); - return; - } - - if (pvh1 < pvh2) { - simple_lock(&pvh1->pvh_lock); - simple_lock(&pvh2->pvh_lock); - } else { - simple_lock(&pvh2->pvh_lock); - simple_lock(&pvh1->pvh_lock); - } -} - - -/* - * main pv_entry manipulation functions: - * pmap_enter_pv: enter a mapping onto a pv_head list - * pmap_remove_pv: remove a mappiing from a pv_head list - * - * NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock - * the pvh 
before calling - */ - -/* - * pmap_enter_pv: enter a mapping onto a pv_head lst - * - * => caller should hold the proper lock on pmap_main_lock - * => caller should have pmap locked - * => caller should have the pv_head locked - * => caller should adjust ptp's wire_count before calling - */ - -__inline static void -pmap_enter_pv(pvh, pve, pmap, va, ptp) - struct pv_head *pvh; - struct pv_entry *pve; /* preallocated pve for us to use */ - struct pmap *pmap; - vaddr_t va; - struct vm_page *ptp; /* PTP in pmap that maps this VA */ -{ - pve->pv_pmap = pmap; - pve->pv_va = va; - pve->pv_ptp = ptp; /* NULL for kernel pmap */ - SPLAY_INSERT(pvtree, &pvh->pvh_root, pve); /* add to locked list */ -} - -/* - * pmap_remove_pv: try to remove a mapping from a pv_list - * - * => caller should hold proper lock on pmap_main_lock - * => pmap should be locked - * => caller should hold lock on pv_head [so that attrs can be adjusted] - * => caller should adjust ptp's wire_count and free PTP if needed - * => we return the removed pve - */ - -__inline static struct pv_entry * -pmap_remove_pv(pvh, pmap, va) - struct pv_head *pvh; - struct pmap *pmap; - vaddr_t va; -{ - struct pv_entry tmp, *pve; - - tmp.pv_pmap = pmap; - tmp.pv_va = va; - pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp); - if (pve == NULL) - return (NULL); - SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); - return(pve); /* return removed pve */ -} - -/* - * p t p f u n c t i o n s - */ - -/* - * pmap_alloc_ptp: allocate a PTP for a PMAP - * - * => pmap should already be locked by caller - * => we use the ptp's wire_count to count the number of active mappings - * in the PTP (we start it at one to prevent any chance this PTP - * will ever leak onto the active/inactive queues) - */ - -__inline static struct vm_page * -pmap_alloc_ptp(pmap, pde_index) - struct pmap *pmap; - int pde_index; -{ - struct vm_page *ptp; - pd_entry_t *mapdp; - - ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, - 
UVM_PGA_USERESERVE|UVM_PGA_ZERO); - if (ptp == NULL) - return(NULL); - - /* got one! */ - ptp->flags &= ~PG_BUSY; /* never busy */ - ptp->wire_count = 1; /* no mappings yet */ - mapdp = (pt_entry_t *)vtomach((vaddr_t)&pmap->pm_pdir[pde_index]); - PDE_SET(&pmap->pm_pdir[pde_index], mapdp, - (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V)); - pmap->pm_stats.resident_count++; /* count PTP as resident */ - pmap->pm_ptphint = ptp; - return(ptp); -} - -/* - * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) - * - * => pmap should NOT be pmap_kernel() - * => pmap should be locked - */ - -static struct vm_page * -pmap_get_ptp(pmap, pde_index) - struct pmap *pmap; - int pde_index; -{ - struct vm_page *ptp; - - if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { - - /* valid... check hint (saves us a PA->PG lookup) */ - if (pmap->pm_ptphint && - (PDE_GET(&pmap->pm_pdir[pde_index]) & PG_FRAME) == - VM_PAGE_TO_PHYS(pmap->pm_ptphint)) - return(pmap->pm_ptphint); - - ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); -#ifdef DIAGNOSTIC - if (ptp == NULL) - panic("pmap_get_ptp: unmanaged user PTP"); -#endif - pmap->pm_ptphint = ptp; - return(ptp); - } - - /* allocate a new PTP (updates ptphint) */ - return(pmap_alloc_ptp(pmap, pde_index)); -} - -/* - * p m a p l i f e c y c l e f u n c t i o n s - */ - -/* - * pmap_pdp_ctor: constructor for the PDP cache. - */ - -int -pmap_pdp_ctor(void *arg, void *object, int flags) -{ - pd_entry_t *pdir = object; - paddr_t pdirpa; - - /* - * NOTE: The `pmap_lock' is held when the PDP is allocated. - * WE MUST NOT BLOCK! - */ - - /* fetch the physical address of the page directory. 
*/ - (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); - - XENPRINTF(("pmap_pdp_ctor %p %p\n", pdir, (void *)pdirpa)); - - /* zero init area */ - memset(pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t)); - - /* put in recursive PDE to map the PTEs */ - pdir[PDSLOT_PTE] = xpmap_ptom(pdirpa | PG_V /* | PG_KW */); - - /* put in kernel VM PDEs */ - memcpy(&pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN], - nkpde * sizeof(pd_entry_t)); - - /* zero the rest */ - memset(&pdir[PDSLOT_KERN + nkpde], 0, - PAGE_SIZE - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); - - pmap_enter(pmap_kernel(), (vaddr_t)pdir, pdirpa, VM_PROT_READ, - VM_PROT_READ); - pmap_update(pmap_kernel()); - - /* pin page type */ - xpq_queue_pin_table(xpmap_ptom(pdirpa), XPQ_PIN_L2_TABLE); - xpq_flush_queue(); - - return (0); -} - -void -pmap_pdp_dtor(void *arg, void *object) -{ - pd_entry_t *pdir = object; - paddr_t pdirpa; - - /* fetch the physical address of the page directory. */ - pdirpa = PDE_GET(&pdir[PDSLOT_PTE]) & PG_FRAME; - - XENPRINTF(("pmap_pdp_dtor %p %p\n", pdir, (void *)pdirpa)); - - /* unpin page type */ - xpq_queue_unpin_table(xpmap_ptom(pdirpa)); - xpq_flush_queue(); -} - -/* - * pmap_create: create a pmap - * - * => note: old pmap interface took a "size" args which allowed for - * the creation of "software only" pmaps (not in bsd). 
- */ - -struct pmap * -pmap_create() -{ - struct pmap *pmap; - u_int gen; - - XENPRINTF(("pmap_create\n")); - pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); - - /* init uvm_object */ - simple_lock_init(&pmap->pm_obj.vmobjlock); - pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ - TAILQ_INIT(&pmap->pm_obj.memq); - pmap->pm_obj.uo_npages = 0; - pmap->pm_obj.uo_refs = 1; - pmap->pm_stats.wired_count = 0; - pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ - pmap->pm_ptphint = NULL; - pmap->pm_hiexec = 0; - pmap->pm_flags = 0; - pmap->pm_cpus = 0; - - /* init the LDT */ - pmap->pm_ldt = NULL; - pmap->pm_ldt_len = 0; - pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - - /* allocate PDP */ - - /* - * we need to lock pmaps_lock to prevent nkpde from changing on - * us. note that there is no need to splvm to protect us from - * malloc since malloc allocates out of a submap and we should - * have already allocated kernel PTPs to cover the range... - * - * NOTE: WE MUST NOT BLOCK WHILE HOLDING THE `pmap_lock', nor - * must we call pmap_growkernel() while holding it! - */ - - try_again: - gen = pmap_pdp_cache_generation; - pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK); - - simple_lock(&pmaps_lock); - - if (gen != pmap_pdp_cache_generation) { - simple_unlock(&pmaps_lock); - pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir); - goto try_again; - } - - pmap->pm_pdirpa = PDE_GET(&pmap->pm_pdir[PDSLOT_PTE]) & PG_FRAME; - XENPRINTF(("pmap_create %p set pm_pdirpa %p/%p slotval %p\n", pmap, - (void *)pmap->pm_pdirpa, - (void *)xpmap_ptom(pmap->pm_pdirpa), - (void *)pmap->pm_pdir[PDSLOT_PTE])); - - LIST_INSERT_HEAD(&pmaps, pmap, pm_list); - - simple_unlock(&pmaps_lock); - - return (pmap); -} - -/* - * pmap_destroy: drop reference count on pmap. free pmap if - * reference count goes to zero. 
- */ - -void -pmap_destroy(pmap) - struct pmap *pmap; -{ - int refs; -#ifdef DIAGNOSTIC - struct cpu_info *ci; - CPU_INFO_ITERATOR cii; -#endif /* DIAGNOSTIC */ - - /* - * drop reference count - */ - - simple_lock(&pmap->pm_obj.vmobjlock); - refs = --pmap->pm_obj.uo_refs; - simple_unlock(&pmap->pm_obj.vmobjlock); - if (refs > 0) { - return; - } - -#ifdef DIAGNOSTIC - for (CPU_INFO_FOREACH(cii, ci)) - if (ci->ci_pmap == pmap) - panic("destroying pmap being used"); -#endif /* DIAGNOSTIC */ - - /* - * reference count is zero, free pmap resources and then free pmap. - */ - - XENPRINTF(("pmap_destroy %p pm_pdirpa %p/%p\n", pmap, - (void *)pmap->pm_pdirpa, - (void *)xpmap_ptom(pmap->pm_pdirpa))); - - /* - * remove it from global list of pmaps - */ - - simple_lock(&pmaps_lock); - LIST_REMOVE(pmap, pm_list); - simple_unlock(&pmaps_lock); - - /* - * destroyed pmap shouldn't have remaining PTPs - */ - - KASSERT(pmap->pm_obj.uo_npages == 0); - KASSERT(TAILQ_EMPTY(&pmap->pm_obj.memq)); - - /* - * MULTIPROCESSOR -- no need to flush out of other processors' - * APTE space because we do that in pmap_unmap_ptes(). - */ - pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); - -#ifdef USER_LDT - if (pmap->pm_flags & PMF_USER_LDT) { - /* - * no need to switch the LDT; this address space is gone, - * nothing is using it. - * - * No need to lock the pmap for ldt_free (or anything else), - * we're the last one to use it. - */ - ldt_free(pmap); - uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt, - pmap->pm_ldt_len * sizeof(union descriptor)); - } -#endif - - pool_put(&pmap_pmap_pool, pmap); -} - -/* - * Add a reference to the specified pmap. - */ - -void -pmap_reference(pmap) - struct pmap *pmap; -{ - simple_lock(&pmap->pm_obj.vmobjlock); - pmap->pm_obj.uo_refs++; - simple_unlock(&pmap->pm_obj.vmobjlock); -} - -#if defined(PMAP_FORK) -/* - * pmap_fork: perform any necessary data structure manipulation when - * a VM space is forked. 
- */ - -void -pmap_fork(pmap1, pmap2) - struct pmap *pmap1, *pmap2; -{ - simple_lock(&pmap1->pm_obj.vmobjlock); - simple_lock(&pmap2->pm_obj.vmobjlock); - -#ifdef USER_LDT - /* Copy the LDT, if necessary. */ - if (pmap1->pm_flags & PMF_USER_LDT) { - union descriptor *new_ldt; - size_t len; - - len = pmap1->pm_ldt_len * sizeof(union descriptor); - new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len); - memcpy(new_ldt, pmap1->pm_ldt, len); - pmap2->pm_ldt = new_ldt; - pmap2->pm_ldt_len = pmap1->pm_ldt_len; - pmap2->pm_flags |= PMF_USER_LDT; - ldt_alloc(pmap2, new_ldt, len); - } -#endif /* USER_LDT */ - - simple_unlock(&pmap2->pm_obj.vmobjlock); - simple_unlock(&pmap1->pm_obj.vmobjlock); -} -#endif /* PMAP_FORK */ - -#ifdef USER_LDT -/* - * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and - * restore the default. - */ - -void -pmap_ldt_cleanup(l) - struct lwp *l; -{ - struct pcb *pcb = &l->l_addr->u_pcb; - pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap; - union descriptor *old_ldt = NULL; - size_t len = 0; - - simple_lock(&pmap->pm_obj.vmobjlock); - - if (pmap->pm_flags & PMF_USER_LDT) { - ldt_free(pmap); - pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - pcb->pcb_ldt_sel = pmap->pm_ldt_sel; - if (pcb == curpcb) - lldt(pcb->pcb_ldt_sel); - old_ldt = pmap->pm_ldt; - len = pmap->pm_ldt_len * sizeof(union descriptor); - pmap->pm_ldt = NULL; - pmap->pm_ldt_len = 0; - pmap->pm_flags &= ~PMF_USER_LDT; - } - - simple_unlock(&pmap->pm_obj.vmobjlock); - - if (old_ldt != NULL) - uvm_km_free(kernel_map, (vaddr_t)old_ldt, len); -} -#endif /* USER_LDT */ - -/* - * pmap_activate: activate a process' pmap - * - * => called from cpu_switch() - * => if lwp is the curlwp, then set ci_want_pmapload so that - * actual MMU context switch will be done by pmap_load() later - */ - -void -pmap_activate(l) - struct lwp *l; -{ - struct cpu_info *ci = curcpu(); - struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - - if (l == ci->ci_curlwp) { - struct pcb *pcb; 
- - KASSERT(ci->ci_want_pmapload == 0); - KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); -#ifdef KSTACK_CHECK_DR0 - /* - * setup breakpoint on the top of stack - */ - if (l == &lwp0) - dr0(0, 0, 0, 0); - else - dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1); -#endif - - /* - * no need to switch to kernel vmspace because - * it's a subset of any vmspace. - */ - - if (pmap == pmap_kernel()) { - ci->ci_want_pmapload = 0; - return; - } - - pcb = &l->l_addr->u_pcb; - pcb->pcb_ldt_sel = pmap->pm_ldt_sel; - - ci->ci_want_pmapload = 1; - } -} - -/* - * pmap_reactivate: try to regain reference to the pmap. - */ - -static boolean_t -pmap_reactivate(struct pmap *pmap) -{ - struct cpu_info *ci = curcpu(); - u_int32_t cpumask = 1U << ci->ci_cpuid; - int s; - boolean_t result; - u_int32_t oldcpus; - - /* - * if we still have a lazy reference to this pmap, - * we can assume that there was no tlb shootdown - * for this pmap in the meantime. - */ - - s = splipi(); /* protect from tlb shootdown ipis. */ - oldcpus = pmap->pm_cpus; - x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); - if (oldcpus & cpumask) { - KASSERT(ci->ci_tlbstate == TLBSTATE_LAZY); - /* got it */ - result = TRUE; - } else { - KASSERT(ci->ci_tlbstate == TLBSTATE_STALE); - result = FALSE; - } - ci->ci_tlbstate = TLBSTATE_VALID; - splx(s); - - return result; -} - -/* - * pmap_load: actually switch pmap. 
(fill in %cr3 and LDT info) - */ - -void -pmap_load() -{ - struct cpu_info *ci = curcpu(); - u_int32_t cpumask = 1U << ci->ci_cpuid; - struct pmap *pmap; - struct pmap *oldpmap; - struct lwp *l; - struct pcb *pcb; - pd_entry_t *mapdp; - int s; - - KASSERT(ci->ci_want_pmapload); - - l = ci->ci_curlwp; - KASSERT(l != NULL); - pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - KASSERT(pmap != pmap_kernel()); - oldpmap = ci->ci_pmap; - - pcb = ci->ci_curpcb; - KASSERT(pcb == &l->l_addr->u_pcb); - /* loaded by pmap_activate */ - KASSERT(pcb->pcb_ldt_sel == pmap->pm_ldt_sel); - - if (pmap == oldpmap) { - if (!pmap_reactivate(pmap)) { - - /* - * pmap has been changed during deactivated. - * our tlb may be stale. - */ - - tlbflush(); - } - - ci->ci_want_pmapload = 0; - return; - } - - /* - * actually switch pmap. - */ - - x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask); - - KASSERT((pmap->pm_cpus & cpumask) == 0); - - KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); - pmap_reference(pmap); - KERNEL_UNLOCK(); - - /* - * mark the pmap in use by this processor. - */ - - s = splipi(); - x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); - ci->ci_pmap = pmap; - ci->ci_tlbstate = TLBSTATE_VALID; - splx(s); - - /* - * clear apdp slot before loading %cr3 since Xen only allows - * linear pagetable mappings in the current pagetable. - */ - KDASSERT(curapdp == 0); - mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE); - PDE_CLEAR(APDP_PDE, mapdp); - - /* - * update tss and load corresponding registers. - */ - - lldt(pcb->pcb_ldt_sel); - pcb->pcb_cr3 = pmap->pm_pdirpa; - lcr3(pcb->pcb_cr3); - - ci->ci_want_pmapload = 0; - - KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); - pmap_destroy(oldpmap); - KERNEL_UNLOCK(); -} - -/* - * pmap_deactivate: deactivate a process' pmap - */ - -void -pmap_deactivate(l) - struct lwp *l; -{ - - if (l == curlwp) - pmap_deactivate2(l); -} - -/* - * pmap_deactivate2: context switch version of pmap_deactivate. - * always treat l as curlwp. 
- */ - -void -pmap_deactivate2(l) - struct lwp *l; -{ - struct pmap *pmap; - struct cpu_info *ci = curcpu(); - - if (ci->ci_want_pmapload) { - KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) - != pmap_kernel()); - KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) - != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID); - - /* - * userspace has not been touched. - * nothing to do here. - */ - - ci->ci_want_pmapload = 0; - return; - } - - pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - - if (pmap == pmap_kernel()) { - return; - } - - KASSERT(ci->ci_pmap == pmap); - - KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); - ci->ci_tlbstate = TLBSTATE_LAZY; - XENPRINTF(("pmap_deactivate %p ebp %p esp %p\n", - l, (void *)l->l_addr->u_pcb.pcb_ebp, - (void *)l->l_addr->u_pcb.pcb_esp)); -} - -/* - * end of lifecycle functions - */ - -/* - * some misc. functions - */ - -/* - * pmap_extract: extract a PA for the given VA - */ - -boolean_t -pmap_extract(pmap, va, pap) - struct pmap *pmap; - vaddr_t va; - paddr_t *pap; -{ - pt_entry_t *ptes, pte; - pd_entry_t pde; - - if (__predict_true((pde = PDE_GET(&pmap->pm_pdir[pdei(va)])) != 0)) { -#ifdef LARGEPAGES - if (pde & PG_PS) { - if (pap != NULL) - *pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME); - return (TRUE); - } -#endif - - ptes = pmap_map_ptes(pmap); - pte = PTE_GET(&ptes[x86_btop(va)]); - pmap_unmap_ptes(pmap); - - if (__predict_true((pte & PG_V) != 0)) { - if (pap != NULL) - *pap = (pte & PG_FRAME) | (va & ~PG_FRAME); - return (TRUE); - } - } - return (FALSE); -} - - -/* - * vtophys: virtual address to physical address. For use by - * machine-dependent code only. - */ - -paddr_t -vtophys(va) - vaddr_t va; -{ - paddr_t pa; - - if (pmap_extract(pmap_kernel(), va, &pa) == TRUE) - return (pa); - return (0); -} - - -/* - * pmap_virtual_space: used during bootup [pmap_steal_memory] to - * determine the bounds of the kernel virtual addess space. 
- */ - -void -pmap_virtual_space(startp, endp) - vaddr_t *startp; - vaddr_t *endp; -{ - *startp = virtual_avail; - *endp = virtual_end; -} - -/* - * pmap_map: map a range of PAs into kvm - * - * => used during crash dump - * => XXX: pmap_map() should be phased out? - */ - -vaddr_t -pmap_map(va, spa, epa, prot) - vaddr_t va; - paddr_t spa, epa; - vm_prot_t prot; -{ - while (spa < epa) { - pmap_enter(pmap_kernel(), va, spa, prot, 0); - va += PAGE_SIZE; - spa += PAGE_SIZE; - } - pmap_update(pmap_kernel()); - return va; -} - -/* - * pmap_zero_page: zero a page - */ - -void -pmap_zero_page(pa) - paddr_t pa; -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *zpte = PTESLEW(zero_pte, id); - pt_entry_t *maptp; - caddr_t zerova = VASLEW(zerop, id); - -#ifdef DIAGNOSTIC - if (PTE_GET(zpte)) - panic("pmap_zero_page: lock botch"); -#endif - - maptp = (pt_entry_t *)vtomach((vaddr_t)zpte); - PTE_SET(zpte, maptp, (pa & PG_FRAME) | PG_V | PG_RW); /* map in */ - pmap_update_pg((vaddr_t)zerova); /* flush TLB */ - - memset(zerova, 0, PAGE_SIZE); /* zero */ - PTE_CLEAR(zpte, maptp); /* zap! */ -} - -/* - * pmap_pagezeroidle: the same, for the idle loop page zero'er. - * Returns TRUE if the page was zero'd, FALSE if we aborted for - * some reason. - */ - -boolean_t -pmap_pageidlezero(pa) - paddr_t pa; -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *zpte = PTESLEW(zero_pte, id); - pt_entry_t *maptp; - caddr_t zerova = VASLEW(zerop, id); - boolean_t rv = TRUE; - int i, *ptr; - -#ifdef DIAGNOSTIC - if (PTE_GET(zpte)) - panic("pmap_zero_page_uncached: lock botch"); -#endif - maptp = (pt_entry_t *)vtomach((vaddr_t)zpte); - PTE_SET(zpte, maptp, (pa & PG_FRAME) | PG_V | PG_RW); /* map in */ - pmap_update_pg((vaddr_t)zerova); /* flush TLB */ - for (i = 0, ptr = (int *) zerova; i < PAGE_SIZE / sizeof(int); i++) { - if (sched_whichqs != 0) { - - /* - * A process has become ready. 
Abort now, - * so we don't keep it waiting while we - * do slow memory access to finish this - * page. - */ - - rv = FALSE; - break; - } - *ptr++ = 0; - } - - PTE_CLEAR(zpte, maptp); /* zap! */ - return (rv); -} - -/* - * pmap_copy_page: copy a page - */ - -void -pmap_copy_page(srcpa, dstpa) - paddr_t srcpa, dstpa; -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *spte = PTESLEW(csrc_pte,id), *maspte; - pt_entry_t *dpte = PTESLEW(cdst_pte,id), *madpte; - caddr_t csrcva = VASLEW(csrcp, id); - caddr_t cdstva = VASLEW(cdstp, id); - -#ifdef DIAGNOSTIC - if (PTE_GET(spte) || PTE_GET(dpte)) - panic("pmap_copy_page: lock botch"); -#endif - - maspte = (pt_entry_t *)vtomach((vaddr_t)spte); - madpte = (pt_entry_t *)vtomach((vaddr_t)dpte); - PTE_SET(spte, maspte, (srcpa & PG_FRAME) | PG_V | PG_RW); - PTE_SET(dpte, madpte, (dstpa & PG_FRAME) | PG_V | PG_RW); - pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); - memcpy(cdstva, csrcva, PAGE_SIZE); - PTE_CLEAR(spte, maspte); /* zap! */ - PTE_CLEAR(dpte, madpte); /* zap! */ -} - -/* - * p m a p r e m o v e f u n c t i o n s - * - * functions that remove mappings - */ - -/* - * pmap_remove_ptes: remove PTEs from a PTP - * - * => must have proper locking on pmap_master_lock - * => caller must hold pmap's lock - * => PTP must be mapped into KVA - * => PTP should be null if pmap == pmap_kernel() - */ - -static void -pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp, flags) - struct pmap *pmap; - struct vm_page *ptp; - vaddr_t ptpva; - vaddr_t startva, endva; - int32_t *cpumaskp; - int flags; -{ - struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ - struct pv_entry *pve; - pt_entry_t *pte = (pt_entry_t *) ptpva; - pt_entry_t opte; - pt_entry_t *maptp; - - /* - * note that ptpva points to the PTE that maps startva. this may - * or may not be the first PTE in the PTP. 
- * - * we loop through the PTP while there are still PTEs to look at - * and the wire_count is greater than 1 (because we use the wire_count - * to keep track of the number of real PTEs in the PTP). - */ - - for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) - ; pte++, startva += PAGE_SIZE) { - struct vm_page *pg; - struct vm_page_md *mdpg; - - if (!pmap_valid_entry(*pte)) - continue; /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - continue; - } - - /* atomically save the old PTE and zap! it */ - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - opte = pte_atomic_update(pte, maptp, 0); - pmap_exec_account(pmap, startva, opte, 0); - - if (opte & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - if (opte & PG_U) - pmap_tlb_shootdown(pmap, startva, opte, cpumaskp); - - if (ptp) { - ptp->wire_count--; /* dropping a PTE */ - /* Make sure that the PDE is flushed */ - if ((ptp->wire_count <= 1) && !(opte & PG_U)) - pmap_tlb_shootdown(pmap, startva, opte, - cpumaskp); - } - - /* - * if we are not on a pv_head list we are done. 
- */ - - if ((opte & PG_PVLIST) == 0) { -#if defined(DIAGNOSTIC) && !defined(DOM0OPS) - if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL) - panic("pmap_remove_ptes: managed page without " - "PG_PVLIST for 0x%lx", startva); -#endif - continue; - } - - pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); -#ifdef DIAGNOSTIC - if (pg == NULL) - panic("pmap_remove_ptes: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", - startva, (u_long)(opte & PG_FRAME)); -#endif - mdpg = &pg->mdpage; - - /* sync R/M bits */ - simple_lock(&mdpg->mp_pvhead.pvh_lock); - mdpg->mp_attrs |= (opte & (PG_U|PG_M)); - pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, startva); - simple_unlock(&mdpg->mp_pvhead.pvh_lock); - - if (pve) { - SPLAY_RIGHT(pve, pv_node) = pv_tofree; - pv_tofree = pve; - } - - /* end of "for" loop: time for next pte */ - } - if (pv_tofree) - pmap_free_pvs(pmap, pv_tofree); -} - - -/* - * pmap_remove_pte: remove a single PTE from a PTP - * - * => must have proper locking on pmap_master_lock - * => caller must hold pmap's lock - * => PTP must be mapped into KVA - * => PTP should be null if pmap == pmap_kernel() - * => returns true if we removed a mapping - */ - -static boolean_t -pmap_remove_pte(pmap, ptp, pte, va, cpumaskp, flags) - struct pmap *pmap; - struct vm_page *ptp; - pt_entry_t *pte; - vaddr_t va; - int32_t *cpumaskp; - int flags; -{ - pt_entry_t opte; - pt_entry_t *maptp; - struct pv_entry *pve; - struct vm_page *pg; - struct vm_page_md *mdpg; - - if (!pmap_valid_entry(*pte)) - return(FALSE); /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - return(FALSE); - } - - /* atomically save the old PTE and zap! 
it */ - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - opte = pte_atomic_update(pte, maptp, 0); - - XENPRINTK(("pmap_remove_pte %p, was %08x\n", pte, opte)); - pmap_exec_account(pmap, va, opte, 0); - - if (opte & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - if (opte & PG_U) - pmap_tlb_shootdown(pmap, va, opte, cpumaskp); - - if (ptp) { - ptp->wire_count--; /* dropping a PTE */ - /* Make sure that the PDE is flushed */ - if ((ptp->wire_count <= 1) && !(opte & PG_U)) - pmap_tlb_shootdown(pmap, va, opte, cpumaskp); - - } - /* - * if we are not on a pv_head list we are done. - */ - - if ((opte & PG_PVLIST) == 0) { -#if defined(DIAGNOSTIC) && !defined(DOM0OPS) - if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL) - panic("pmap_remove_pte: managed page without " - "PG_PVLIST for 0x%lx", va); -#endif - return(TRUE); - } - - pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); -#ifdef DIAGNOSTIC - if (pg == NULL) - panic("pmap_remove_pte: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, - (u_long)(opte & PG_FRAME)); -#endif - mdpg = &pg->mdpage; - - /* sync R/M bits */ - simple_lock(&mdpg->mp_pvhead.pvh_lock); - mdpg->mp_attrs |= (opte & (PG_U|PG_M)); - pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, va); - simple_unlock(&mdpg->mp_pvhead.pvh_lock); - - if (pve) - pmap_free_pv(pmap, pve); - return(TRUE); -} - -/* - * pmap_remove: top level mapping removal function - * - * => caller should not be holding any pmap locks - */ - -void -pmap_remove(pmap, sva, eva) - struct pmap *pmap; - vaddr_t sva, eva; -{ - pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); -} - -/* - * pmap_do_remove: mapping removal guts - * - * => caller should not be holding any pmap locks - */ - -static void -pmap_do_remove(pmap, sva, eva, flags) - struct pmap *pmap; - vaddr_t sva, eva; - int flags; -{ - pt_entry_t *ptes, opte; - pt_entry_t *maptp; - boolean_t result; - paddr_t ptppa; - vaddr_t blkendva; - struct vm_page *ptp; - int32_t cpumask = 0; - TAILQ_HEAD(, vm_page) 
empty_ptps; - struct cpu_info *ci; - struct pmap *curpmap; - - /* - * we lock in the pmap => pv_head direction - */ - - TAILQ_INIT(&empty_ptps); - - PMAP_MAP_TO_HEAD_LOCK(); - - ptes = pmap_map_ptes(pmap); /* locks pmap */ - - ci = curcpu(); - curpmap = ci->ci_pmap; - - /* - * removing one page? take shortcut function. - */ - - if (sva + PAGE_SIZE == eva) { - if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { - - /* PA of the PTP */ - ptppa = PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME; - - /* get PTP if non-kernel mapping */ - if (pmap == pmap_kernel()) { - /* we never free kernel PTPs */ - ptp = NULL; - } else { - if (pmap->pm_ptphint && - VM_PAGE_TO_PHYS(pmap->pm_ptphint) == - ptppa) { - ptp = pmap->pm_ptphint; - } else { - ptp = PHYS_TO_VM_PAGE(ptppa); -#ifdef DIAGNOSTIC - if (ptp == NULL) - panic("pmap_remove: unmanaged " - "PTP detected"); -#endif - } - } - - /* do it! */ - result = pmap_remove_pte(pmap, ptp, - &ptes[x86_btop(sva)], sva, &cpumask, flags); - - /* - * if mapping removed and the PTP is no longer - * being used, free it! - */ - - if (result && ptp && ptp->wire_count <= 1) { - /* zap! */ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&pmap->pm_pdir[pdei(sva)]); - PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)], - maptp, opte); -#if defined(MULTIPROCESSOR) - /* - * XXXthorpej Redundant shootdown can happen - * here if we're using APTE space. - */ -#endif - pmap_tlb_shootdown(curpmap, - ((vaddr_t)ptes) + ptp->offset, opte, - &cpumask); -#if defined(MULTIPROCESSOR) - /* - * Always shoot down the pmap's self-mapping - * of the PTP. - * XXXthorpej Redundant shootdown can happen - * here if pmap == curpmap (not APTE space). 
- */ - pmap_tlb_shootdown(pmap, - ((vaddr_t)PTE_BASE) + ptp->offset, opte, - &cpumask); -#endif - pmap->pm_stats.resident_count--; - if (pmap->pm_ptphint == ptp) - pmap->pm_ptphint = - TAILQ_FIRST(&pmap->pm_obj.memq); - ptp->wire_count = 0; - ptp->flags |= PG_ZERO; - uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq); - } - } - pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlock pmap */ - PMAP_MAP_TO_HEAD_UNLOCK(); - /* Now we can free unused ptps */ - TAILQ_FOREACH(ptp, &empty_ptps, listq) - uvm_pagefree(ptp); - return; - } - - cpumask = 0; - - for (/* null */ ; sva < eva ; sva = blkendva) { - - /* determine range of block */ - blkendva = x86_round_pdr(sva+1); - if (blkendva > eva) - blkendva = eva; - - /* - * XXXCDC: our PTE mappings should never be removed - * with pmap_remove! if we allow this (and why would - * we?) then we end up freeing the pmap's page - * directory page (PDP) before we are finished using - * it when we hit in in the recursive mapping. this - * is BAD. - * - * long term solution is to move the PTEs out of user - * address space. and into kernel address space (up - * with APTE). then we can set VM_MAXUSER_ADDRESS to - * be VM_MAX_ADDRESS. - */ - - if (pdei(sva) == PDSLOT_PTE) - /* XXXCDC: ugly hack to avoid freeing PDP here */ - continue; - - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) - /* valid block? 
*/ - continue; - - /* PA of the PTP */ - ptppa = (PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME); - - /* get PTP if non-kernel mapping */ - if (pmap == pmap_kernel()) { - /* we never free kernel PTPs */ - ptp = NULL; - } else { - if (pmap->pm_ptphint && - VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { - ptp = pmap->pm_ptphint; - } else { - ptp = PHYS_TO_VM_PAGE(ptppa); -#ifdef DIAGNOSTIC - if (ptp == NULL) - panic("pmap_remove: unmanaged PTP " - "detected"); -#endif - } - } - pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[x86_btop(sva)], - sva, blkendva, &cpumask, flags); - - /* if PTP is no longer being used, free it! */ - if (ptp && ptp->wire_count <= 1) { - /* zap! */ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&pmap->pm_pdir[pdei(sva)]); - PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)], - maptp, opte); -#if defined(MULTIPROCESSOR) - /* - * XXXthorpej Redundant shootdown can happen here - * if we're using APTE space. - */ -#endif - pmap_tlb_shootdown(curpmap, - ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); -#if defined(MULTIPROCESSOR) - /* - * Always shoot down the pmap's self-mapping - * of the PTP. - * XXXthorpej Redundant shootdown can happen here - * if pmap == curpmap (not APTE space). - */ - pmap_tlb_shootdown(pmap, - ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); -#endif - pmap->pm_stats.resident_count--; - if (pmap->pm_ptphint == ptp) /* update hint? 
*/ - pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first; - ptp->wire_count = 0; - ptp->flags |= PG_ZERO; - /* Postpone free to shootdown */ - uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq); - } - } - - pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); - PMAP_MAP_TO_HEAD_UNLOCK(); - /* Now we can free unused ptps */ - TAILQ_FOREACH(ptp, &empty_ptps, listq) - uvm_pagefree(ptp); -} - -/* - * pmap_page_remove: remove a managed vm_page from all pmaps that map it - * - * => we set pv_head => pmap locking - * => R/M bits are sync'd back to attrs - */ - -void -pmap_page_remove(pg) - struct vm_page *pg; -{ - struct pv_head *pvh; - struct pv_entry *pve, *npve, *killlist = NULL; - pt_entry_t *ptes, opte; - pt_entry_t *maptp; - int32_t cpumask = 0; - TAILQ_HEAD(, vm_page) empty_ptps; - struct vm_page *ptp; - struct cpu_info *ci; - struct pmap *curpmap; - -#ifdef DIAGNOSTIC - int bank, off; - - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) - panic("pmap_page_remove: unmanaged page?"); -#endif - - pvh = &pg->mdpage.mp_pvhead; - if (SPLAY_ROOT(&pvh->pvh_root) == NULL) { - return; - } - - TAILQ_INIT(&empty_ptps); - - /* set pv_head => pmap locking */ - PMAP_HEAD_TO_MAP_LOCK(); - - ci = curcpu(); - curpmap = ci->ci_pmap; - - /* XXX: needed if we hold head->map lock? 
*/ - simple_lock(&pvh->pvh_lock); - - for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) { - npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve); - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ - -#ifdef DIAGNOSTIC - if (pve->pv_ptp && - (PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]) & - PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) { - printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", - pg, pve->pv_va, pve->pv_ptp); - printf("pmap_page_remove: PTP's phys addr: " - "actual=%lx, recorded=%lx\n", - (PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]) - & PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); - panic("pmap_page_remove: mapped managed page has " - "invalid pv_ptp field"); - } -#endif - - /* atomically save the old PTE and zap! it */ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(pve->pv_va)]); - opte = pte_atomic_update(&ptes[x86_btop(pve->pv_va)], - maptp, 0); - - if (opte & PG_W) - pve->pv_pmap->pm_stats.wired_count--; - pve->pv_pmap->pm_stats.resident_count--; - - /* Shootdown only if referenced */ - if (opte & PG_U) - pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, - &cpumask); - - /* sync R/M bits */ - pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M)); - - /* update the PTP reference count. free if last reference. */ - if (pve->pv_ptp) { - pve->pv_ptp->wire_count--; - if (pve->pv_ptp->wire_count <= 1) { - /* - * Do we have to shootdown the page just to - * get the pte out of the TLB ? - */ - if(!(opte & PG_U)) - pmap_tlb_shootdown(pve->pv_pmap, - pve->pv_va, opte, &cpumask); - - /* zap! */ - maptp = (pt_entry_t *)vtomach((vaddr_t) - &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]); - PTE_ATOMIC_CLEAR(&pve->pv_pmap->pm_pdir - [pdei(pve->pv_va)], maptp, opte); - pmap_tlb_shootdown(curpmap, - ((vaddr_t)ptes) + pve->pv_ptp->offset, - opte, &cpumask); -#if defined(MULTIPROCESSOR) - /* - * Always shoot down the other pmap's - * self-mapping of the PTP. 
- */ - pmap_tlb_shootdown(pve->pv_pmap, - ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset, - opte, &cpumask); -#endif - pve->pv_pmap->pm_stats.resident_count--; - /* update hint? */ - if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) - pve->pv_pmap->pm_ptphint = - pve->pv_pmap->pm_obj.memq.tqh_first; - pve->pv_ptp->wire_count = 0; - pve->pv_ptp->flags |= PG_ZERO; - /* Free only after the shootdown */ - uvm_pagerealloc(pve->pv_ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, - listq); - } - } - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ - SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */ - SPLAY_RIGHT(pve, pv_node) = killlist; /* mark it for death */ - killlist = pve; - } - pmap_free_pvs(NULL, killlist); - simple_unlock(&pvh->pvh_lock); - PMAP_HEAD_TO_MAP_UNLOCK(); - pmap_tlb_shootnow(cpumask); - - /* Now we can free unused ptps */ - TAILQ_FOREACH(ptp, &empty_ptps, listq) - uvm_pagefree(ptp); -} - -/* - * p m a p a t t r i b u t e f u n c t i o n s - * functions that test/change managed page's attributes - * since a page can be mapped multiple times we must check each PTE that - * maps it by going down the pv lists. - */ - -/* - * pmap_test_attrs: test a page's attributes - * - * => we set pv_head => pmap locking - */ - -boolean_t -pmap_test_attrs(pg, testbits) - struct vm_page *pg; - int testbits; -{ - struct vm_page_md *mdpg; - int *myattrs; - struct pv_head *pvh; - struct pv_entry *pve; - volatile pt_entry_t *ptes; - pt_entry_t pte; - -#if DIAGNOSTIC - int bank, off; - - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) - panic("pmap_test_attrs: unmanaged page?"); -#endif - mdpg = &pg->mdpage; - - /* - * before locking: see if attributes are already set and if so, - * return! 
- */ - - myattrs = &mdpg->mp_attrs; - if (*myattrs & testbits) - return(TRUE); - - /* test to see if there is a list before bothering to lock */ - pvh = &mdpg->mp_pvhead; - if (SPLAY_ROOT(&pvh->pvh_root) == NULL) { - return(FALSE); - } - - /* nope, gonna have to do it the hard way */ - PMAP_HEAD_TO_MAP_LOCK(); - /* XXX: needed if we hold head->map lock? */ - simple_lock(&pvh->pvh_lock); - - for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); - pve != NULL && (*myattrs & testbits) == 0; - pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) { - ptes = pmap_map_ptes(pve->pv_pmap); - pte = PTE_GET(&ptes[x86_btop(pve->pv_va)]); /* XXX flags only? */ - pmap_unmap_ptes(pve->pv_pmap); - *myattrs |= pte; - } - - /* - * note that we will exit the for loop with a non-null pve if - * we have found the bits we are testing for. - */ - - simple_unlock(&pvh->pvh_lock); - PMAP_HEAD_TO_MAP_UNLOCK(); - return((*myattrs & testbits) != 0); -} - -/* - * pmap_clear_attrs: clear the specified attribute for a page. - * - * => we set pv_head => pmap locking - * => we return TRUE if we cleared one of the bits we were asked to - */ - -boolean_t -pmap_clear_attrs(pg, clearbits) - struct vm_page *pg; - int clearbits; -{ - struct vm_page_md *mdpg; - u_int32_t result; - struct pv_head *pvh; - struct pv_entry *pve; - pt_entry_t *ptes, opte; - pt_entry_t *maptp; - int *myattrs; - int32_t cpumask = 0; - -#ifdef DIAGNOSTIC - int bank, off; - - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) - panic("pmap_change_attrs: unmanaged page?"); -#endif - mdpg = &pg->mdpage; - - PMAP_HEAD_TO_MAP_LOCK(); - pvh = &mdpg->mp_pvhead; - /* XXX: needed if we hold head->map lock? 
*/ - simple_lock(&pvh->pvh_lock); - - myattrs = &mdpg->mp_attrs; - result = *myattrs & clearbits; - *myattrs &= ~clearbits; - - SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) { -#ifdef DIAGNOSTIC - if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) - panic("pmap_change_attrs: mapping without PTP " - "detected"); -#endif - - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ - opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]); - if (opte & clearbits) { - /* We need to do something */ - if (clearbits == PG_RW) { - result |= PG_RW; - - /* - * On write protect we might not need to flush - * the TLB - */ - - /* First zap the RW bit! */ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(pve->pv_va)]); - PTE_ATOMIC_CLEARBITS( - &ptes[x86_btop(pve->pv_va)], - maptp, PG_RW); - opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]); - - /* - * Then test if it is not cached as RW the TLB - */ - if (!(opte & PG_M)) - goto no_tlb_shootdown; - } - - /* - * Since we need a shootdown me might as well - * always clear PG_U AND PG_M. - */ - - /* zap! 
*/ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(pve->pv_va)]); - PTE_ATOMIC_SET(&ptes[x86_btop(pve->pv_va)], maptp, - (opte & ~(PG_U | PG_M)), opte); - - result |= (opte & clearbits); - *myattrs |= (opte & ~(clearbits)); - - pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, - &cpumask); - } -no_tlb_shootdown: - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ - } - - simple_unlock(&pvh->pvh_lock); - PMAP_HEAD_TO_MAP_UNLOCK(); - - pmap_tlb_shootnow(cpumask); - return(result != 0); -} - - -/* - * p m a p p r o t e c t i o n f u n c t i o n s - */ - -/* - * pmap_page_protect: change the protection of all recorded mappings - * of a managed page - * - * => NOTE: this is an inline function in pmap.h - */ - -/* see pmap.h */ - -/* - * pmap_protect: set the protection in of the pages in a pmap - * - * => NOTE: this is an inline function in pmap.h - */ - -/* see pmap.h */ - -/* - * pmap_write_protect: write-protect pages in a pmap - */ - -void -pmap_write_protect(pmap, sva, eva, prot) - struct pmap *pmap; - vaddr_t sva, eva; - vm_prot_t prot; -{ - pt_entry_t *ptes, *epte; - pt_entry_t *maptp; -#ifndef XEN - volatile -#endif - pt_entry_t *spte; - vaddr_t blockend; - int32_t cpumask = 0; - - ptes = pmap_map_ptes(pmap); /* locks pmap */ - - /* should be ok, but just in case ... */ - sva &= PG_FRAME; - eva &= PG_FRAME; - - for (/* null */ ; sva < eva ; sva = blockend) { - - blockend = (sva & PD_MASK) + NBPD; - if (blockend > eva) - blockend = eva; - - /* - * XXXCDC: our PTE mappings should never be write-protected! - * - * long term solution is to move the PTEs out of user - * address space. and into kernel address space (up - * with APTE). then we can set VM_MAXUSER_ADDRESS to - * be VM_MAX_ADDRESS. - */ - - /* XXXCDC: ugly hack to avoid freeing PDP here */ - if (pdei(sva) == PDSLOT_PTE) - continue; - - /* empty block? 
*/ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) - continue; - -#ifdef DIAGNOSTIC - if (sva >= VM_MAXUSER_ADDRESS && - sva < VM_MAX_ADDRESS) - panic("pmap_write_protect: PTE space"); -#endif - - spte = &ptes[x86_btop(sva)]; - epte = &ptes[x86_btop(blockend)]; - - for (/*null */; spte < epte ; spte++) { - if ((PTE_GET(spte) & (PG_RW|PG_V)) == (PG_RW|PG_V)) { - maptp = (pt_entry_t *)vtomach((vaddr_t)spte); - PTE_ATOMIC_CLEARBITS(spte, maptp, PG_RW); - if (PTE_GET(spte) & PG_M) - pmap_tlb_shootdown(pmap, - x86_ptob(spte - ptes), - PTE_GET(spte), &cpumask); - } - } - } - - /* - * if we kept a removal record and removed some pages update the TLB - */ - - pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlocks pmap */ -} - -/* - * end of protection functions - */ - -/* - * pmap_unwire: clear the wired bit in the PTE - * - * => mapping should already be in map - */ - -void -pmap_unwire(pmap, va) - struct pmap *pmap; - vaddr_t va; -{ - pt_entry_t *ptes; - pt_entry_t *maptp; - - if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { - ptes = pmap_map_ptes(pmap); /* locks pmap */ - -#ifdef DIAGNOSTIC - if (!pmap_valid_entry(ptes[x86_btop(va)])) - panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); -#endif - if ((ptes[x86_btop(va)] & PG_W) != 0) { - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(va)]); - PTE_ATOMIC_CLEARBITS(&ptes[x86_btop(va)], maptp, PG_W); - pmap->pm_stats.wired_count--; - } -#ifdef DIAGNOSTIC - else { - printf("pmap_unwire: wiring for pmap %p va 0x%lx " - "didn't change!\n", pmap, va); - } -#endif - pmap_unmap_ptes(pmap); /* unlocks map */ - } -#ifdef DIAGNOSTIC - else { - panic("pmap_unwire: invalid PDE"); - } -#endif -} - -/* - * pmap_collect: free resources held by a pmap - * - * => optional function. - * => called when a process is swapped out to free memory. - */ - -void -pmap_collect(pmap) - struct pmap *pmap; -{ - /* - * free all of the pt pages by removing the physical mappings - * for its entire address space. 
- */ - - pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, - PMAP_REMOVE_SKIPWIRED); -} - -/* - * pmap_copy: copy mappings from one pmap to another - * - * => optional function - * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) - */ - -/* - * defined as macro in pmap.h - */ - -/* - * pmap_enter: enter a mapping into a pmap - * - * => must be done "now" ... no lazy-evaluation - * => we set pmap => pv_head locking - */ - -int -pmap_enter(pmap, va, pa, prot, flags) - struct pmap *pmap; - vaddr_t va; - paddr_t pa; - vm_prot_t prot; - int flags; -{ - pt_entry_t *ptes, opte, npte; - struct vm_page *ptp, *pg; - struct vm_page_md *mdpg; - struct pv_head *old_pvh, *new_pvh; - struct pv_entry *pve = NULL; /* XXX gcc */ - int error; - boolean_t wired = (flags & PMAP_WIRED) != 0; - pt_entry_t *maptp; - - XENPRINTK(("pmap_enter(%p, %p, %p, %08x, %08x)\n", - pmap, (void *)va, (void *)pa, prot, flags)); - -#ifdef DIAGNOSTIC - /* sanity check: totally out of range? */ - if (va >= VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: too big"); - - if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) - panic("pmap_enter: trying to map over PDP/APDP!"); - - /* sanity check: kernel PTPs should already have been pre-allocated */ - if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) - panic("pmap_enter: missing kernel PTP!"); -#endif - - npte = protection_codes[prot] | PG_V; - - if (pa >= pmap_pa_start && pa < pmap_pa_end) - npte |= xpmap_ptom(pa); - else { - XENPRINTF(("pmap_enter: va %08lx outside pa range %08lx\n", - va, pa)); - npte |= pa; - } - - /* XENPRINTK(("npte %p\n", npte)); */ - - if (wired) - npte |= PG_W; - - if (va < VM_MAXUSER_ADDRESS) - npte |= PG_u; - else if (va < VM_MAX_ADDRESS) - npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? 
*/ - if (pmap == pmap_kernel()) - npte |= pmap_pg_g; - - /* get lock */ - PMAP_MAP_TO_HEAD_LOCK(); - - ptes = pmap_map_ptes(pmap); /* locks pmap */ - if (pmap == pmap_kernel()) { - ptp = NULL; - } else { - ptp = pmap_get_ptp(pmap, pdei(va)); - if (ptp == NULL) { - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("pmap_enter: get ptp failed"); - } - } - - /* - * Get first view on old PTE - * on SMP the PTE might gain PG_U and PG_M flags - * before we zap it later - */ - opte = pte_get(&ptes[x86_btop(va)]); /* old PTE */ - XENPRINTK(("npte %p opte %p ptes %p idx %03x\n", - (void *)npte, (void *)opte, ptes, x86_btop(va))); - - /* - * is there currently a valid mapping at our VA and does it - * map to the same PA as the one we want to map ? - */ - - if (pmap_valid_entry(opte) && ((opte & PG_FRAME) == pa)) { - - /* - * first, calculate pm_stats updates. resident count will not - * change since we are replacing/changing a valid mapping. - * wired count might change... - */ - pmap->pm_stats.wired_count += - ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0); - - npte |= (opte & PG_PVLIST); - - XENPRINTK(("pmap update opte == pa")); - /* zap! */ - maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, npte); - - /* - * Any change in the protection level that the CPU - * should know about ? - */ - if ((npte & PG_RW) - || ((opte & (PG_M | PG_RW)) != (PG_M | PG_RW))) { - XENPRINTK(("pmap update opte == pa, prot change")); - /* - * No need to flush the TLB. - * Just add old PG_M, ... flags in new entry. 
- */ - PTE_ATOMIC_SETBITS(&ptes[x86_btop(va)], maptp, - opte & (PG_M | PG_U)); - goto out_ok; - } - - /* - * Might be cached in the TLB as being writable - * if this is on the PVLIST, sync R/M bit - */ - if (opte & PG_PVLIST) { - pg = PHYS_TO_VM_PAGE(pa); -#ifdef DIAGNOSTIC - if (pg == NULL) - panic("pmap_enter: same pa PG_PVLIST " - "mapping with unmanaged page " - "pa = 0x%lx (0x%lx)", pa, - atop(pa)); -#endif - mdpg = &pg->mdpage; - old_pvh = &mdpg->mp_pvhead; - simple_lock(&old_pvh->pvh_lock); - mdpg->mp_attrs |= opte; - simple_unlock(&old_pvh->pvh_lock); - } - goto shootdown_now; - } - - pg = PHYS_TO_VM_PAGE(pa); - XENPRINTK(("pg %p from %p, init %d\n", pg, (void *)pa, - pmap_initialized)); - if (pmap_initialized && pg != NULL) { - /* This is a managed page */ - npte |= PG_PVLIST; - mdpg = &pg->mdpage; - new_pvh = &mdpg->mp_pvhead; - if ((opte & (PG_PVLIST | PG_V)) != (PG_PVLIST | PG_V)) { - /* We can not steal a pve - allocate one */ - pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); - if (pve == NULL) { - if (!(flags & PMAP_CANFAIL)) - panic("pmap_enter: " - "no pv entries available"); - error = ENOMEM; - goto out; - } - } - } else { - new_pvh = NULL; - } - - /* - * is there currently a valid mapping at our VA? - */ - - if (pmap_valid_entry(opte)) { - - /* - * changing PAs: we must remove the old one first - */ - - /* - * first, calculate pm_stats updates. resident count will not - * change since we are replacing/changing a valid mapping. - * wired count might change... - */ - pmap->pm_stats.wired_count += - ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0); - - if (opte & PG_PVLIST) { - pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); -#ifdef DIAGNOSTIC - if (pg == NULL) - panic("pmap_enter: PG_PVLIST mapping with " - "unmanaged page " - "pa = 0x%lx (0x%lx)", pa, atop(pa)); -#endif - mdpg = &pg->mdpage; - old_pvh = &mdpg->mp_pvhead; - - /* new_pvh is NULL if page will not be managed */ - pmap_lock_pvhs(old_pvh, new_pvh); - - XENPRINTK(("pmap change pa")); - /* zap! 
*/ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, - npte); - - pve = pmap_remove_pv(old_pvh, pmap, va); - KASSERT(pve != 0); - mdpg->mp_attrs |= opte; - - if (new_pvh) { - pmap_enter_pv(new_pvh, pve, pmap, va, ptp); - simple_unlock(&new_pvh->pvh_lock); - } else - pmap_free_pv(pmap, pve); - simple_unlock(&old_pvh->pvh_lock); - - goto shootdown_test; - } - } else { /* opte not valid */ - pmap->pm_stats.resident_count++; - if (wired) - pmap->pm_stats.wired_count++; - if (ptp) - ptp->wire_count++; - } - - if (new_pvh) { - simple_lock(&new_pvh->pvh_lock); - pmap_enter_pv(new_pvh, pve, pmap, va, ptp); - simple_unlock(&new_pvh->pvh_lock); - } - - XENPRINTK(("pmap initial setup\n")); - maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], - maptp, npte); /* zap! */ - -shootdown_test: - /* Update page attributes if needed */ - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { -#if defined(MULTIPROCESSOR) - int32_t cpumask = 0; -#endif -shootdown_now: -#if defined(MULTIPROCESSOR) - pmap_tlb_shootdown(pmap, va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. */ - if (pmap_is_curpmap(pmap)) - pmap_update_pg(va); -#endif - } - -out_ok: - error = 0; - -out: - pmap_unmap_ptes(pmap); - PMAP_MAP_TO_HEAD_UNLOCK(); - - XENPRINTK(("pmap_enter: %d\n", error)); - return error; -} - -/* - * pmap_enter_ma: enter a mapping into a pmap - * - * => must be done "now" ... 
no lazy-evaluation - * => we set pmap => pv_head locking - */ - -int -pmap_enter_ma(pmap, va, pa, prot, flags) - struct pmap *pmap; - vaddr_t va; - paddr_t pa; - vm_prot_t prot; - int flags; -{ - pt_entry_t *ptes, opte, npte; - pt_entry_t *maptp; - struct vm_page *ptp, *pg; - struct vm_page_md *mdpg; - struct pv_head *old_pvh; - struct pv_entry *pve = NULL; /* XXX gcc */ - int error; - boolean_t wired = (flags & PMAP_WIRED) != 0; - - XENPRINTK(("pmap_enter_ma(%p, %p, %p, %08x, %08x)\n", - pmap, (void *)va, (void *)pa, prot, flags)); - -#ifdef DIAGNOSTIC - /* sanity check: totally out of range? */ - if (va >= VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: too big"); - - if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) - panic("pmap_enter: trying to map over PDP/APDP!"); - - /* sanity check: kernel PTPs should already have been pre-allocated */ - if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) - panic("pmap_enter: missing kernel PTP!"); -#endif - - npte = pa | protection_codes[prot] | PG_V; - /* XENPRINTK(("npte %p\n", npte)); */ - - if (wired) - npte |= PG_W; - - if (va < VM_MAXUSER_ADDRESS) - npte |= PG_u; - else if (va < VM_MAX_ADDRESS) - npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? 
*/ - if (pmap == pmap_kernel()) - npte |= pmap_pg_g; - - /* get lock */ - PMAP_MAP_TO_HEAD_LOCK(); - - ptes = pmap_map_ptes(pmap); /* locks pmap */ - if (pmap == pmap_kernel()) { - ptp = NULL; - } else { - ptp = pmap_get_ptp(pmap, pdei(va)); - if (ptp == NULL) { - if (flags & PMAP_CANFAIL) { - error = ENOMEM; - goto out; - } - panic("pmap_enter: get ptp failed"); - } - } - - /* - * Get first view on old PTE - * on SMP the PTE might gain PG_U and PG_M flags - * before we zap it later - */ - opte = pte_get_ma(&ptes[x86_btop(va)]); /* old PTE */ - XENPRINTK(("npte %p opte %p ptes %p idx %03x\n", - (void *)npte, (void *)opte, ptes, x86_btop(va))); - XENPRINTF(("pmap_enter_ma pa %08lx va %08lx opte %08x npte %08x " - "wired %d count %ld\n", pa, va, opte, npte, wired, - pmap->pm_stats.wired_count)); - - /* - * is there currently a valid mapping at our VA and does it - * map to the same MA as the one we want to map ? - */ - - if (pmap_valid_entry(opte) && ((opte & PG_FRAME) == pa)) { - - /* - * first, calculate pm_stats updates. resident count will not - * change since we are replacing/changing a valid mapping. - * wired count might change... - */ - pmap->pm_stats.wired_count += - ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0); - - XENPRINTK(("pmap update opte == pa")); - /* zap! */ - maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, npte); - - /* - * Any change in the protection level that the CPU - * should know about ? - */ - if ((npte & PG_RW) - || ((opte & (PG_M | PG_RW)) != (PG_M | PG_RW))) { - XENPRINTK(("pmap update opte == pa, prot change")); - /* - * No need to flush the TLB. - * Just add old PG_M, ... flags in new entry. 
- */ - PTE_ATOMIC_SETBITS(&ptes[x86_btop(va)], maptp, - opte & (PG_M | PG_U)); - goto out_ok; - } - - /* - * Might be cached in the TLB as being writable - * if this is on the PVLIST, sync R/M bit - */ - KDASSERT((opte & PG_PVLIST) == 0); - goto shootdown_now; - } - - /* - * no managed mapping for pages mapped through pmap_enter_ma. - */ - - /* - * is there currently a valid mapping at our VA? - */ - - if (pmap_valid_entry(opte)) { - - /* - * changing PAs: we must remove the old one first - */ - - /* - * first, calculate pm_stats updates. resident count will not - * change since we are replacing/changing a valid mapping. - * wired count might change... - */ - pmap->pm_stats.wired_count += - ((npte & PG_W) ? 1 : 0 - (opte & PG_W) ? 1 : 0); - - if (opte & PG_PVLIST) { - opte = xpmap_mtop(opte); - KDASSERT((opte & PG_FRAME) != - (KERNTEXTOFF - KERNBASE_LOCORE)); - - pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); -#ifdef DIAGNOSTIC - if (pg == NULL) - panic("pmap_enter: PG_PVLIST mapping with " - "unmanaged page " - "pa = 0x%lx (0x%lx)", pa, atop(pa)); -#endif - mdpg = &pg->mdpage; - old_pvh = &mdpg->mp_pvhead; - - /* NULL new_pvh since page will not be managed */ - pmap_lock_pvhs(old_pvh, NULL); - - XENPRINTK(("pmap change pa")); - /* zap! */ - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], maptp, - npte); - - pve = pmap_remove_pv(old_pvh, pmap, va); - KASSERT(pve != 0); - mdpg->mp_attrs |= opte; - - pmap_free_pv(pmap, pve); - simple_unlock(&old_pvh->pvh_lock); - - goto shootdown_test; - } - } else { /* opte not valid */ - pmap->pm_stats.resident_count++; - if (wired) - pmap->pm_stats.wired_count++; - if (ptp) - ptp->wire_count++; - } - - XENPRINTK(("pmap initial setup")); - maptp = (pt_entry_t *)vtomach((vaddr_t)&ptes[x86_btop(va)]); - opte = pte_atomic_update_ma(&ptes[x86_btop(va)], - maptp, npte); /* zap! 
*/ - -shootdown_test: - /* Update page attributes if needed */ - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { -#if defined(MULTIPROCESSOR) - int32_t cpumask = 0; -#endif -shootdown_now: -#if defined(MULTIPROCESSOR) - pmap_tlb_shootdown(pmap, va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. */ - if (pmap_is_curpmap(pmap)) - pmap_update_pg(va); -#endif - } - -out_ok: - error = 0; - -out: - pmap_unmap_ptes(pmap); - PMAP_MAP_TO_HEAD_UNLOCK(); - - XENPRINTK(("pmap_enter: %d\n", error)); - return error; -} - -/* - * pmap_growkernel: increase usage of KVM space - * - * => we allocate new PTPs for the kernel and install them in all - * the pmaps on the system. - */ - -vaddr_t -pmap_growkernel(maxkvaddr) - vaddr_t maxkvaddr; -{ - struct pmap *kpm = pmap_kernel(), *pm; - pd_entry_t *mapdp; - pt_entry_t *maptp; - int needed_kpde; /* needed number of kernel PTPs */ - int s; - paddr_t ptaddr; - - needed_kpde = (u_int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) - / NBPD; - XENPRINTF(("pmap_growkernel %p: %d -> %d\n", (void *)maxkvaddr, - nkpde, needed_kpde)); - if (needed_kpde <= nkpde) - goto out; /* we are OK */ - - /* - * whoops! we need to add kernel PTPs - */ - - s = splhigh(); /* to be safe */ - simple_lock(&kpm->pm_obj.vmobjlock); - - for (/*null*/ ; nkpde < needed_kpde ; nkpde++) { - - mapdp = (pt_entry_t *)vtomach((vaddr_t)&kpm->pm_pdir[PDSLOT_KERN + nkpde]); - if (uvm.page_init_done == FALSE) { - - /* - * we're growing the kernel pmap early (from - * uvm_pageboot_alloc()). this case must be - * handled a little differently. 
- */ - - if (uvm_page_physget(&ptaddr) == FALSE) - panic("pmap_growkernel: out of memory"); - pmap_zero_page(ptaddr); - - XENPRINTF(("xxxx maybe not PG_RW\n")); - PDE_SET(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp, ptaddr | PG_RW | PG_V); - - /* count PTP as resident */ - kpm->pm_stats.resident_count++; - continue; - } - - /* - * THIS *MUST* BE CODED SO AS TO WORK IN THE - * pmap_initialized == FALSE CASE! WE MAY BE - * INVOKED WHILE pmap_init() IS RUNNING! - */ - - if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde) == NULL) { - panic("pmap_growkernel: alloc ptp failed"); - } - - /* PG_u not for kernel */ - PDE_CLEARBITS(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp, PG_u); - - /* distribute new kernel PTP to all active pmaps */ - simple_lock(&pmaps_lock); - for (pm = pmaps.lh_first; pm != NULL; - pm = pm->pm_list.le_next) { - XENPRINTF(("update\n")); - maptp = (pt_entry_t *)vtomach( - (vaddr_t)&pm->pm_pdir[PDSLOT_KERN + nkpde]); - PDE_COPY(&pm->pm_pdir[PDSLOT_KERN + nkpde], maptp, - &kpm->pm_pdir[PDSLOT_KERN + nkpde]); - } - - /* Invalidate the PDP cache. */ - pool_cache_invalidate(&pmap_pdp_cache); - pmap_pdp_cache_generation++; - - simple_unlock(&pmaps_lock); - } - - simple_unlock(&kpm->pm_obj.vmobjlock); - splx(s); - -out: - XENPRINTF(("pmap_growkernel return %d %p\n", nkpde, - (void *)(VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)))); - return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)); -} - -#ifdef DEBUG -void pmap_dump(struct pmap *, vaddr_t, vaddr_t); - -/* - * pmap_dump: dump all the mappings from a pmap - * - * => caller should not be holding any pmap locks - */ - -void -pmap_dump(pmap, sva, eva) - struct pmap *pmap; - vaddr_t sva, eva; -{ - pt_entry_t *ptes, *pte; - vaddr_t blkendva; - - /* - * if end is out of range truncate. - * if (end == start) update to max. 
- */ - - if (eva > VM_MAXUSER_ADDRESS || eva <= sva) - eva = VM_MAXUSER_ADDRESS; - - /* - * we lock in the pmap => pv_head direction - */ - - PMAP_MAP_TO_HEAD_LOCK(); - ptes = pmap_map_ptes(pmap); /* locks pmap */ - - /* - * dumping a range of pages: we dump in PTP sized blocks (4MB) - */ - - for (/* null */ ; sva < eva ; sva = blkendva) { - - /* determine range of block */ - blkendva = x86_round_pdr(sva+1); - if (blkendva > eva) - blkendva = eva; - - /* valid block? */ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) - continue; - - pte = &ptes[x86_btop(sva)]; - for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { - if (!pmap_valid_entry(*pte)) - continue; - XENPRINTF(("va %#lx -> pa %#lx (pte=%#lx)\n", - sva, PTE_GET(pte), PTE_GET(pte) & PG_FRAME)); - } - } - pmap_unmap_ptes(pmap); - PMAP_MAP_TO_HEAD_UNLOCK(); -} -#endif - -/******************** TLB shootdown code ********************/ - - -void -pmap_tlb_shootnow(int32_t cpumask) -{ - struct cpu_info *self; -#ifdef MULTIPROCESSOR - struct cpu_info *ci; - CPU_INFO_ITERATOR cii; - int s; -#ifdef DIAGNOSTIC - int count = 0; -#endif -#endif - - if (cpumask == 0) - return; - - self = curcpu(); -#ifdef MULTIPROCESSOR - s = splipi(); - self->ci_tlb_ipi_mask = cpumask; -#endif - - pmap_do_tlb_shootdown(self); /* do *our* work. */ - -#ifdef MULTIPROCESSOR - splx(s); - - /* - * Send the TLB IPI to other CPUs pending shootdowns. - */ - for (CPU_INFO_FOREACH(cii, ci)) { - if (ci == self) - continue; - if (cpumask & (1U << ci->ci_cpuid)) - if (x86_send_ipi(ci, X86_IPI_TLB) != 0) - x86_atomic_clearbits_l(&self->ci_tlb_ipi_mask, - (1U << ci->ci_cpuid)); - } - - while (self->ci_tlb_ipi_mask != 0) { -#ifdef DIAGNOSTIC - if (count++ > 10000000) - panic("TLB IPI rendezvous failed (mask %x)", - self->ci_tlb_ipi_mask); -#endif - x86_pause(); - } -#endif -} - -/* - * pmap_tlb_shootdown: - * - * Cause the TLB entry for pmap/va to be shot down. 
- */ -void -pmap_tlb_shootdown(pmap, va, pte, cpumaskp) - pmap_t pmap; - vaddr_t va; - pt_entry_t pte; - int32_t *cpumaskp; -{ - struct cpu_info *ci, *self; - struct pmap_tlb_shootdown_q *pq; - struct pmap_tlb_shootdown_job *pj; - CPU_INFO_ITERATOR cii; - int s; - -#ifdef LARGEPAGES - if (pte & PG_PS) - va &= PG_LGFRAME; -#endif - - if (pmap_initialized == FALSE || cpus_attached == 0) { - pmap_update_pg(va); - return; - } - - self = curcpu(); - - s = splipi(); -#if 0 - printf("dshootdown %lx\n", va); -#endif - - for (CPU_INFO_FOREACH(cii, ci)) { - /* Note: we queue shootdown events for ourselves here! */ - if (pmap_is_active(pmap, ci->ci_cpuid) == 0) - continue; - if (ci != self && !(ci->ci_flags & CPUF_RUNNING)) - continue; - pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; - __cpu_simple_lock(&pq->pq_slock); - - /* - * If there's a global flush already queued, or a - * non-global flush, and this pte doesn't have the G - * bit set, don't bother. - */ - if (pq->pq_flushg > 0 || - (pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) { - __cpu_simple_unlock(&pq->pq_slock); - continue; - } - -#ifdef I386_CPU - /* - * i386 CPUs can't invalidate a single VA, only - * flush the entire TLB, so don't bother allocating - * jobs for them -- just queue a `flushu'. - * - * XXX note that this can be executed for non-i386 - * when called * early (before identifycpu() has set - * cpu_class) - */ - if (cpu_class == CPUCLASS_386) { - pq->pq_flushu++; - *cpumaskp |= 1U << ci->ci_cpuid; - __cpu_simple_unlock(&pq->pq_slock); - continue; - } -#endif - - pj = pmap_tlb_shootdown_job_get(pq); - pq->pq_pte |= pte; - if (pj == NULL) { - /* - * Couldn't allocate a job entry. - * Kill it now for this CPU, unless the failure - * was due to too many pending flushes; otherwise, - * tell other cpus to kill everything.. 
- */ - if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) { - pmap_update_pg(va); - __cpu_simple_unlock(&pq->pq_slock); - continue; - } else { - if (pq->pq_pte & pmap_pg_g) - pq->pq_flushg++; - else - pq->pq_flushu++; - /* - * Since we've nailed the whole thing, - * drain the job entries pending for that - * processor. - */ - pmap_tlb_shootdown_q_drain(pq); - *cpumaskp |= 1U << ci->ci_cpuid; - } - } else { - pj->pj_pmap = pmap; - pj->pj_va = va; - pj->pj_pte = pte; - TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); - *cpumaskp |= 1U << ci->ci_cpuid; - } - __cpu_simple_unlock(&pq->pq_slock); - } - splx(s); -} - -/* - * pmap_do_tlb_shootdown_checktlbstate: check and update ci_tlbstate. - * - * => called at splipi. - * => return TRUE if we need to maintain user tlbs. - */ -static __inline boolean_t -pmap_do_tlb_shootdown_checktlbstate(struct cpu_info *ci) -{ - - KASSERT(ci == curcpu()); - - if (ci->ci_tlbstate == TLBSTATE_LAZY) { - KASSERT(ci->ci_pmap != pmap_kernel()); - /* - * mostly KASSERT(ci->ci_pmap->pm_cpus & (1U << ci->ci_cpuid)); - */ - - /* - * we no longer want tlb shootdown ipis for this pmap. - * mark the pmap no longer in use by this processor. - */ - - x86_atomic_clearbits_l(&ci->ci_pmap->pm_cpus, - 1U << ci->ci_cpuid); - ci->ci_tlbstate = TLBSTATE_STALE; - } - - if (ci->ci_tlbstate == TLBSTATE_STALE) - return FALSE; - - return TRUE; -} - -/* - * pmap_do_tlb_shootdown: - * - * Process pending TLB shootdown operations for this processor. 
- */ -void -pmap_do_tlb_shootdown(struct cpu_info *self) -{ - u_long cpu_id = self->ci_cpuid; - struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; - struct pmap_tlb_shootdown_job *pj; - int s; -#ifdef MULTIPROCESSOR - struct cpu_info *ci; - CPU_INFO_ITERATOR cii; -#endif - KASSERT(self == curcpu()); - - s = splipi(); - - __cpu_simple_lock(&pq->pq_slock); - - if (pq->pq_flushg) { - COUNT(flushg); - pmap_do_tlb_shootdown_checktlbstate(self); - tlbflushg(); - pq->pq_flushg = 0; - pq->pq_flushu = 0; - pmap_tlb_shootdown_q_drain(pq); - } else { - /* - * TLB flushes for PTEs with PG_G set may be in the queue - * after a flushu, they need to be dealt with. - */ - if (pq->pq_flushu) { - COUNT(flushu); - pmap_do_tlb_shootdown_checktlbstate(self); - tlbflush(); - } - while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { - TAILQ_REMOVE(&pq->pq_head, pj, pj_list); - - if ((pj->pj_pte & pmap_pg_g) || - pj->pj_pmap == pmap_kernel()) { - pmap_update_pg(pj->pj_va); - } else if (!pq->pq_flushu && - pj->pj_pmap == self->ci_pmap) { - if (pmap_do_tlb_shootdown_checktlbstate(self)) - pmap_update_pg(pj->pj_va); - } - - pmap_tlb_shootdown_job_put(pq, pj); - } - - pq->pq_flushu = pq->pq_pte = 0; - } - -#ifdef MULTIPROCESSOR - for (CPU_INFO_FOREACH(cii, ci)) - x86_atomic_clearbits_l(&ci->ci_tlb_ipi_mask, - (1U << cpu_id)); -#endif - __cpu_simple_unlock(&pq->pq_slock); - - splx(s); -} - - -/* - * pmap_tlb_shootdown_q_drain: - * - * Drain a processor's TLB shootdown queue. We do not perform - * the shootdown operations. This is merely a convenience - * function. - * - * Note: We expect the queue to be locked. - */ -void -pmap_tlb_shootdown_q_drain(pq) - struct pmap_tlb_shootdown_q *pq; -{ - struct pmap_tlb_shootdown_job *pj; - - while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { - TAILQ_REMOVE(&pq->pq_head, pj, pj_list); - pmap_tlb_shootdown_job_put(pq, pj); - } - pq->pq_pte = 0; -} - -/* - * pmap_tlb_shootdown_job_get: - * - * Get a TLB shootdown job queue entry. 
This places a limit on - * the number of outstanding jobs a processor may have. - * - * Note: We expect the queue to be locked. - */ -struct pmap_tlb_shootdown_job * -pmap_tlb_shootdown_job_get(pq) - struct pmap_tlb_shootdown_q *pq; -{ - struct pmap_tlb_shootdown_job *pj; - - if (pq->pq_count >= PMAP_TLB_MAXJOBS) - return (NULL); - - __cpu_simple_lock(&pmap_tlb_shootdown_job_lock); - if (pj_free == NULL) { - __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock); - return NULL; - } - pj = &pj_free->pja_job; - pj_free = - (union pmap_tlb_shootdown_job_al *)pj_free->pja_job.pj_nextfree; - __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock); - - pq->pq_count++; - return (pj); -} - -/* - * pmap_tlb_shootdown_job_put: - * - * Put a TLB shootdown job queue entry onto the free list. - * - * Note: We expect the queue to be locked. - */ -void -pmap_tlb_shootdown_job_put(pq, pj) - struct pmap_tlb_shootdown_q *pq; - struct pmap_tlb_shootdown_job *pj; -{ - -#ifdef DIAGNOSTIC - if (pq->pq_count == 0) - panic("pmap_tlb_shootdown_job_put: queue length inconsistency"); -#endif - __cpu_simple_lock(&pmap_tlb_shootdown_job_lock); - pj->pj_nextfree = &pj_free->pja_job; - pj_free = (union pmap_tlb_shootdown_job_al *)pj; - __cpu_simple_unlock(&pmap_tlb_shootdown_job_lock); - - pq->pq_count--; -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/sys_machdep.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/sys_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,550 +0,0 @@ -/* $NetBSD: sys_machdep.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $ */ -/* NetBSD: sys_machdep.c,v 1.70 2003/10/27 14:11:47 junyoung Exp */ - -/*- - * Copyright (c) 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $"); - -#include "opt_compat_netbsd.h" -#include "opt_mtrr.h" -#include "opt_perfctrs.h" -#include "opt_user_ldt.h" -#include "opt_vm86.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/ioctl.h> -#include <sys/file.h> -#include <sys/time.h> -#include <sys/proc.h> -#include <sys/user.h> -#include <sys/uio.h> -#include <sys/kernel.h> -#include <sys/buf.h> -#include <sys/signal.h> -#include <sys/malloc.h> - -#include <sys/mount.h> -#include <sys/sa.h> -#include <sys/syscallargs.h> - -#include <uvm/uvm_extern.h> - -#include <machine/cpu.h> -#include <machine/cpufunc.h> -#include <machine/gdt.h> -#include <machine/psl.h> -#include <machine/reg.h> -#include <machine/sysarch.h> -#include <machine/mtrr.h> - -#ifdef VM86 -#include <machine/vm86.h> -#endif - -#ifdef PERFCTRS -#include <machine/pmc.h> -#endif - -extern struct vm_map *kernel_map; - -int i386_iopl(struct lwp *, void *, register_t *); -int i386_get_ioperm(struct lwp *, void *, register_t *); -int i386_set_ioperm(struct lwp *, void *, register_t *); -int i386_get_mtrr(struct lwp *, void *, register_t *); -int i386_set_mtrr(struct lwp *, void *, register_t *); - -#ifdef USER_LDT - -#ifdef LDT_DEBUG -static void i386_print_ldt(int, const struct segment_descriptor *); - -static void -i386_print_ldt(i, d) - int i; - const struct segment_descriptor *d; -{ - printf("[%d] lolimit=0x%x, lobase=0x%x, type=%u, dpl=%u, p=%u, " - "hilimit=0x%x, xx=%x, def32=%u, gran=%u, hibase=0x%x\n", - i, d->sd_lolimit, d->sd_lobase, d->sd_type, d->sd_dpl, d->sd_p, - d->sd_hilimit, d->sd_xx, d->sd_def32, d->sd_gran, d->sd_hibase); -} -#endif - -int -i386_get_ldt(l, args, retval) - struct lwp *l; - void *args; - register_t *retval; -{ - int error; - struct proc *p = l->l_proc; - pmap_t pmap = p->p_vmspace->vm_map.pmap; - int nldt, num; - union descriptor *lp, *cp; - struct i386_get_ldt_args ua; - - if ((error = 
copyin(args, &ua, sizeof(ua))) != 0) - return (error); - -#ifdef LDT_DEBUG - printf("i386_get_ldt: start=%d num=%d descs=%p\n", ua.start, - ua.num, ua.desc); -#endif - - if (ua.start < 0 || ua.num < 0 || ua.start > 8192 || ua.num > 8192 || - ua.start + ua.num > 8192) - return (EINVAL); - - cp = malloc(ua.num * sizeof(union descriptor), M_TEMP, M_WAITOK); - if (cp == NULL) - return ENOMEM; - - simple_lock(&pmap->pm_lock); - - if (pmap->pm_flags & PMF_USER_LDT) { - nldt = pmap->pm_ldt_len; - lp = pmap->pm_ldt; - } else { - nldt = NLDT; - lp = ldt; - } - - if (ua.start > nldt) { - simple_unlock(&pmap->pm_lock); - free(cp, M_TEMP); - return (EINVAL); - } - - lp += ua.start; - num = min(ua.num, nldt - ua.start); -#ifdef LDT_DEBUG - { - int i; - for (i = 0; i < num; i++) - i386_print_ldt(i, &lp[i].sd); - } -#endif - - memcpy(cp, lp, num * sizeof(union descriptor)); - simple_unlock(&pmap->pm_lock); - - error = copyout(cp, ua.desc, num * sizeof(union descriptor)); - if (error == 0) - *retval = num; - - free(cp, M_TEMP); - return (error); -} - -int -i386_set_ldt(l, args, retval) - struct lwp *l; - void *args; - register_t *retval; -{ - int error, i, n; - struct proc *p = l->l_proc; - struct pcb *pcb = &l->l_addr->u_pcb; - pmap_t pmap = p->p_vmspace->vm_map.pmap; - struct i386_set_ldt_args ua; - union descriptor *descv; - size_t old_len, new_len, ldt_len; - union descriptor *old_ldt, *new_ldt; - - if ((error = copyin(args, &ua, sizeof(ua))) != 0) - return (error); - - if (ua.start < 0 || ua.num < 0 || ua.start > 8192 || ua.num > 8192 || - ua.start + ua.num > 8192) - return (EINVAL); - - descv = malloc(sizeof (*descv) * ua.num, M_TEMP, M_NOWAIT); - if (descv == NULL) - return (ENOMEM); - - if ((error = copyin(ua.desc, descv, sizeof (*descv) * ua.num)) != 0) - goto out; - - /* Check descriptors for access violations. 
*/ - for (i = 0; i < ua.num; i++) { - union descriptor *desc = &descv[i]; - - switch (desc->sd.sd_type) { - case SDT_SYSNULL: - desc->sd.sd_p = 0; - break; - case SDT_SYS286CGT: - case SDT_SYS386CGT: - /* - * Only allow call gates targeting a segment - * in the LDT or a user segment in the fixed - * part of the gdt. Segments in the LDT are - * constrained (below) to be user segments. - */ - if (desc->gd.gd_p != 0 && - !ISLDT(desc->gd.gd_selector) && - ((IDXSEL(desc->gd.gd_selector) >= NGDT) || - (gdt[IDXSEL(desc->gd.gd_selector)].sd.sd_dpl != - SEL_UPL))) { - error = EACCES; - goto out; - } - break; - case SDT_MEMEC: - case SDT_MEMEAC: - case SDT_MEMERC: - case SDT_MEMERAC: - /* Must be "present" if executable and conforming. */ - if (desc->sd.sd_p == 0) { - error = EACCES; - goto out; - } - break; - case SDT_MEMRO: - case SDT_MEMROA: - case SDT_MEMRW: - case SDT_MEMRWA: - case SDT_MEMROD: - case SDT_MEMRODA: - case SDT_MEMRWD: - case SDT_MEMRWDA: - case SDT_MEME: - case SDT_MEMEA: - case SDT_MEMER: - case SDT_MEMERA: - break; - default: - /* - * Make sure that unknown descriptor types are - * not marked present. - */ - if (desc->sd.sd_p != 0) { - error = EACCES; - goto out; - } - break; - } - - if (desc->sd.sd_p != 0) { - /* Only user (ring-3) descriptors may be present. 
*/ - if (desc->sd.sd_dpl != SEL_UPL) { - error = EACCES; - goto out; - } - } - } - - /* allocate user ldt */ - simple_lock(&pmap->pm_lock); - if (pmap->pm_ldt == 0 || (ua.start + ua.num) > pmap->pm_ldt_len) { - if (pmap->pm_flags & PMF_USER_LDT) - ldt_len = pmap->pm_ldt_len; - else - ldt_len = 512; - while ((ua.start + ua.num) > ldt_len) - ldt_len *= 2; - new_len = ldt_len * sizeof(union descriptor); - - simple_unlock(&pmap->pm_lock); - new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, - new_len); - simple_lock(&pmap->pm_lock); - - if (pmap->pm_ldt != NULL && ldt_len <= pmap->pm_ldt_len) { - /* - * Another thread (re)allocated the LDT to - * sufficient size while we were blocked in - * uvm_km_alloc. Oh well. The new entries - * will quite probably not be right, but - * hey.. not our problem if user applications - * have race conditions like that. - */ - uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len); - goto copy; - } - - old_ldt = pmap->pm_ldt; - - if (old_ldt != NULL) { - old_len = pmap->pm_ldt_len * sizeof(union descriptor); - } else { - old_len = NLDT * sizeof(union descriptor); - old_ldt = ldt; - } - - memcpy(new_ldt, old_ldt, old_len); - memset((caddr_t)new_ldt + old_len, 0, new_len - old_len); - - if (old_ldt != ldt) - uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len); - - pmap->pm_ldt = new_ldt; - pmap->pm_ldt_len = ldt_len; - - if (pmap->pm_flags & PMF_USER_LDT) - ldt_free(pmap); - else - pmap->pm_flags |= PMF_USER_LDT; - ldt_alloc(pmap, new_ldt, new_len); - pcb->pcb_ldt_sel = pmap->pm_ldt_sel; - if (pcb == curpcb) - lldt(pcb->pcb_ldt_sel); - - } -copy: - /* Now actually replace the descriptors. 
*/ - for (i = 0, n = ua.start; i < ua.num; i++, n++) - pmap->pm_ldt[n] = descv[i]; - - simple_unlock(&pmap->pm_lock); - - *retval = ua.start; - -out: - free(descv, M_TEMP); - return (error); -} -#endif /* USER_LDT */ - -int -i386_iopl(l, args, retval) - struct lwp *l; - void *args; - register_t *retval; -{ - int error; - struct proc *p = l->l_proc; - struct pcb *pcb = &l->l_addr->u_pcb; - struct i386_iopl_args ua; - dom0_op_t op; - - if ((xen_start_info.flags & SIF_PRIVILEGED) == 0) - return EPERM; - - if (securelevel > 1) - return EPERM; - - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return error; - - if ((error = copyin(args, &ua, sizeof(ua))) != 0) - return error; - - pcb->pcb_tss.tss_ioopt &= ~SEL_RPL; - if (ua.iopl) - pcb->pcb_tss.tss_ioopt |= SEL_UPL; /* i/o pl */ - else - pcb->pcb_tss.tss_ioopt |= SEL_KPL; /* i/o pl */ - - /* Force the change at ring 0. */ - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = pcb->pcb_tss.tss_ioopt & SEL_RPL; /* i/o pl */ - HYPERVISOR_dom0_op(&op); - - return 0; -} - -int -i386_get_ioperm(l, args, retval) - struct lwp *l; - void *args; - register_t *retval; -{ - int error; - struct pcb *pcb = &l->l_addr->u_pcb; - struct i386_get_ioperm_args ua; - - if ((error = copyin(args, &ua, sizeof(ua))) != 0) - return (error); - - return copyout(pcb->pcb_iomap, ua.iomap, sizeof(pcb->pcb_iomap)); -} - -int -i386_set_ioperm(l, args, retval) - struct lwp *l; - void *args; - register_t *retval; -{ - int error; - struct proc *p = l->l_proc; - struct pcb *pcb = &l->l_addr->u_pcb; - struct i386_set_ioperm_args ua; - - if (securelevel > 1) - return EPERM; - - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return error; - - if ((error = copyin(args, &ua, sizeof(ua))) != 0) - return (error); - - return copyin(ua.iomap, pcb->pcb_iomap, sizeof(pcb->pcb_iomap)); -} - -#ifdef MTRR -int -i386_get_mtrr(struct lwp *l, void *args, register_t *retval) -{ - struct i386_get_mtrr_args ua; - int error, n; - struct proc *p 
= l->l_proc; - - if (mtrr_funcs == NULL) - return ENOSYS; - - error = copyin(args, &ua, sizeof ua); - if (error != 0) - return error; - - error = copyin(ua.n, &n, sizeof n); - if (error != 0) - return error; - - error = mtrr_get(ua.mtrrp, &n, p, MTRR_GETSET_USER); - - copyout(&n, ua.n, sizeof (int)); - - return error; -} - -int -i386_set_mtrr(struct lwp *l, void *args, register_t *retval) -{ - int error, n; - struct i386_set_mtrr_args ua; - struct proc *p = l->l_proc; - - if (mtrr_funcs == NULL) - return ENOSYS; - - error = suser(p->p_ucred, &p->p_acflag); - if (error != 0) - return error; - - error = copyin(args, &ua, sizeof ua); - if (error != 0) - return error; - - error = copyin(ua.n, &n, sizeof n); - if (error != 0) - return error; - - error = mtrr_set(ua.mtrrp, &n, p, MTRR_GETSET_USER); - if (n != 0) - mtrr_commit(); - - copyout(&n, ua.n, sizeof n); - - return error; -} -#endif - -int -sys_sysarch(struct lwp *l, void *v, register_t *retval) -{ - struct sys_sysarch_args /* { - syscallarg(int) op; - syscallarg(void *) parms; - } */ *uap = v; - int error = 0; - - switch(SCARG(uap, op)) { -#ifdef USER_LDT - case I386_GET_LDT: - error = i386_get_ldt(l, SCARG(uap, parms), retval); - break; - - case I386_SET_LDT: - error = i386_set_ldt(l, SCARG(uap, parms), retval); - break; -#endif - - case I386_IOPL: - error = i386_iopl(l, SCARG(uap, parms), retval); - break; - - case I386_GET_IOPERM: - error = i386_get_ioperm(l, SCARG(uap, parms), retval); - break; - - case I386_SET_IOPERM: - error = i386_set_ioperm(l, SCARG(uap, parms), retval); - break; - -#ifdef VM86 - case I386_VM86: - error = i386_vm86(l, SCARG(uap, parms), retval); - break; -#ifdef COMPAT_16 - case I386_OLD_VM86: - error = compat_16_i386_vm86(l, SCARG(uap, parms), retval); - break; -#endif -#endif -#ifdef MTRR - case I386_GET_MTRR: - error = i386_get_mtrr(l, SCARG(uap, parms), retval); - break; - case I386_SET_MTRR: - error = i386_set_mtrr(l, SCARG(uap, parms), retval); - break; -#endif -#ifdef PERFCTRS - 
case I386_PMC_INFO: - error = pmc_info(l, SCARG(uap, parms), retval); - break; - - case I386_PMC_STARTSTOP: - error = pmc_startstop(l, SCARG(uap, parms), retval); - break; - - case I386_PMC_READ: - error = pmc_read(l, SCARG(uap, parms), retval); - break; -#endif - - default: - error = EINVAL; - break; - } - return (error); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1588 +0,0 @@ -/* $NetBSD: vector.S,v 1.1.2.1 2004/05/22 15:57:16 he Exp $ */ -/* NetBSD: 1.13 2004/03/11 11:39:26 yamt Exp */ - -/* - * Copyright 2002 (c) Wasabi Systems, Inc. - * All rights reserved. - * - * Written by Frank van der Linden for Wasabi Systems, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed for the NetBSD Project by - * Wasabi Systems, Inc. - * 4. The name of Wasabi Systems, Inc. may not be used to endorse - * or promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/*- - * Copyright (c) 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "opt_ddb.h" -#include "opt_multiprocessor.h" -#include "opt_ipkdb.h" -#include "opt_vm86.h" -#include "opt_xen.h" - -#ifndef XEN -#include <machine/i8259.h> -#endif -#include <machine/i82093reg.h> -#include <machine/i82489reg.h> -#include <machine/asm.h> -#include <machine/frameasm.h> -#include <machine/segments.h> -#include <machine/trap.h> -#include <machine/intr.h> -#include <machine/psl.h> -#ifdef XEN -#include <machine/xen.h> -#endif - -#include <net/netisr.h> - -#include "ioapic.h" -#include "lapic.h" - -#include "npx.h" -#include "assym.h" - -#define __HAVE_GENERIC_SOFT_INTERRUPTS /* XXX */ - - -/* - * Macros for interrupt entry, call to handler, and exit. - * - * XXX - * The interrupt frame is set up to look like a trap frame. This may be a - * waste. The only handler which needs a frame is the clock handler, and it - * only needs a few bits. Xdoreti() needs a trap frame for handling ASTs, but - * it could easily convert the frame on demand. - * - * The direct costs of setting up a trap frame are two pushl's (error code and - * trap number), an addl to get rid of these, and pushing and popping the - * callee-saved registers %esi, %edi, %ebx, and %ebp twice. - * - * If the interrupt frame is made more flexible, INTR can push %eax first and - * decide the ipending case with less overhead, e.g., by avoiding loading the - * segment registers. 
- * - */ - -#define MY_COUNT _C_LABEL(uvmexp) - -/* XXX See comment in locore.s */ -#ifdef __ELF__ -#define XINTR(name,num) Xintr_/**/name/**/num -#define XSTRAY(name,num) Xstray_/**/name/**/num -#define XINTR_TSS(irq_num) Xintr_tss_ ## irq_num -#else -#define XINTR(name,num) _Xintr_/**/name/**/num -#define XSTRAY(name,num) _Xstray_/**/name/**/num -#define XINTR_TSS(irq_num) Xintr_tss_/**/irq_num -#endif - -/* - * Store address of TSS in %eax, given a selector in %eax. - * Clobbers %eax, %ecx, %edx, but that's ok for its usage. - * This is a bit complicated, but it's done to make as few - * assumptions as possible about the validity of the environment. - * The GDT and the current and previous TSS are known to be OK, - * otherwise we would not be here. The only other thing that needs - * to be OK is the cpu_info structure for the current CPU. - */ -#define GET_TSS \ - andl $0xfff8,%eax ;\ - addl CPUVAR(GDT),%eax ;\ - movl 2(%eax),%edx ;\ - andl $0xffffff,%edx ;\ - movzbl 7(%eax),%eax ;\ - shl $24,%eax ;\ - orl %edx,%eax - -#if NLAPIC > 0 -#ifdef MULTIPROCESSOR -IDTVEC(recurse_lapic_ipi) - pushfl - pushl %cs - pushl %esi - pushl $0 - pushl $T_ASTFLT - INTRENTRY -IDTVEC(resume_lapic_ipi) - cli - jmp 1f -IDTVEC(intr_lapic_ipi) - pushl $0 - pushl $T_ASTFLT - INTRENTRY - movl $0,_C_LABEL(local_apic)+LAPIC_EOI - movl CPUVAR(ILEVEL),%ebx - cmpl $IPL_IPI,%ebx - jae 2f -1: - incl CPUVAR(IDEPTH) - movl $IPL_IPI,CPUVAR(ILEVEL) - sti - pushl %ebx - call _C_LABEL(x86_ipi_handler) - jmp _C_LABEL(Xdoreti) -2: - orl $(1 << LIR_IPI),CPUVAR(IPENDING) - sti - INTRFASTEXIT - -#if defined(DDB) -IDTVEC(intrddbipi) -1: - str %ax - GET_TSS - movzwl (%eax),%eax - GET_TSS - pushl %eax - movl $0xff,_C_LABEL(lapic_tpr) - movl $0,_C_LABEL(local_apic)+LAPIC_EOI - sti - call _C_LABEL(ddb_ipi_tss) - addl $4,%esp - movl $0,_C_LABEL(lapic_tpr) - iret - jmp 1b -#endif /* DDB */ -#endif /* MULTIPROCESSOR */ - - /* - * Interrupt from the local APIC timer. 
- */ -IDTVEC(recurse_lapic_ltimer) - pushfl - pushl %cs - pushl %esi - pushl $0 - pushl $T_ASTFLT - INTRENTRY -IDTVEC(resume_lapic_ltimer) - cli - jmp 1f -IDTVEC(intr_lapic_ltimer) - pushl $0 - pushl $T_ASTFLT - INTRENTRY - movl $0,_C_LABEL(local_apic)+LAPIC_EOI - movl CPUVAR(ILEVEL),%ebx - cmpl $IPL_CLOCK,%ebx - jae 2f -1: - incl CPUVAR(IDEPTH) - movl $IPL_CLOCK,CPUVAR(ILEVEL) - sti - pushl %ebx - pushl $0 - call _C_LABEL(lapic_clockintr) - addl $4,%esp - jmp _C_LABEL(Xdoreti) -2: - orl $(1 << LIR_TIMER),CPUVAR(IPENDING) - sti - INTRFASTEXIT -#endif /* NLAPIC > 0 */ - -#ifdef MULTIPROCESSOR -#define LOCK_KERNEL pushl %esp ; call _C_LABEL(x86_intlock) ; addl $4,%esp -#define UNLOCK_KERNEL pushl %esp ; call _C_LABEL(x86_intunlock) ; addl $4,%esp -#else -#define LOCK_KERNEL -#define UNLOCK_KERNEL -#endif - -#define voidop(num) - - -#define XENINTRSTUB(name, num, early_ack, late_ack, mask, unmask, level_mask) \ -IDTVEC(recurse_/**/name/**/num) ;\ - pushfl ;\ - pushl %cs ;\ - pushl %esi ;\ - subl $4,%esp ;\ - pushl $T_ASTFLT /* trap # for doing ASTs */ ;\ - INTRENTRY ;\ -IDTVEC(resume_/**/name/**/num) \ - /*movl %esp,%ecx*/ ;\ - movl $IREENT_MAGIC,TF_ERR(%esp) ;\ - movl %ebx,%esi ;\ - movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\ - movl IS_MAXLEVEL(%ebp),%ebx ;\ - jmp 1f ;\ -IDTVEC(intr_/**/name/**/num) ;\ - pushl $0 /* dummy error code */ ;\ - pushl $T_ASTFLT /* trap # for doing ASTs */ ;\ - INTRENTRY ;\ - /*movl %esp,%ecx*/ ;\ - movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\ - mask(num) /* mask it in hardware */ ;\ - early_ack(num) /* and allow other intrs */ ;\ - testl %ebp,%ebp ;\ - jz 9f /* stray */ ;\ - movl IS_MAXLEVEL(%ebp),%ebx ;\ - movl CPUVAR(ILEVEL),%esi ;\ - cmpl %ebx,%esi ;\ - jae 10f /* currently masked; hold it */ ;\ - incl MY_COUNT+V_INTR /* statistical info */ ;\ - addl $1,IS_EVCNTLO(%ebp) /* inc event counter */ ;\ - adcl $0,IS_EVCNTHI(%ebp) ;\ -1: \ - pushl %esi ;\ - movl %ebx,CPUVAR(ILEVEL) ;\ - STI(%eax) ;\ - incl CPUVAR(IDEPTH) ;\ - movl 
IS_HANDLERS(%ebp),%ebx ;\ - LOCK_KERNEL ;\ -6: \ - movl IH_LEVEL(%ebx),%edi ;\ - cmpl %esi,%edi ;\ - jle 7f ;\ - pushl %esp ;\ - pushl IH_ARG(%ebx) ;\ - movl %edi,CPUVAR(ILEVEL) ;\ - call *IH_FUN(%ebx) /* call it */ ;\ - addl $8,%esp /* toss the arg */ ;\ - movl IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\ - testl %ebx,%ebx ;\ - jnz 6b ;\ -5: \ - UNLOCK_KERNEL ;\ - CLI(%eax) ;\ - unmask(num) /* unmask it in hardware */ ;\ - late_ack(num) ;\ - STI(%eax) ;\ - jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -7: \ - UNLOCK_KERNEL ;\ - CLI(%eax) ;\ - orl $(1 << num),CPUVAR(IPENDING) ;\ - level_mask(num) ;\ - late_ack(num) ;\ - STI(%eax) ;\ - jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -10: \ - CLI(%eax) ;\ - orl $(1 << num),CPUVAR(IPENDING) ;\ - level_mask(num) ;\ -6: ; \ - late_ack(num) ;\ - STIC(%eax) ;\ - jz 4f ; \ - call _C_LABEL(stipending) ; \ - testl %eax,%eax ; \ - jnz 1b ; \ -4: INTRFASTEXIT ;\ -9: \ - unmask(num) ;\ - jmp 6b - -#define hypervisor_asm_unmask(num) \ - movl irq_to_evtchn + (num) * 4,%ecx ;\ - movl HYPERVISOR_shared_info,%eax ;\ - lock ;\ - btrl %ecx,EVENTS_MASK(%eax) - -XENINTRSTUB(xenev,0,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,1,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,2,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,3,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,4,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,5,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,6,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,7,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,8,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,9,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,10,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,11,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) 
-XENINTRSTUB(xenev,12,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,13,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,14,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,15,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,16,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,17,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,18,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,19,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,20,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,21,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,22,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,23,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,24,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,25,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,26,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,27,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,28,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,29,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,30,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) -XENINTRSTUB(xenev,31,voidop,voidop,voidop,hypervisor_asm_unmask,voidop) - -.globl _C_LABEL(xenev_stubs) -_C_LABEL(xenev_stubs): - .long _C_LABEL(Xintr_xenev0), _C_LABEL(Xrecurse_xenev0) - .long _C_LABEL(Xresume_xenev0) - .long _C_LABEL(Xintr_xenev1), _C_LABEL(Xrecurse_xenev1) - .long _C_LABEL(Xresume_xenev1) - .long _C_LABEL(Xintr_xenev2), _C_LABEL(Xrecurse_xenev2) - .long _C_LABEL(Xresume_xenev2) - .long _C_LABEL(Xintr_xenev3), _C_LABEL(Xrecurse_xenev3) - .long _C_LABEL(Xresume_xenev3) - .long _C_LABEL(Xintr_xenev4), _C_LABEL(Xrecurse_xenev4) - .long _C_LABEL(Xresume_xenev4) - .long 
_C_LABEL(Xintr_xenev5), _C_LABEL(Xrecurse_xenev5) - .long _C_LABEL(Xresume_xenev5) - .long _C_LABEL(Xintr_xenev6), _C_LABEL(Xrecurse_xenev6) - .long _C_LABEL(Xresume_xenev6) - .long _C_LABEL(Xintr_xenev7), _C_LABEL(Xrecurse_xenev7) - .long _C_LABEL(Xresume_xenev7) - .long _C_LABEL(Xintr_xenev8), _C_LABEL(Xrecurse_xenev8) - .long _C_LABEL(Xresume_xenev8) - .long _C_LABEL(Xintr_xenev9), _C_LABEL(Xrecurse_xenev9) - .long _C_LABEL(Xresume_xenev9) - .long _C_LABEL(Xintr_xenev10), _C_LABEL(Xrecurse_xenev10) - .long _C_LABEL(Xresume_xenev10) - .long _C_LABEL(Xintr_xenev11), _C_LABEL(Xrecurse_xenev11) - .long _C_LABEL(Xresume_xenev11) - .long _C_LABEL(Xintr_xenev12), _C_LABEL(Xrecurse_xenev12) - .long _C_LABEL(Xresume_xenev12) - .long _C_LABEL(Xintr_xenev13), _C_LABEL(Xrecurse_xenev13) - .long _C_LABEL(Xresume_xenev13) - .long _C_LABEL(Xintr_xenev14), _C_LABEL(Xrecurse_xenev14) - .long _C_LABEL(Xresume_xenev14) - .long _C_LABEL(Xintr_xenev15), _C_LABEL(Xrecurse_xenev15) - .long _C_LABEL(Xresume_xenev15) - .long _C_LABEL(Xintr_xenev16), _C_LABEL(Xrecurse_xenev16) - .long _C_LABEL(Xresume_xenev16) - .long _C_LABEL(Xintr_xenev17), _C_LABEL(Xrecurse_xenev17) - .long _C_LABEL(Xresume_xenev17) - .long _C_LABEL(Xintr_xenev18), _C_LABEL(Xrecurse_xenev18) - .long _C_LABEL(Xresume_xenev18) - .long _C_LABEL(Xintr_xenev19), _C_LABEL(Xrecurse_xenev19) - .long _C_LABEL(Xresume_xenev19) - .long _C_LABEL(Xintr_xenev20), _C_LABEL(Xrecurse_xenev20) - .long _C_LABEL(Xresume_xenev20) - .long _C_LABEL(Xintr_xenev21), _C_LABEL(Xrecurse_xenev21) - .long _C_LABEL(Xresume_xenev21) - .long _C_LABEL(Xintr_xenev22), _C_LABEL(Xrecurse_xenev22) - .long _C_LABEL(Xresume_xenev22) - .long _C_LABEL(Xintr_xenev23), _C_LABEL(Xrecurse_xenev23) - .long _C_LABEL(Xresume_xenev23) - .long _C_LABEL(Xintr_xenev24), _C_LABEL(Xrecurse_xenev24) - .long _C_LABEL(Xresume_xenev24) - .long _C_LABEL(Xintr_xenev25), _C_LABEL(Xrecurse_xenev25) - .long _C_LABEL(Xresume_xenev25) - .long _C_LABEL(Xintr_xenev26), 
_C_LABEL(Xrecurse_xenev26) - .long _C_LABEL(Xresume_xenev26) - .long _C_LABEL(Xintr_xenev27), _C_LABEL(Xrecurse_xenev27) - .long _C_LABEL(Xresume_xenev27) - .long _C_LABEL(Xintr_xenev28), _C_LABEL(Xrecurse_xenev28) - .long _C_LABEL(Xresume_xenev28) - .long _C_LABEL(Xintr_xenev29), _C_LABEL(Xrecurse_xenev29) - .long _C_LABEL(Xresume_xenev29) - .long _C_LABEL(Xintr_xenev30), _C_LABEL(Xrecurse_xenev30) - .long _C_LABEL(Xresume_xenev30) - .long _C_LABEL(Xintr_xenev31), _C_LABEL(Xrecurse_xenev31) - .long _C_LABEL(Xresume_xenev31) - -#ifndef XEN -/* - * This macro defines the generic stub code. Its arguments modifiy it - * for specific PICs. - */ - -#define INTRSTUB(name, num, early_ack, late_ack, mask, unmask, level_mask) \ -IDTVEC(recurse_/**/name/**/num) ;\ - pushfl ;\ - pushl %cs ;\ - pushl %esi ;\ - subl $4,%esp ;\ - pushl $T_ASTFLT /* trap # for doing ASTs */ ;\ - INTRENTRY ;\ -IDTVEC(resume_/**/name/**/num) \ - movl $IREENT_MAGIC,TF_ERR(%esp) ;\ - movl %ebx,%esi ;\ - movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\ - movl IS_MAXLEVEL(%ebp),%ebx ;\ - jmp 1f ;\ -IDTVEC(intr_/**/name/**/num) ;\ - pushl $0 /* dummy error code */ ;\ - pushl $T_ASTFLT /* trap # for doing ASTs */ ;\ - INTRENTRY ;\ - movl CPUVAR(ISOURCES) + (num) * 4, %ebp ;\ - mask(num) /* mask it in hardware */ ;\ - early_ack(num) /* and allow other intrs */ ;\ - testl %ebp,%ebp ;\ - jz 9f /* stray */ ;\ - movl IS_MAXLEVEL(%ebp),%ebx ;\ - movl CPUVAR(ILEVEL),%esi ;\ - cmpl %ebx,%esi ;\ - jae 10f /* currently masked; hold it */ ;\ - incl MY_COUNT+V_INTR /* statistical info */ ;\ - addl $1,IS_EVCNTLO(%ebp) /* inc event counter */ ;\ - adcl $0,IS_EVCNTHI(%ebp) ;\ -1: \ - pushl %esi ;\ - movl %ebx,CPUVAR(ILEVEL) ;\ - STI(%eax) ;\ - incl CPUVAR(IDEPTH) ;\ - movl IS_HANDLERS(%ebp),%ebx ;\ - LOCK_KERNEL ;\ -6: \ - movl IH_LEVEL(%ebx),%edi ;\ - cmpl %esi,%edi ;\ - jle 7f ;\ - pushl IH_ARG(%ebx) ;\ - movl %edi,CPUVAR(ILEVEL) ;\ - call *IH_FUN(%ebx) /* call it */ ;\ - addl $4,%esp /* toss the arg */ ;\ - movl 
IH_NEXT(%ebx),%ebx /* next handler in chain */ ;\ - testl %ebx,%ebx ;\ - jnz 6b ;\ -5: \ - UNLOCK_KERNEL ;\ - CLI(%eax) ;\ - unmask(num) /* unmask it in hardware */ ;\ - late_ack(num) ;\ - STI(%eax) ;\ - jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -7: \ - UNLOCK_KERNEL ;\ - CLI(%eax) ;\ - orl $(1 << num),CPUVAR(IPENDING) ;\ - level_mask(num) ;\ - late_ack(num) ;\ - STI(%eax) ;\ - jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -10: \ - CLI(%eax) ;\ - orl $(1 << num),CPUVAR(IPENDING) ;\ - level_mask(num) ;\ - late_ack(num) ;\ - STIC(%eax) ;\ - jz 4f ; \ - call _C_LABEL(stipending) ; \ - testl %eax,%eax ; \ - jnz 1b ; \ -4: INTRFASTEXIT ;\ -9: \ - unmask(num) ;\ - late_ack(num) ;\ - STIC(%eax) ;\ - jz 4f ; \ - call _C_LABEL(stipending) ; \ - testl %eax,%eax ; \ - jnz 1b ; \ -4: INTRFASTEXIT - -#define ICUADDR IO_ICU1 - -INTRSTUB(legacy,0,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,1,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,2,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,3,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,4,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,5,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,6,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,7,i8259_asm_ack1,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -#undef ICUADDR -#define ICUADDR IO_ICU2 - -INTRSTUB(legacy,8,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,9,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,10,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,11,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,12,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) 
-INTRSTUB(legacy,13,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,14,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -INTRSTUB(legacy,15,i8259_asm_ack2,voidop,i8259_asm_mask,i8259_asm_unmask, - voidop) -#endif - -#if NIOAPIC > 0 - -INTRSTUB(ioapic_edge,0,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,1,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,2,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,3,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,4,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,5,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,6,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,7,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,8,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,9,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,10,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,11,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,12,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,13,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,14,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,15,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,16,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,17,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,18,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,19,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,20,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,21,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,22,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,23,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,24,voidop,ioapic_asm_ack,voidop,voidop,voidop) 
-INTRSTUB(ioapic_edge,25,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,26,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,27,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,28,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,29,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,30,voidop,ioapic_asm_ack,voidop,voidop,voidop) -INTRSTUB(ioapic_edge,31,voidop,ioapic_asm_ack,voidop,voidop,voidop) - -INTRSTUB(ioapic_level,0,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,1,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,2,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,3,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,4,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,5,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,6,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,7,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,8,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,9,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,10,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,11,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,12,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,13,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,14,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,15,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,16,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,17,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) 
-INTRSTUB(ioapic_level,18,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,19,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,20,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,21,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,22,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,23,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,24,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,25,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,26,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,27,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,28,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,29,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,30,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) -INTRSTUB(ioapic_level,31,voidop,ioapic_asm_ack,voidop,ioapic_unmask,ioapic_mask) - -#endif - -#ifndef XEN -.globl _C_LABEL(i8259_stubs) -_C_LABEL(i8259_stubs): - .long _C_LABEL(Xintr_legacy0), _C_LABEL(Xrecurse_legacy0) - .long _C_LABEL(Xresume_legacy0) - .long _C_LABEL(Xintr_legacy1), _C_LABEL(Xrecurse_legacy1) - .long _C_LABEL(Xresume_legacy1) - .long _C_LABEL(Xintr_legacy2), _C_LABEL(Xrecurse_legacy2) - .long _C_LABEL(Xresume_legacy2) - .long _C_LABEL(Xintr_legacy3), _C_LABEL(Xrecurse_legacy3) - .long _C_LABEL(Xresume_legacy3) - .long _C_LABEL(Xintr_legacy4), _C_LABEL(Xrecurse_legacy4) - .long _C_LABEL(Xresume_legacy4) - .long _C_LABEL(Xintr_legacy5), _C_LABEL(Xrecurse_legacy5) - .long _C_LABEL(Xresume_legacy5) - .long _C_LABEL(Xintr_legacy6), _C_LABEL(Xrecurse_legacy6) - .long _C_LABEL(Xresume_legacy6) - .long _C_LABEL(Xintr_legacy7), _C_LABEL(Xrecurse_legacy7) - .long _C_LABEL(Xresume_legacy7) - .long 
_C_LABEL(Xintr_legacy8), _C_LABEL(Xrecurse_legacy8) - .long _C_LABEL(Xresume_legacy8) - .long _C_LABEL(Xintr_legacy9), _C_LABEL(Xrecurse_legacy9) - .long _C_LABEL(Xresume_legacy9) - .long _C_LABEL(Xintr_legacy10), _C_LABEL(Xrecurse_legacy10) - .long _C_LABEL(Xresume_legacy10) - .long _C_LABEL(Xintr_legacy11), _C_LABEL(Xrecurse_legacy11) - .long _C_LABEL(Xresume_legacy11) - .long _C_LABEL(Xintr_legacy12), _C_LABEL(Xrecurse_legacy12) - .long _C_LABEL(Xresume_legacy12) - .long _C_LABEL(Xintr_legacy13), _C_LABEL(Xrecurse_legacy13) - .long _C_LABEL(Xresume_legacy13) - .long _C_LABEL(Xintr_legacy14), _C_LABEL(Xrecurse_legacy14) - .long _C_LABEL(Xresume_legacy14) - .long _C_LABEL(Xintr_legacy15), _C_LABEL(Xrecurse_legacy15) - .long _C_LABEL(Xresume_legacy15) -#endif - -#if NIOAPIC > 0 -.globl _C_LABEL(ioapic_edge_stubs) -_C_LABEL(ioapic_edge_stubs): - .long _C_LABEL(Xintr_ioapic_edge0), _C_LABEL(Xrecurse_ioapic_edge0) - .long _C_LABEL(Xresume_ioapic_edge0) - .long _C_LABEL(Xintr_ioapic_edge1), _C_LABEL(Xrecurse_ioapic_edge1) - .long _C_LABEL(Xresume_ioapic_edge1) - .long _C_LABEL(Xintr_ioapic_edge2), _C_LABEL(Xrecurse_ioapic_edge2) - .long _C_LABEL(Xresume_ioapic_edge2) - .long _C_LABEL(Xintr_ioapic_edge3), _C_LABEL(Xrecurse_ioapic_edge3) - .long _C_LABEL(Xresume_ioapic_edge3) - .long _C_LABEL(Xintr_ioapic_edge4), _C_LABEL(Xrecurse_ioapic_edge4) - .long _C_LABEL(Xresume_ioapic_edge4) - .long _C_LABEL(Xintr_ioapic_edge5), _C_LABEL(Xrecurse_ioapic_edge5) - .long _C_LABEL(Xresume_ioapic_edge5) - .long _C_LABEL(Xintr_ioapic_edge6), _C_LABEL(Xrecurse_ioapic_edge6) - .long _C_LABEL(Xresume_ioapic_edge6) - .long _C_LABEL(Xintr_ioapic_edge7), _C_LABEL(Xrecurse_ioapic_edge7) - .long _C_LABEL(Xresume_ioapic_edge7) - .long _C_LABEL(Xintr_ioapic_edge8), _C_LABEL(Xrecurse_ioapic_edge8) - .long _C_LABEL(Xresume_ioapic_edge8) - .long _C_LABEL(Xintr_ioapic_edge9), _C_LABEL(Xrecurse_ioapic_edge9) - .long _C_LABEL(Xresume_ioapic_edge9) - .long _C_LABEL(Xintr_ioapic_edge10), 
_C_LABEL(Xrecurse_ioapic_edge10) - .long _C_LABEL(Xresume_ioapic_edge10) - .long _C_LABEL(Xintr_ioapic_edge11), _C_LABEL(Xrecurse_ioapic_edge11) - .long _C_LABEL(Xresume_ioapic_edge11) - .long _C_LABEL(Xintr_ioapic_edge12), _C_LABEL(Xrecurse_ioapic_edge12) - .long _C_LABEL(Xresume_ioapic_edge12) - .long _C_LABEL(Xintr_ioapic_edge13), _C_LABEL(Xrecurse_ioapic_edge13) - .long _C_LABEL(Xresume_ioapic_edge13) - .long _C_LABEL(Xintr_ioapic_edge14), _C_LABEL(Xrecurse_ioapic_edge14) - .long _C_LABEL(Xresume_ioapic_edge14) - .long _C_LABEL(Xintr_ioapic_edge15), _C_LABEL(Xrecurse_ioapic_edge15) - .long _C_LABEL(Xresume_ioapic_edge15) - .long _C_LABEL(Xintr_ioapic_edge16), _C_LABEL(Xrecurse_ioapic_edge16) - .long _C_LABEL(Xresume_ioapic_edge16) - .long _C_LABEL(Xintr_ioapic_edge17), _C_LABEL(Xrecurse_ioapic_edge17) - .long _C_LABEL(Xresume_ioapic_edge17) - .long _C_LABEL(Xintr_ioapic_edge18), _C_LABEL(Xrecurse_ioapic_edge18) - .long _C_LABEL(Xresume_ioapic_edge18) - .long _C_LABEL(Xintr_ioapic_edge19), _C_LABEL(Xrecurse_ioapic_edge19) - .long _C_LABEL(Xresume_ioapic_edge19) - .long _C_LABEL(Xintr_ioapic_edge20), _C_LABEL(Xrecurse_ioapic_edge20) - .long _C_LABEL(Xresume_ioapic_edge20) - .long _C_LABEL(Xintr_ioapic_edge21), _C_LABEL(Xrecurse_ioapic_edge21) - .long _C_LABEL(Xresume_ioapic_edge21) - .long _C_LABEL(Xintr_ioapic_edge22), _C_LABEL(Xrecurse_ioapic_edge22) - .long _C_LABEL(Xresume_ioapic_edge22) - .long _C_LABEL(Xintr_ioapic_edge23), _C_LABEL(Xrecurse_ioapic_edge23) - .long _C_LABEL(Xresume_ioapic_edge23) - .long _C_LABEL(Xintr_ioapic_edge24), _C_LABEL(Xrecurse_ioapic_edge24) - .long _C_LABEL(Xresume_ioapic_edge24) - .long _C_LABEL(Xintr_ioapic_edge25), _C_LABEL(Xrecurse_ioapic_edge25) - .long _C_LABEL(Xresume_ioapic_edge25) - .long _C_LABEL(Xintr_ioapic_edge26), _C_LABEL(Xrecurse_ioapic_edge26) - .long _C_LABEL(Xresume_ioapic_edge26) - .long _C_LABEL(Xintr_ioapic_edge27), _C_LABEL(Xrecurse_ioapic_edge27) - .long _C_LABEL(Xresume_ioapic_edge27) - .long 
_C_LABEL(Xintr_ioapic_edge28), _C_LABEL(Xrecurse_ioapic_edge28) - .long _C_LABEL(Xresume_ioapic_edge28) - .long _C_LABEL(Xintr_ioapic_edge29), _C_LABEL(Xrecurse_ioapic_edge29) - .long _C_LABEL(Xresume_ioapic_edge29) - .long _C_LABEL(Xintr_ioapic_edge30), _C_LABEL(Xrecurse_ioapic_edge30) - .long _C_LABEL(Xresume_ioapic_edge30) - .long _C_LABEL(Xintr_ioapic_edge31), _C_LABEL(Xrecurse_ioapic_edge31) - .long _C_LABEL(Xresume_ioapic_edge31) - -.globl _C_LABEL(ioapic_level_stubs) -_C_LABEL(ioapic_level_stubs): - .long _C_LABEL(Xintr_ioapic_level0), _C_LABEL(Xrecurse_ioapic_level0) - .long _C_LABEL(Xresume_ioapic_level0) - .long _C_LABEL(Xintr_ioapic_level1), _C_LABEL(Xrecurse_ioapic_level1) - .long _C_LABEL(Xresume_ioapic_level1) - .long _C_LABEL(Xintr_ioapic_level2), _C_LABEL(Xrecurse_ioapic_level2) - .long _C_LABEL(Xresume_ioapic_level2) - .long _C_LABEL(Xintr_ioapic_level3), _C_LABEL(Xrecurse_ioapic_level3) - .long _C_LABEL(Xresume_ioapic_level3) - .long _C_LABEL(Xintr_ioapic_level4), _C_LABEL(Xrecurse_ioapic_level4) - .long _C_LABEL(Xresume_ioapic_level4) - .long _C_LABEL(Xintr_ioapic_level5), _C_LABEL(Xrecurse_ioapic_level5) - .long _C_LABEL(Xresume_ioapic_level5) - .long _C_LABEL(Xintr_ioapic_level6), _C_LABEL(Xrecurse_ioapic_level6) - .long _C_LABEL(Xresume_ioapic_level6) - .long _C_LABEL(Xintr_ioapic_level7), _C_LABEL(Xrecurse_ioapic_level7) - .long _C_LABEL(Xresume_ioapic_level7) - .long _C_LABEL(Xintr_ioapic_level8), _C_LABEL(Xrecurse_ioapic_level8) - .long _C_LABEL(Xresume_ioapic_level8) - .long _C_LABEL(Xintr_ioapic_level9), _C_LABEL(Xrecurse_ioapic_level9) - .long _C_LABEL(Xresume_ioapic_level9) - .long _C_LABEL(Xintr_ioapic_level10), _C_LABEL(Xrecurse_ioapic_level10) - .long _C_LABEL(Xresume_ioapic_level10) - .long _C_LABEL(Xintr_ioapic_level11), _C_LABEL(Xrecurse_ioapic_level11) - .long _C_LABEL(Xresume_ioapic_level11) - .long _C_LABEL(Xintr_ioapic_level12), _C_LABEL(Xrecurse_ioapic_level12) - .long _C_LABEL(Xresume_ioapic_level12) - .long 
_C_LABEL(Xintr_ioapic_level13), _C_LABEL(Xrecurse_ioapic_level13) - .long _C_LABEL(Xresume_ioapic_level13) - .long _C_LABEL(Xintr_ioapic_level14), _C_LABEL(Xrecurse_ioapic_level14) - .long _C_LABEL(Xresume_ioapic_level14) - .long _C_LABEL(Xintr_ioapic_level15), _C_LABEL(Xrecurse_ioapic_level15) - .long _C_LABEL(Xresume_ioapic_level15) - .long _C_LABEL(Xintr_ioapic_level16), _C_LABEL(Xrecurse_ioapic_level16) - .long _C_LABEL(Xresume_ioapic_level16) - .long _C_LABEL(Xintr_ioapic_level17), _C_LABEL(Xrecurse_ioapic_level17) - .long _C_LABEL(Xresume_ioapic_level17) - .long _C_LABEL(Xintr_ioapic_level18), _C_LABEL(Xrecurse_ioapic_level18) - .long _C_LABEL(Xresume_ioapic_level18) - .long _C_LABEL(Xintr_ioapic_level19), _C_LABEL(Xrecurse_ioapic_level19) - .long _C_LABEL(Xresume_ioapic_level19) - .long _C_LABEL(Xintr_ioapic_level20), _C_LABEL(Xrecurse_ioapic_level20) - .long _C_LABEL(Xresume_ioapic_level20) - .long _C_LABEL(Xintr_ioapic_level21), _C_LABEL(Xrecurse_ioapic_level21) - .long _C_LABEL(Xresume_ioapic_level21) - .long _C_LABEL(Xintr_ioapic_level22), _C_LABEL(Xrecurse_ioapic_level22) - .long _C_LABEL(Xresume_ioapic_level22) - .long _C_LABEL(Xintr_ioapic_level23), _C_LABEL(Xrecurse_ioapic_level23) - .long _C_LABEL(Xresume_ioapic_level23) - .long _C_LABEL(Xintr_ioapic_level24), _C_LABEL(Xrecurse_ioapic_level24) - .long _C_LABEL(Xresume_ioapic_level24) - .long _C_LABEL(Xintr_ioapic_level25), _C_LABEL(Xrecurse_ioapic_level25) - .long _C_LABEL(Xresume_ioapic_level25) - .long _C_LABEL(Xintr_ioapic_level26), _C_LABEL(Xrecurse_ioapic_level26) - .long _C_LABEL(Xresume_ioapic_level26) - .long _C_LABEL(Xintr_ioapic_level27), _C_LABEL(Xrecurse_ioapic_level27) - .long _C_LABEL(Xresume_ioapic_level27) - .long _C_LABEL(Xintr_ioapic_level28), _C_LABEL(Xrecurse_ioapic_level28) - .long _C_LABEL(Xresume_ioapic_level28) - .long _C_LABEL(Xintr_ioapic_level29), _C_LABEL(Xrecurse_ioapic_level29) - .long _C_LABEL(Xresume_ioapic_level29) - .long _C_LABEL(Xintr_ioapic_level30), 
_C_LABEL(Xrecurse_ioapic_level30) - .long _C_LABEL(Xresume_ioapic_level30) - .long _C_LABEL(Xintr_ioapic_level31), _C_LABEL(Xrecurse_ioapic_level31) - .long _C_LABEL(Xresume_ioapic_level31) -#endif - -/* - * Symbols that vmstat -i wants, even though they're not used. - */ -.globl _C_LABEL(intrnames) -_C_LABEL(intrnames): -.globl _C_LABEL(eintrnames) -_C_LABEL(eintrnames): - -.globl _C_LABEL(intrcnt) -_C_LABEL(intrcnt): -.globl _C_LABEL(eintrcnt) -_C_LABEL(eintrcnt): - -/* - * Soft interrupt handlers - */ - -IDTVEC(softserial) - movl $IPL_SOFTSERIAL, CPUVAR(ILEVEL) - incl CPUVAR(IDEPTH) -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintlock) -#endif - movl CPUVAR(ISOURCES) + SIR_SERIAL * 4, %edi - addl $1,IS_EVCNTLO(%edi) - adcl $0,IS_EVCNTHI(%edi) - pushl $X86_SOFTINTR_SOFTSERIAL - call _C_LABEL(softintr_dispatch) - addl $4,%esp -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintunlock) -#endif - decl CPUVAR(IDEPTH) - jmp *%esi - -IDTVEC(softnet) - movl $IPL_SOFTNET, CPUVAR(ILEVEL) - incl CPUVAR(IDEPTH) -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintlock) -#endif - movl CPUVAR(ISOURCES) + SIR_NET * 4, %edi - addl $1,IS_EVCNTLO(%edi) - adcl $0,IS_EVCNTHI(%edi) - - xorl %edi,%edi - xchgl _C_LABEL(netisr),%edi - - /* XXX Do the legacy netisrs here for now. 
*/ -#define DONETISR(s, c) \ - .globl _C_LABEL(c) ;\ - testl $(1 << s),%edi ;\ - jz 1f ;\ - call _C_LABEL(c) ;\ -1: -#include <net/netisr_dispatch.h> - - pushl $X86_SOFTINTR_SOFTNET - call _C_LABEL(softintr_dispatch) - addl $4,%esp -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintunlock) -#endif - decl CPUVAR(IDEPTH) - jmp *%esi - -IDTVEC(softclock) - movl $IPL_SOFTCLOCK, CPUVAR(ILEVEL) - incl CPUVAR(IDEPTH) -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintlock) -#endif - movl CPUVAR(ISOURCES) + SIR_CLOCK * 4, %edi - addl $1,IS_EVCNTLO(%edi) - adcl $0,IS_EVCNTHI(%edi) - - pushl $X86_SOFTINTR_SOFTCLOCK - call _C_LABEL(softintr_dispatch) - addl $4,%esp -#ifdef MULTIPROCESSOR - call _C_LABEL(x86_softintunlock) -#endif - decl CPUVAR(IDEPTH) - jmp *%esi - -/* - * Trap and fault vector routines - * - * On exit from the kernel to user mode, we always need to check for ASTs. In - * addition, we need to do this atomically; otherwise an interrupt may occur - * which causes an AST, but it won't get processed until the next kernel entry - * (possibly the next clock tick). Thus, we disable interrupt before checking, - * and only enable them again on the final `iret' or before calling the AST - * handler. 
- */ - -#define TRAP(a) pushl $(a) ; jmp _C_LABEL(alltraps) -#define ZTRAP(a) pushl $0 ; TRAP(a) - -#ifdef IPKDB -#define BPTTRAP(a) pushl $0; pushl $(a); jmp _C_LABEL(bpttraps) -#else -#define BPTTRAP(a) ZTRAP(a) -#endif - - - .text -IDTVEC(trap00) - ZTRAP(T_DIVIDE) -IDTVEC(trap01) - BPTTRAP(T_TRCTRAP) -IDTVEC(trap02) - ZTRAP(T_NMI) -IDTVEC(trap03) - BPTTRAP(T_BPTFLT) -IDTVEC(trap04) - ZTRAP(T_OFLOW) -IDTVEC(trap05) - ZTRAP(T_BOUND) -IDTVEC(trap06) - ZTRAP(T_PRIVINFLT) -IDTVEC(trap07) -#if NNPX > 0 - pushl $0 # dummy error code - pushl $T_DNA - INTRENTRY -#ifdef XENDEBUG_LOW - pushl %esp -#endif - pushl CPUVAR(SELF) - call *_C_LABEL(npxdna_func) - addl $4,%esp -#ifdef XENDEBUG_LOW - addl $4,%esp -#endif - testl %eax,%eax - jz calltrap - INTRFASTEXIT -#else - ZTRAP(T_DNA) -#endif -IDTVEC(trap08) - TRAP(T_DOUBLEFLT) -IDTVEC(trap09) - ZTRAP(T_FPOPFLT) -IDTVEC(trap0a) - TRAP(T_TSSFLT) -IDTVEC(trap0b) - TRAP(T_SEGNPFLT) -IDTVEC(trap0c) - TRAP(T_STKFLT) -IDTVEC(trap0d) - TRAP(T_PROTFLT) -#ifndef XEN -IDTVEC(trap0e) -#ifndef I586_CPU - TRAP(T_PAGEFLT) -#else - pushl $T_PAGEFLT - INTRENTRY - testb $PGEX_U,TF_ERR(%esp) - jnz calltrap - movl %cr2,%eax - subl _C_LABEL(pentium_idt),%eax - cmpl $(6*8),%eax - jne calltrap - movb $T_PRIVINFLT,TF_TRAPNO(%esp) - jmp calltrap -#endif -#endif - -IDTVEC(intrspurious) -IDTVEC(trap0f) - /* - * The Pentium Pro local APIC may erroneously call this vector for a - * default IR7. Just ignore it. - * - * (The local APIC does this when CPL is raised while it's on the - * way to delivering an interrupt.. presumably enough has been set - * up that it's inconvenient to abort delivery completely..) - */ - iret - -IDTVEC(trap10) -#if NNPX > 0 - /* - * Handle like an interrupt so that we can call npxintr to clear the - * error. It would be better to handle npx interrupts as traps but - * this is difficult for nested interrupts. 
- */ - pushl $0 # dummy error code - pushl $T_ASTFLT - INTRENTRY - pushl CPUVAR(ILEVEL) - pushl %esp - incl _C_LABEL(uvmexp)+V_TRAP - call _C_LABEL(npxintr) - addl $8,%esp - INTRFASTEXIT -#else - ZTRAP(T_ARITHTRAP) -#endif -IDTVEC(trap11) - TRAP(T_ALIGNFLT) -IDTVEC(trap12) -IDTVEC(trap13) -IDTVEC(trap14) -IDTVEC(trap15) -IDTVEC(trap16) -IDTVEC(trap17) -IDTVEC(trap18) -IDTVEC(trap19) -IDTVEC(trap1a) -IDTVEC(trap1b) -IDTVEC(trap1c) -IDTVEC(trap1d) -IDTVEC(trap1e) -IDTVEC(trap1f) - /* 18 - 31 reserved for future exp */ - ZTRAP(T_RESERVED) - -IDTVEC(exceptions) -#ifndef XENDEBUG_LOW - .long _C_LABEL(Xtrap00), _C_LABEL(Xtrap01) - .long _C_LABEL(Xtrap02), _C_LABEL(Xtrap03) - .long _C_LABEL(Xtrap04), _C_LABEL(Xtrap05) - .long _C_LABEL(Xtrap06), _C_LABEL(Xtrap07) - .long _C_LABEL(Xtrap08), _C_LABEL(Xtrap09) - .long _C_LABEL(Xtrap0a), _C_LABEL(Xtrap0b) - .long _C_LABEL(Xtrap0c), _C_LABEL(Xtrap0d) - .long _C_LABEL(Xtrap0e), _C_LABEL(Xtrap0f) - .long _C_LABEL(Xtrap10), _C_LABEL(Xtrap11) - .long _C_LABEL(Xtrap12), _C_LABEL(Xtrap13) - .long _C_LABEL(Xtrap14), _C_LABEL(Xtrap15) - .long _C_LABEL(Xtrap16), _C_LABEL(Xtrap17) - .long _C_LABEL(Xtrap18), _C_LABEL(Xtrap19) - .long _C_LABEL(Xtrap1a), _C_LABEL(Xtrap1b) - .long _C_LABEL(Xtrap1c), _C_LABEL(Xtrap1d) - .long _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f) -#else - .long _C_LABEL(divide_error), _C_LABEL(debug) - .long _C_LABEL(Xtrap02), _C_LABEL(Xtrap03) #int3) - .long _C_LABEL(overflow), _C_LABEL(bounds) - .long _C_LABEL(invalid_op), _C_LABEL(device_not_available) - .long _C_LABEL(double_fault), _C_LABEL(coprocessor_segment_overrun) - .long _C_LABEL(invalid_TSS), _C_LABEL(segment_not_present) - .long _C_LABEL(stack_segment) - #.long _C_LABEL(general_protection) - .long _C_LABEL(Xtrap0d) - #.long _C_LABEL(page_fault) - .long _C_LABEL(Xtrap0e) - .long _C_LABEL(spurious_interrupt_bug) - .long _C_LABEL(coprocessor_error), _C_LABEL(alignment_check) - .long _C_LABEL(machine_check), _C_LABEL(simd_coprocessor_error) - .long _C_LABEL(Xtrap14), 
_C_LABEL(Xtrap15) - .long _C_LABEL(Xtrap16), _C_LABEL(Xtrap17) - .long _C_LABEL(Xtrap18), _C_LABEL(Xtrap19) - .long _C_LABEL(Xtrap1a), _C_LABEL(Xtrap1b) - .long _C_LABEL(Xtrap1c), _C_LABEL(Xtrap1d) - .long _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f) -#endif - - -IDTVEC(tss_trap08) -1: - str %ax - GET_TSS - movzwl (%eax),%eax - GET_TSS - pushl $T_DOUBLEFLT - pushl %eax - call _C_LABEL(trap_tss) - addl $12,%esp - iret - jmp 1b - -/* LINTSTUB: Ignore */ -NENTRY(alltraps) - INTRENTRY -calltrap: -#ifdef DIAGNOSTIC - movl CPUVAR(ILEVEL),%ebx -#endif /* DIAGNOSTIC */ - pushl %esp - call _C_LABEL(trap) - addl $4,%esp - testb $CHK_UPL,TF_CS(%esp) - jnz alltraps_checkast -#ifdef VM86 - testl $PSL_VM,TF_EFLAGS(%esp) - jz 6f -#else - jmp 6f -#endif -alltraps_checkast: - /* Check for ASTs on exit to user mode. */ - CLI(%eax) - CHECK_ASTPENDING(%eax) - jz 3f -5: CLEAR_ASTPENDING(%eax) - STI(%eax) - movl $T_ASTFLT,TF_TRAPNO(%esp) - pushl %esp - call _C_LABEL(trap) - addl $4,%esp - jmp alltraps_checkast /* re-check ASTs */ -3: CHECK_DEFERRED_SWITCH(%eax) - jnz 9f -6: STIC(%eax) - jz 4f - call _C_LABEL(stipending) - #testl %eax,%eax /* XXXcl */ - #jnz 1b -4: -#ifndef DIAGNOSTIC - INTRFASTEXIT -#else - cmpl CPUVAR(ILEVEL),%ebx - jne 3f - INTRFASTEXIT -3: pushl $4f - call _C_LABEL(printf) - addl $4,%esp -#ifdef DDB - int $3 -#endif /* DDB */ - movl %ebx,CPUVAR(ILEVEL) - jmp alltraps_checkast /* re-check ASTs */ -4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" -#endif /* DIAGNOSTIC */ -9: STI(%eax) - call _C_LABEL(pmap_load) - jmp alltraps_checkast /* re-check ASTs */ - -/* LINTSTUB: Ignore */ -IDTVEC(trap0e) - INTRENTRY - movl TF_TRAPNO(%esp),%eax - movl $T_PAGEFLT,TF_TRAPNO(%esp) -#ifdef DIAGNOSTIC - movl CPUVAR(ILEVEL),%ebx -#endif /* DIAGNOSTIC */ - #pushl %esp - pushl %eax - movl %esp,%eax - addl $4,%eax - pushl %eax - call _C_LABEL(trap) - addl $4,%esp - addl $4,%esp - testb $CHK_UPL,TF_CS(%esp) - jnz trap0e_checkast -#ifdef VM86 - testl $PSL_VM,TF_EFLAGS(%esp) - jz 6f -#else - jmp 
6f -#endif -trap0e_checkast: - /* Check for ASTs on exit to user mode. */ - CLI(%eax) - CHECK_ASTPENDING(%eax) - jz 3f -5: CLEAR_ASTPENDING(%eax) - STI(%eax) - movl $T_ASTFLT,TF_TRAPNO(%esp) - pushl %esp - call _C_LABEL(trap) - addl $4,%esp - jmp trap0e_checkast /* re-check ASTs */ -3: CHECK_DEFERRED_SWITCH(%eax) - jnz 9f -6: STIC(%eax) - jz 4f - call _C_LABEL(stipending) - #testl %eax,%eax /* XXXcl */ - #jnz 1b -4: -#ifndef DIAGNOSTIC - INTRFASTEXIT -#else - cmpl CPUVAR(ILEVEL),%ebx - jne 3f - INTRFASTEXIT -3: pushl $4f - call _C_LABEL(printf) - addl $4,%esp -#ifdef DDB - int $3 -#endif /* DDB */ - movl %ebx,CPUVAR(ILEVEL) - jmp trap0e_checkast /* re-check ASTs */ -4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" -#endif /* DIAGNOSTIC */ -9: STI(%eax) - call _C_LABEL(pmap_load) - jmp trap0e_checkast /* re-check ASTs */ - -#ifdef IPKDB -/* LINTSTUB: Ignore */ -NENTRY(bpttraps) - INTRENTRY - call _C_LABEL(ipkdb_trap_glue) - testl %eax,%eax - jz calltrap - INTRFASTEXIT - -ipkdbsetup: - popl %ecx - - /* Disable write protection: */ - movl %cr0,%eax - pushl %eax - andl $~CR0_WP,%eax - movl %eax,%cr0 - - /* Substitute Protection & Page Fault handlers: */ - movl _C_LABEL(idt),%edx - pushl 13*8(%edx) - pushl 13*8+4(%edx) - pushl 14*8(%edx) - pushl 14*8+4(%edx) - movl $fault,%eax - movw %ax,13*8(%edx) - movw %ax,14*8(%edx) - shrl $16,%eax - movw %ax,13*8+6(%edx) - movw %ax,14*8+6(%edx) - - pushl %ecx - ret - -ipkdbrestore: - popl %ecx - - /* Restore Protection & Page Fault handlers: */ - movl _C_LABEL(idt),%edx - popl 14*8+4(%edx) - popl 14*8(%edx) - popl 13*8+4(%edx) - popl 13*8(%edx) - - /* Restore write protection: */ - popl %edx - movl %edx,%cr0 - - pushl %ecx - ret -#endif /* IPKDB */ - - -/* - * If an error is detected during trap, syscall, or interrupt exit, trap() will - * change %eip to point to one of these labels. We clean up the stack, if - * necessary, and resume as if we were handling a general protection fault. 
- * This will cause the process to get a SIGBUS. - */ -/* LINTSTUB: Var: char resume_iret[1]; */ -NENTRY(resume_iret) - ZTRAP(T_PROTFLT) -/* LINTSTUB: Var: char resume_pop_ds[1]; */ -NENTRY(resume_pop_ds) - movl %es,TF_ES(%esp) - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%es -/* LINTSTUB: Var: char resume_pop_es[1]; */ -NENTRY(resume_pop_es) - movl %fs,TF_FS(%esp) - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%fs -/* LINTSTUB: Var: char resume_pop_fs[1]; */ -NENTRY(resume_pop_fs) - movl %gs,TF_GS(%esp) - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%gs -/* LINTSTUB: Var: char resume_pop_gs[1]; */ -NENTRY(resume_pop_gs) - movl $T_PROTFLT,TF_TRAPNO(%esp) - jmp calltrap - -#ifdef IPKDB -/* LINTSTUB: Func: int ipkdbfbyte(u_char *c) */ -NENTRY(ipkdbfbyte) - pushl %ebp - movl %esp,%ebp - call ipkdbsetup - movl 8(%ebp),%edx - movzbl (%edx),%eax -faultexit: - call ipkdbrestore - popl %ebp - ret - -/* LINTSTUB: Func: int ipkdbsbyte(u_char *c, int i) */ -NENTRY(ipkdbsbyte) - pushl %ebp - movl %esp,%ebp - call ipkdbsetup - movl 8(%ebp),%edx - movl 12(%ebp),%eax - movb %al,(%edx) - call ipkdbrestore - popl %ebp - ret - -fault: - popl %eax /* error code */ - movl $faultexit,%eax - movl %eax,(%esp) - movl $-1,%eax - iret -#endif /* IPKDB */ - - - -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until weve done all processing. HOWEVER, we must enable events before -# popping the stack frame (cant be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so wed -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. 
-ENTRY(hypervisor_callback) - pushl $0 # dummy error code - pushl $T_ASTFLT - INTRENTRY - movl TF_EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: pushl CPUVAR(ILEVEL) - push %esp - call do_hypervisor_callback - add $8,%esp - movl HYPERVISOR_shared_info,%esi - xorl %eax,%eax - movb TF_CS(%esp),%cl - test $CHK_UPL,%cl # slow return to ring 2 or 3 - je safesti - movl CPUVAR(ILEVEL),%ebx - jmp doreti_checkast -safesti:XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks -scrit: /**** START OF CRITICAL REGION ****/ - testb $1,evtchn_upcall_pending(%esi) - jnz 14f # process more events if necessary... - INTRFASTEXIT -critiret: -14: XEN_BLOCK_EVENTS(%esi) - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -critical_region_fixup: - cmpl $(critiret-1),%eax # eip points to iret? - jne 1f - movl $(TF_PUSHSIZE+0x8),%eax - jmp 2f -1: xorl %eax,%eax -2: - # %eax contains num bytes popped - mov %esp,%esi - add %eax,%esi # %esi points at end of src region - mov %esp,%edi - add $(TF_PUSHSIZE+0x8+0xC),%edi # %edi points at end of dst region - mov %eax,%ecx - shr $2,%ecx # convert words to bytes - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp 11b - - -# Hypervisor uses this for application faults while it executes. 
-ENTRY(failsafe_callback) - pop %ds - pop %es - pop %fs - pop %gs - call _C_LABEL(xen_failsafe_handler) - iret - -#ifdef XENDEBUG_LOW - -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C - -#define SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - movl $GSEL(GDATA_SEL, SEL_KPL),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; - -#define RESTORE_ALL \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ - popl %ds; \ - popl %es; \ - addl $4,%esp; \ - iret; \ - -ret_from_exception: - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne safesti - RESTORE_ALL - - -ENTRY(divide_error) - pushl $0 # no error code - pushl $do_divide_error -do_exception: - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - cld - movl %es,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl %esp,%edx - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - movl $(__KERNEL_DS),%edx - movl %edx,%ds - movl %edx,%es - call *%edi - addl $8,%esp - jmp ret_from_exception - -ENTRY(coprocessor_error) - pushl $0 - pushl $do_coprocessor_error - jmp do_exception - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $do_simd_coprocessor_error - jmp do_exception - -ENTRY(device_not_available) - iret - -ENTRY(debug) - pushl $0 - pushl $do_debug - jmp do_exception - -ENTRY(int3) - pushl $0 - pushl $do_int3 - jmp do_exception - -ENTRY(overflow) - pushl $0 - pushl $do_overflow - jmp do_exception - -ENTRY(bounds) - pushl $0 - pushl $do_bounds - jmp do_exception - -ENTRY(invalid_op) - pushl $0 - pushl $do_invalid_op - jmp do_exception - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $do_coprocessor_segment_overrun - 
jmp do_exception - -ENTRY(double_fault) - pushl $do_double_fault - jmp do_exception - -ENTRY(invalid_TSS) - pushl $do_invalid_TSS - jmp do_exception - -ENTRY(segment_not_present) - pushl $do_segment_not_present - jmp do_exception - -ENTRY(stack_segment) - pushl $do_stack_segment - jmp do_exception - -ENTRY(general_protection) - pushl $do_general_protection - jmp do_exception - -ENTRY(alignment_check) - pushl $do_alignment_check - jmp do_exception - -# This handler is special, because it gets an extra value on its stack, -# which is the linear faulting address. -ENTRY(page_fault) - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - cld - movl %es,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl ES(%esp), %edi # get the faulting address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl %esp,%edx - pushl %edi # push the faulting address - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - movl $(__KERNEL_DS),%edx - movl %edx,%ds - movl %edx,%es - call do_page_fault - addl $12,%esp - jmp ret_from_exception - -ENTRY(machine_check) - pushl $0 - pushl $do_machine_check - jmp do_exception - -ENTRY(spurious_interrupt_bug) - pushl $0 - pushl $do_spurious_interrupt_bug - jmp do_exception -#endif diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/i386/xen_machdep.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/xen_machdep.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,680 +0,0 @@ -/* $NetBSD: xen_machdep.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.1.2.1 2004/05/22 15:57:33 he Exp $"); - -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/mount.h> - -#include <uvm/uvm.h> - -#include <machine/gdt.h> -#include <machine/xenfunc.h> -#include <machine/xenpmap.h> - -/* #define XENDEBUG */ -/* #define XENDEBUG_LOW */ - -#ifdef XENDEBUG -#define XENPRINTF(x) printf x -#define XENPRINTK(x) printk x -#define XENPRINTK2(x) /* printk x */ - -static char XBUF[256]; -#else -#define XENPRINTF(x) -#define XENPRINTK(x) -#define XENPRINTK2(x) -#endif -void printk(char *, ...); -#define PRINTF(x) printf x -#define PRINTK(x) printk x - -shared_info_t *HYPERVISOR_shared_info; -union start_info_union start_info_union; - -void xen_failsafe_handler(void); - -void -xen_failsafe_handler(void) -{ - - panic("xen_failsafe_handler called!\n"); -} - - -void -xen_update_descriptor(union descriptor *table, union descriptor *entry) -{ - paddr_t pa; - pt_entry_t *ptp; - - ptp = kvtopte((vaddr_t)table); - pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME); - if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1])) - panic("HYPERVISOR_update_descriptor failed\n"); -} - -void -xen_set_ldt(vaddr_t base, uint32_t entries) -{ - vaddr_t va; - pt_entry_t *ptp, *maptp; - - for (va = base; va < base + entries * sizeof(union descriptor); - va += PAGE_SIZE) { - KASSERT(va >= VM_MIN_KERNEL_ADDRESS); - ptp = kvtopte(va); - maptp = (pt_entry_t *)vtomach((vaddr_t)ptp); - XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base, - entries, ptp, maptp)); - PTE_CLEARBITS(ptp, maptp, PG_RW); - } - PTE_UPDATES_FLUSH(); - - xpq_queue_set_ldt(base, entries); - xpq_flush_queue(); -} - -void -lgdt(struct region_descriptor *rdp) -{ - - panic("lgdt %p %08x\n", (void *)rdp->rd_base, rdp->rd_limit); -} - -void -xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp) -{ - char *cmd_line, *opt, *s; - int b, i, ipidx = 0; - uint32_t xi_ip[5]; - - 
cmd_line = xen_start_info.cmd_line; - - switch (what) { - case XEN_PARSE_BOOTDEV: - xcp->xcp_bootdev[0] = 0; - break; - case XEN_PARSE_CONSOLE: - xcp->xcp_console[0] = 0; - break; - } - - while (cmd_line && *cmd_line) { - opt = cmd_line; - cmd_line = strchr(opt, ' '); - if (cmd_line) - *cmd_line = 0; - - switch (what) { - case XEN_PARSE_BOOTDEV: - if (strncasecmp(opt, "bootdev=", 8) == 0) - strncpy(xcp->xcp_bootdev, opt + 8, - sizeof(xcp->xcp_console)); - break; - - case XEN_PARSE_NETINFO: - if (xcp->xcp_netinfo.xi_root && - strncasecmp(opt, "nfsroot=", 8) == 0) - strncpy(xcp->xcp_netinfo.xi_root, opt + 8, - MNAMELEN); - - if (strncasecmp(opt, "ip=", 3) == 0) { - memset(xi_ip, 0, sizeof(xi_ip)); - opt += 3; - ipidx = 0; - while (opt && *opt) { - s = opt; - opt = strchr(opt, ':'); - if (opt) - *opt = 0; - - switch (ipidx) { - case 0: /* ip */ - case 1: /* nfs server */ - case 2: /* gw */ - case 3: /* mask */ - case 4: /* host */ - if (*s == 0) - break; - for (i = 0; i < 4; i++) { - b = strtoul(s, &s, 10); - xi_ip[ipidx] = b + 256 - * xi_ip[ipidx]; - if (*s != '.') - break; - s++; - } - if (i < 3) - xi_ip[ipidx] = 0; - break; - case 5: /* interface */ - if (!strncmp(s, "xennet", 6)) - s += 6; - else if (!strncmp(s, "eth", 3)) - s += 3; - else - break; - if (xcp->xcp_netinfo.xi_ifno - == strtoul(s, NULL, 10)) - memcpy(xcp-> - xcp_netinfo.xi_ip, - xi_ip, - sizeof(xi_ip)); - break; - } - ipidx++; - - if (opt) - *opt++ = ':'; - } - } - break; - - case XEN_PARSE_CONSOLE: - if (strncasecmp(opt, "console=", 8) == 0) - strncpy(xcp->xcp_console, opt + 8, - sizeof(xcp->xcp_console)); - break; - - } - - if (cmd_line) - *cmd_line++ = ' '; - } -} - - - - - -#define XEN_PAGE_OFFSET 0xC0100000 - -static pd_entry_t -xpmap_get_bootpde(paddr_t va) -{ - - return ((pd_entry_t *)xen_start_info.pt_base)[va >> PDSHIFT]; -} - -static pd_entry_t -xpmap_get_vbootpde(paddr_t va) -{ - pd_entry_t pde; - - pde = xpmap_get_bootpde(va); - if ((pde & PG_V) == 0) - return (pde & ~PG_FRAME); - return 
(pde & ~PG_FRAME) | - (xpmap_mtop(pde & PG_FRAME) + KERNBASE); -} - -static pt_entry_t * -xpmap_get_bootptep(paddr_t va) -{ - pd_entry_t pde; - - pde = xpmap_get_vbootpde(va); - if ((pde & PG_V) == 0) - return (void *)-1; - return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); -} - -static pt_entry_t -xpmap_get_bootpte(paddr_t va) -{ - - return xpmap_get_bootptep(va)[0]; -} - -#if defined(XENDEBUG) -static void -xpmap_dump_pt(pt_entry_t *ptp, int p) -{ - pt_entry_t pte; - int j; - int bufpos; - - pte = xpmap_ptom((uint32_t)ptp - KERNBASE); - PRINTK(("%03x: %p(%p) %08x\n", p, ptp, (void *)pte, p << PDSHIFT)); - - bufpos = 0; - for (j = 0; j < PTES_PER_PTP; j++) { - if ((ptp[j] & PG_V) == 0) - continue; - pte = ptp[j] /* & PG_FRAME */; - bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ", - p, j, pte); - if (bufpos > 70) { - int k; - sprintf(XBUF + bufpos, "\n"); - PRINTK((XBUF)); - bufpos = 0; - for (k = 0; k < 1000000; k++); - } - } - if (bufpos) { - PRINTK((XBUF)); - PRINTK(("\n")); - bufpos = 0; - } -} -#endif - -void -xpmap_init(void) -{ - pd_entry_t *xen_pdp; - pt_entry_t *ptp, *sysptp; - pt_entry_t pte; - uint32_t i, j; - int bufpos; -#if defined(XENDEBUG_LOW) - extern char kernel_text, _etext, __bss_start, end, *esym; -#endif - - xpmap_phys_to_machine_mapping = (void *)xen_start_info.mfn_list; - - xen_pdp = (pd_entry_t *)xen_start_info.pt_base; - - XENPRINTK(("text %p data %p bss %p end %p esym %p\n", &kernel_text, - &_etext, &__bss_start, &end, esym)); - XENPRINTK(("xpmap_init PTD %p nkpde %d upages %d xen_PTD %p p2m-map %p\n", - (void *)PTDpaddr, nkpde, UPAGES, xen_pdp, - xpmap_phys_to_machine_mapping)); - - bufpos = 0; - - XENPRINTK(("shared_inf %08x\n", (paddr_t)xen_start_info.shared_info)); - XENPRINTK(("c0100000: %08x\n", - xpmap_get_bootpte(0xc0100000))); - - /* Map kernel. */ - - /* Map kernel data/bss/tables. */ - - /* Map ISA I/O memory. */ - - /* Map kernel PDEs. 
*/ - - /* Install a PDE recursively mapping page directory as a page table! */ - - sysptp = (pt_entry_t *)(PTDpaddr + ((1 + UPAGES) << PAGE_SHIFT)); - - /* make xen's PDE and PTE pages read-only in our pagetable */ - for (i = 0; i < xen_start_info.nr_pt_frames; i++) { - /* mark PTE page read-only in our table */ - sysptp[((xen_start_info.pt_base + - (i << PAGE_SHIFT) - KERNBASE_LOCORE) & - (PD_MASK | PT_MASK)) >> PAGE_SHIFT] &= ~PG_RW; - } - - xpq_flush_queue(); - - for (i = 0; i < 1 + UPAGES + nkpde; i++) { - /* mark PTE page read-only in xen's table */ - ptp = xpmap_get_bootptep(PTDpaddr + (i << PAGE_SHIFT)); - xpq_queue_pte_update( - (void *)xpmap_ptom((unsigned long)ptp - KERNBASE), *ptp & ~PG_RW); - XENPRINTK(("%03x: %p(%p) -> %08x\n", i, ptp, - (unsigned long)ptp - KERNTEXTOFF, *ptp)); - - /* mark PTE page read-only in our table */ - sysptp[((PTDpaddr + (i << PAGE_SHIFT) - KERNBASE_LOCORE) & - (PD_MASK | PT_MASK)) >> PAGE_SHIFT] &= ~PG_RW; - - /* update our pte's */ - ptp = (pt_entry_t *)(PTDpaddr + (i << PAGE_SHIFT)); -#if 0 - pte = xpmap_ptom((uint32_t)ptp - KERNBASE); - XENPRINTK(("%03x: %p(%p) %08x\n", i, ptp, pte, i << PDSHIFT)); -#endif - for (j = 0; j < PTES_PER_PTP; j++) { - if ((ptp[j] & PG_V) == 0) - continue; - if (ptp[j] == 0xffffffff) - ptp[j] = xen_start_info.shared_info | - (PG_V|PG_RW); - if (ptp[j] >= KERNTEXTOFF) { - pte = ptp[j]; - ptp[j] = (pte & ~PG_FRAME) | - (xpmap_get_bootpte(pte & PG_FRAME) & - PG_FRAME); - } -#if defined(XENDEBUG) && 0 - pte = ptp[j] /* & PG_FRAME */; - bufpos += sprintf(XBUF + bufpos, "%x:%03x:%08x ", - i, j, pte); - if (bufpos > 70) { - int k; - sprintf(XBUF + bufpos, "\n"); - XENPRINTK((XBUF)); - bufpos = 0; - for (k = 0; k < 1000000; k++); - } - } - if (bufpos) { - XENPRINTK((XBUF)); - bufpos = 0; -#endif - } - if (i == 0) - i = 1 + UPAGES - 1; - } - -#if 0 - for (i = 0x300; i < 0x305; i++) - if (((pt_entry_t *)xen_start_info.pt_base)[i] & PG_V) - xpmap_dump_pt((pt_entry_t *) - (xpmap_mtop(((pt_entry_t 
*)xen_start_info.pt_base)[i] & - PG_FRAME) + KERNBASE), i); - xpmap_dump_pt((pt_entry_t *)xen_start_info.pt_base, 0); -#endif - - XENPRINTK(("switching pdp: %p, %08lx, %p, %p, %p\n", (void *)PTDpaddr, - PTDpaddr - KERNBASE, - (void *)xpmap_ptom(PTDpaddr - KERNBASE), - (void *)xpmap_get_bootpte(PTDpaddr), - (void *)xpmap_mtop(xpmap_ptom(PTDpaddr - KERNBASE)))); - -#if defined(XENDEBUG) - xpmap_dump_pt((pt_entry_t *)PTDpaddr, 0); -#endif - - xpq_flush_queue(); - - xpq_queue_pin_table(xpmap_get_bootpte(PTDpaddr) & PG_FRAME, - XPQ_PIN_L2_TABLE); - xpq_queue_pt_switch(xpmap_get_bootpte(PTDpaddr) & PG_FRAME); - xpq_queue_unpin_table( - xpmap_get_bootpte(xen_start_info.pt_base) & PG_FRAME); - - /* make xen's PDE and PTE pages writable in our pagetable */ - for (i = 0; i < xen_start_info.nr_pt_frames; i++) { - /* mark PTE page writable in our table */ - ptp = &sysptp[((xen_start_info.pt_base + - (i << PAGE_SHIFT) - KERNBASE_LOCORE) & - (PD_MASK | PT_MASK)) >> PAGE_SHIFT]; - xpq_queue_pte_update( - (void *)xpmap_ptom((unsigned long)ptp - KERNBASE), *ptp | - PG_RW); - } - - xpq_flush_queue(); - XENPRINTK(("pt_switch done!\n")); -} - -/* - * Do a binary search to find out where physical memory ends on the - * real hardware. Xen will fail our updates if they are beyond the - * last available page (max_page in xen/common/memory.c). 
- */ -paddr_t -find_pmap_mem_end(vaddr_t va) -{ - mmu_update_t r; - int start, end, ok; - pt_entry_t old; - - start = xen_start_info.nr_pages; - end = HYPERVISOR_VIRT_START >> PAGE_SHIFT; - - r.ptr = (unsigned long)&PTE_BASE[x86_btop(va)]; - old = PTE_BASE[x86_btop(va)]; - - while (start + 1 < end) { - r.val = (((start + end) / 2) << PAGE_SHIFT) | PG_V; - - if (HYPERVISOR_mmu_update(&r, 1, &ok) < 0) - end = (start + end) / 2; - else - start = (start + end) / 2; - } - r.val = old; - if (HYPERVISOR_mmu_update(&r, 1, &ok) < 0) - printf("pmap_mem_end find: old update failed %08x\n", - old); - - return end << PAGE_SHIFT; -} - - -#if 0 -void xpmap_find_memory(paddr_t); -void -xpmap_find_memory(paddr_t first_avail) -{ - char buf[256]; - uint32_t i; - int bufpos; - paddr_t p; - - bufpos = 0; - for (i = ((first_avail - KERNTEXTOFF) >> PAGE_SHIFT); - i < xen_start_info.nr_pages; i++) { - /* if (xpmap_phys_to_machine_mapping[i] */ - bufpos += sprintf(buf + bufpos, "%03x:%08x:%08x ", - i, (uint32_t)xpmap_phys_to_machine_mapping[i], - (uint32_t)xpmap_mtop(xpmap_phys_to_machine_mapping[i] << - PAGE_SHIFT)); - p = xpmap_phys_to_machine_mapping[i]; - uvm_page_physload(p, p + 1, p, p + 1, VM_FREELIST_DEFAULT); - - if (bufpos > 70) { - int k; - sprintf(buf + bufpos, "\n"); - XENPRINTK((buf)); - bufpos = 0; - for (k = 0; k < 1000000; k++); - } - } - if (bufpos) { - XENPRINTK((buf)); - bufpos = 0; - } -} -#endif - - -#ifdef XENDEBUG -void xpq_debug_dump(void); -#endif - -#define XPQUEUE_SIZE 2048 -typedef union xpq_queue { - struct { - pd_entry_t *ptr; - pd_entry_t val; - } pde; - struct { - pt_entry_t *ptr; - pt_entry_t val; - } pte; - struct { - paddr_t ptr; - uint32_t val; - } pa; -} xpq_queue_t; -static xpq_queue_t xpq_queue[XPQUEUE_SIZE]; -static int xpq_idx = 0; - -void -xpq_flush_queue() -{ - int i, ok; - - XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx)); - for (i = 0; i < xpq_idx; i++) - XENPRINTK2(("%d: %p %08x\n", i, xpq_queue[i].pde.ptr, - 
xpq_queue[i].pde.val)); - if (xpq_idx != 0 && - HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, xpq_idx, &ok) < 0) - panic("HYPERVISOR_mmu_update failed\n"); - xpq_idx = 0; -} - -static inline void -xpq_increment_idx(void) -{ - - xpq_idx++; - if (__predict_false(xpq_idx == XPQUEUE_SIZE)) - xpq_flush_queue(); -} - -void -xpq_queue_invlpg(vaddr_t va) -{ - - XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va)); - xpq_queue[xpq_idx].pa.ptr = (va & PG_FRAME) | MMU_EXTENDED_COMMAND; - xpq_queue[xpq_idx].pa.val = MMUEXT_INVLPG; - xpq_increment_idx(); -} - -void -xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val) -{ - - xpq_queue[xpq_idx].pde.ptr = ptr; - xpq_queue[xpq_idx].pde.val = val; - xpq_increment_idx(); -} - -void -xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val) -{ - - xpq_queue[xpq_idx].pte.ptr = ptr; - xpq_queue[xpq_idx].pte.val = val; - xpq_increment_idx(); -} - -void -xpq_queue_unchecked_pte_update(pt_entry_t *ptr, pt_entry_t val) -{ - - xpq_queue[xpq_idx].pa.ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE; - /* XXXcl UNCHECKED_PT_UPDATE */ - xpq_queue[xpq_idx].pa.val = val; - xpq_increment_idx(); -} - -void -xpq_queue_pt_switch(paddr_t pa) -{ - - XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa)); - xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND; - xpq_queue[xpq_idx].pa.val = MMUEXT_NEW_BASEPTR; - xpq_increment_idx(); -} - -void -xpq_queue_pin_table(paddr_t pa, int type) -{ - - XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa)); - xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND; - switch (type) { - case XPQ_PIN_L1_TABLE: - xpq_queue[xpq_idx].pa.val = MMUEXT_PIN_L1_TABLE; - break; - case XPQ_PIN_L2_TABLE: - xpq_queue[xpq_idx].pa.val = MMUEXT_PIN_L2_TABLE; - break; - } - xpq_increment_idx(); -} - -void -xpq_queue_unpin_table(paddr_t pa) -{ - - XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa)); - xpq_queue[xpq_idx].pa.ptr = pa | MMU_EXTENDED_COMMAND; - xpq_queue[xpq_idx].pa.val = 
MMUEXT_UNPIN_TABLE; - xpq_increment_idx(); -} - -void -xpq_queue_set_ldt(vaddr_t va, uint32_t entries) -{ - - XENPRINTK2(("xpq_queue_set_ldt\n")); - KASSERT(va == (va & PG_FRAME)); - xpq_queue[xpq_idx].pa.ptr = MMU_EXTENDED_COMMAND | va; - xpq_queue[xpq_idx].pa.val = MMUEXT_SET_LDT | - (entries << MMUEXT_CMD_SHIFT); - xpq_increment_idx(); -} - -void -xpq_queue_tlb_flush() -{ - - XENPRINTK2(("xpq_queue_tlb_flush\n")); - xpq_queue[xpq_idx].pa.ptr = MMU_EXTENDED_COMMAND; - xpq_queue[xpq_idx].pa.val = MMUEXT_TLB_FLUSH; - xpq_increment_idx(); -} - -#ifdef XENDEBUG -void -xpq_debug_dump() -{ - int i; - - XENPRINTK2(("idx: %d\n", xpq_idx)); - for (i = 0; i < xpq_idx; i++) { - sprintf(XBUF, "%p %08x ", xpq_queue[i].pte.ptr, - xpq_queue[i].pte.val); - if (++i < xpq_idx) - sprintf(XBUF + strlen(XBUF), "%p %08x ", - xpq_queue[i].pte.ptr, xpq_queue[i].pte.val); - if (++i < xpq_idx) - sprintf(XBUF + strlen(XBUF), "%p %08x ", - xpq_queue[i].pte.ptr, xpq_queue[i].pte.val); - if (++i < xpq_idx) - sprintf(XBUF + strlen(XBUF), "%p %08x ", - xpq_queue[i].pte.ptr, xpq_queue[i].pte.val); - XENPRINTK2(("%d: %s\n", xpq_idx, XBUF)); - } -} -#endif diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/ctrl_if.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/ctrl_if.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,139 +0,0 @@ -/****************************************************************************** - * ctrl_if.h - * - * Management functions for special interface to the domain controller. - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __ASM_XEN__CTRL_IF_H__ -#define __ASM_XEN__CTRL_IF_H__ - -typedef control_msg_t ctrl_msg_t; - -/* - * Callback function type. Called for asynchronous processing of received - * request messages, and responses to previously-transmitted request messages. - * The parameters are (@msg, @id). - * @msg: Original request/response message (not a copy). 
The message can be - * modified in-place by the handler (e.g., a response callback can - * turn a request message into a response message in place). The message - * is no longer accessible after the callback handler returns -- if the - * message is required to persist for longer then it must be copied. - * @id: (Response callbacks only) The 'id' that was specified when the - * original request message was queued for transmission. - */ -typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. This - * operation will not block: it will return -EAGAIN if there is no space. - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int -ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. This - * operation will block until the message is sent, or a signal is received - * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE). - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int -ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state); - -/* - * Send @msg to the domain controller. Block until the response is received, - * and then copy it into the provided buffer, @rmsg. - */ -int -ctrl_if_send_message_and_get_response( - ctrl_msg_t *msg, - ctrl_msg_t *rmsg, - long wait_state); - -#ifdef notyet -/* - * Request a callback when there is /possibly/ space to immediately send a - * message to the domain controller. 
This function returns 0 if there is - * already space to trasnmit a message --- in this case the callback task /may/ - * still be executed. If this function returns 1 then the callback /will/ be - * executed when space becomes available. - */ -int -ctrl_if_enqueue_space_callback( - struct tq_struct *task); -#endif - -/* - * Send a response (@msg) to a message from the domain controller. This will - * never block. - * Notes: - * 1. The @msg is copied and so can be freed after this function returns. - * 2. The @msg may be the original request message, modified in-place. - */ -void -ctrl_if_send_response( - ctrl_msg_t *msg); - -/* - * Register a receiver for typed messages from the domain controller. The - * handler (@hnd) is called for every received message of specified @type. - * Returns TRUE (non-zero) if the handler was successfully registered. - * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will - * occur in a context in which it is safe to yield (i.e., process context). - */ -#define CALLBACK_IN_BLOCKING_CONTEXT 1 -int ctrl_if_register_receiver( - uint8_t type, - ctrl_msg_handler_t hnd, - unsigned int flags); - -/* - * Unregister a receiver for typed messages from the domain controller. The - * handler (@hnd) will not be executed after this function returns. - */ -void -ctrl_if_unregister_receiver( - uint8_t type, ctrl_msg_handler_t hnd); - -/* Suspend/resume notifications. */ -void ctrl_if_suspend(void); -void ctrl_if_resume(void); - -/* Start-of-day setup. */ -void ctrl_if_early_init(void); -void ctrl_if_init(void); - -/* - * Returns TRUE if there are no outstanding message requests at the domain - * controller. This can be used to ensure that messages have really flushed - * through when it is not possible to use the response-callback interface. - * WARNING: If other subsystems are using the control interface then this - * function might never return TRUE! - */ -int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! 
*/ - -/* - * Manually discard response messages from the domain controller. - * WARNING: This is usually done automatically -- this function should only - * be called when normal interrupt mechanisms are disabled! - */ -void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! */ - -#endif /* __ASM_XEN__CONTROL_IF_H__ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,53 +0,0 @@ -/* $NetBSD$ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _XEN_EVENTS_H_ -#define _XEN_EVENTS_H_ - -#define NR_IRQS 32 - -extern int evtchn_to_irq[]; - -/* typedef unsigned int (*ev_handler_t)(int, struct pt_regs *); */ -typedef int (*ev_handler_t)(void *); - -void events_default_setup(void); -void init_events(void); -unsigned int do_event(int, struct intrframe *); -int event_set_handler(int, ev_handler_t, void *, int); - -int bind_virq_to_irq(int); -void unbind_virq_from_irq(int); -int bind_evtchn_to_irq(int); - -#endif /* _XEN_EVENTS_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/frameasm.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/frameasm.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,130 +0,0 @@ -/* $NetBSD: frameasm.h,v 1.1 2004/03/11 21:44:08 cl Exp $ */ -/* NetBSD: frameasm.h,v 1.4 2004/02/20 17:35:01 yamt Exp */ - -#ifndef _I386_FRAMEASM_H_ -#define _I386_FRAMEASM_H_ - -#ifdef _KERNEL_OPT -#include "opt_multiprocessor.h" -#endif - -/* XXX assym.h */ -#define TRAP_INSTR int $0x82 -#define __HYPERVISOR_stack_switch 4 -#define __HYPERVISOR_fpu_taskswitch 7 - -#ifndef TRAPLOG -#define TLOG /**/ -#else -/* - * Fill in trap record - */ -#define TLOG \ -9: \ - movl %fs:CPU_TLOG_OFFSET, %eax; \ - movl %fs:CPU_TLOG_BASE, %ebx; \ - addl $SIZEOF_TREC,%eax; \ - andl $SIZEOF_TLOG-1,%eax; \ - addl %eax,%ebx; \ - movl %eax,%fs:CPU_TLOG_OFFSET; \ - movl %esp,TREC_SP(%ebx); \ - movl $9b,TREC_HPC(%ebx); \ - movl TF_EIP(%esp),%eax; \ - movl 
%eax,TREC_IPC(%ebx); \ - rdtsc ; \ - movl %eax,TREC_TSC(%ebx); \ - movl $MSR_LASTBRANCHFROMIP,%ecx; \ - rdmsr ; \ - movl %eax,TREC_LBF(%ebx); \ - incl %ecx ; \ - rdmsr ; \ - movl %eax,TREC_LBT(%ebx); \ - incl %ecx ; \ - rdmsr ; \ - movl %eax,TREC_IBF(%ebx); \ - incl %ecx ; \ - rdmsr ; \ - movl %eax,TREC_IBT(%ebx) -#endif - -/* - * These are used on interrupt or trap entry or exit. - */ -#define INTRENTRY \ - cld; \ - subl $TF_PUSHSIZE,%esp ; \ - movl %gs,TF_GS(%esp) ; \ - movl %fs,TF_FS(%esp) ; \ - movl %eax,TF_EAX(%esp) ; \ - movl %es,TF_ES(%esp) ; \ - movl %ds,TF_DS(%esp) ; \ - movl $GSEL(GDATA_SEL, SEL_KPL),%eax ; \ - movl %edi,TF_EDI(%esp) ; \ - movl %esi,TF_ESI(%esp) ; \ - movl %eax,%ds ; \ - movl %ebp,TF_EBP(%esp) ; \ - movl %eax,%es ; \ - movl %ebx,TF_EBX(%esp) ; \ - movl %eax,%gs ; \ - movl %edx,TF_EDX(%esp) ; \ - movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \ - movl %ecx,TF_ECX(%esp) ; \ - movl %eax,%fs ; \ - TLOG - -#define INTRFASTEXIT \ - movl TF_GS(%esp),%gs ; \ - movl TF_FS(%esp),%fs ; \ - movl TF_ES(%esp),%es ; \ - movl TF_DS(%esp),%ds ; \ - movl TF_EDI(%esp),%edi ; \ - movl TF_ESI(%esp),%esi ; \ - movl TF_EBP(%esp),%ebp ; \ - movl TF_EBX(%esp),%ebx ; \ - movl TF_EDX(%esp),%edx ; \ - movl TF_ECX(%esp),%ecx ; \ - movl TF_EAX(%esp),%eax ; \ - addl $(TF_PUSHSIZE+8),%esp ; \ - iret - -#define DO_DEFERRED_SWITCH(reg) \ - cmpl $0, CPUVAR(WANT_PMAPLOAD) ; \ - jz 1f ; \ - call _C_LABEL(pmap_load) ; \ - 1: - -#define CHECK_DEFERRED_SWITCH(reg) \ - cmpl $0, CPUVAR(WANT_PMAPLOAD) - -#define CHECK_ASTPENDING(reg) movl CPUVAR(CURLWP),reg ; \ - cmpl $0, reg ; \ - je 1f ; \ - movl L_PROC(reg),reg ; \ - cmpl $0, P_MD_ASTPENDING(reg); \ - 1: -#define CLEAR_ASTPENDING(reg) movl $0, P_MD_ASTPENDING(reg) - -#if !defined(XEN) -#define CLI(reg) cli -#define STI(reg) sti -#else -/* XXX assym.h */ -#define EVENTS_MASK 136 -/* Offsets into shared_info_t. 
*/ -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 - -#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) -#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) -#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(%reg) - -#define CLI(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \ - XEN_BLOCK_EVENTS(reg) -#define STI(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \ - XEN_UNBLOCK_EVENTS(reg) -#define STIC(reg) movl _C_LABEL(HYPERVISOR_shared_info),reg ; \ - XEN_UNBLOCK_EVENTS(reg) ; \ - testb $1,evtchn_upcall_pending(reg) -#endif - -#endif /* _I386_FRAMEASM_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,517 +0,0 @@ -/* $NetBSD: hypervisor.h,v 1.1.2.2 2004/06/17 09:23:19 tron Exp $ */ - -/* - * - * Communication to/from hypervisor. - * - * Copyright (c) 2002-2004, K A Fraser - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - - -#ifndef _XEN_HYPERVISOR_H_ -#define _XEN_HYPERVISOR_H_ - - -struct hypervisor_attach_args { - const char *haa_busname; -}; - -struct xencons_attach_args { - const char *xa_device; -}; - -struct xen_npx_attach_args { - const char *xa_device; -}; - - -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t -#define s8 int8_t -#define s16 int16_t -#define s32 int32_t -#define s64 int64_t - -/* include the hypervisor interface */ -#include <sys/systm.h> -#include <machine/xen-public/xen.h> -#include <machine/xen-public/dom0_ops.h> -#include <machine/xen-public/event_channel.h> -#include <machine/xen-public/io/domain_controller.h> -#include <machine/xen-public/io/netif.h> -#include <machine/xen-public/io/blkif.h> - -#undef u8 -#undef u16 -#undef u32 -#undef u64 -#undef s8 -#undef s16 -#undef s32 -#undef s64 - - -/* - * a placeholder for the start of day information passed up from the hypervisor - */ -union start_info_union -{ - start_info_t start_info; - char padding[512]; -}; -extern union start_info_union start_info_union; -#define xen_start_info (start_info_union.start_info) - - -/* hypervisor.c */ -void do_hypervisor_callback(struct intrframe *regs); -void hypervisor_notify_via_evtchn(unsigned int); -void hypervisor_enable_irq(unsigned int); -void hypervisor_disable_irq(unsigned int); -void hypervisor_acknowledge_irq(unsigned int); - -/* hypervisor_machdep.c */ -void hypervisor_unmask_event(unsigned int); -void hypervisor_mask_event(unsigned int); -void hypervisor_clear_event(unsigned int); -void hypervisor_force_callback(void); - -/* - * Assembler stubs for hyper-calls. 
- */ - -static inline int -HYPERVISOR_set_trap_table(trap_info_t *table) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_set_trap_table), "1" (table) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector), - "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_fpu_taskswitch(int set) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set) - : "memory" ); - - return ret; -} - -static inline int 
-HYPERVISOR_yield(void) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_block(void) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_shutdown(void) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_reboot(void) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_suspend(unsigned long srec) -{ - int ret; - unsigned long ign1, ign2; - - /* NB. On suspend, control software expects a suspend record in %esi. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory"); - - return ret; -} - -static inline long -HYPERVISOR_set_timer_op(uint64_t timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi) - : "memory"); - - return ret; -} - -static inline int -HYPERVISOR_dom0_op(dom0_op_t *dom0_op) -{ - int ret; - unsigned long ign1; - - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op) - : "memory"); - - return ret; -} - -static inline int -HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value) - : "memory" ); - - return ret; -} - -static inline unsigned long -HYPERVISOR_get_debugreg(int reg) -{ - unsigned long ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_get_debugreg), "1" (reg) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_descriptor(unsigned long pa, unsigned long word1, - unsigned long word2) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_update_descriptor), "1" (pa), "2" (word1), - "3" (word2) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op(unsigned int op, unsigned long *extent_list, - unsigned long nr_extents, unsigned int 
extent_order) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4), - "=D" (ign5) - : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list), - "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_va_mapping(unsigned long va, unsigned long new_val, - unsigned long flags) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_update_va_mapping), - "1" (va), "2" (new_val), "3" (flags) - : "memory" ); - - if (__predict_false(ret < 0)) - panic("Failed update VA mapping: %08lx, %08lx, %08lx", - va, new_val, flags); - - return ret; -} - -static inline int -HYPERVISOR_event_channel_op(void *op) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_event_channel_op), "1" (op) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_xen_version(int cmd) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_xen_version), "1" (cmd) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_console_io(int cmd, int count, char *str) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_physdev_op(void 
*physdev_op) -{ - int ret; - unsigned long ign1; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (count), "3" (uop) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, - unsigned long new_val, unsigned long flags, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_va_mapping_otherdomain), - "1" (va), "2" (new_val), "3" (flags), "4" (domid) : - "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type) - : "memory" ); - - return ret; -} - -#endif /* _XEN_HYPERVISOR_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/if_xennetvar.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/if_xennetvar.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,117 +0,0 @@ -/* $NetBSD: if_xennetvar.h,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _XEN_IF_XENNETVAR_H_ -#define _XEN_IF_XENNETVAR_H_ - -#include <machine/xen.h> - -union xennet_bufarray { - struct { - struct mbuf *xbtx_m; - } xb_tx; - struct { - vaddr_t xbrx_va; - paddr_t xbrx_pa; - struct xennet_softc *xbrx_sc; - } xb_rx; - int xb_next; -}; - -struct xennet_txbuf { - SLIST_ENTRY(xennet_txbuf) xt_next; - struct xennet_softc *xt_sc; - paddr_t xt_pa; - u_char xt_buf[0]; -}; -#define TXBUF_PER_PAGE 2 -#define TXBUF_BUFSIZE (PAGE_SIZE / TXBUF_PER_PAGE) - sizeof(struct xennet_txbuf) - -struct xennet_softc { - struct device sc_dev; /* base device glue */ - struct ethercom sc_ethercom; /* Ethernet common part */ - - int sc_ifno; - - uint8_t sc_enaddr[6]; - -#ifdef mediacode - struct ifmedia sc_media; -#endif - - /* What is the status of our connection to the remote backend? */ -#define BEST_CLOSED 0 -#define BEST_DISCONNECTED 1 -#define BEST_CONNECTED 2 - unsigned int sc_backend_state; - - unsigned int sc_evtchn; - unsigned int sc_irq; - - netif_tx_interface_t *sc_tx; - netif_rx_interface_t *sc_rx; - struct vm_page *sc_pg_tx; - struct vm_page *sc_pg_rx; - - uint32_t sc_tx_entries; - uint32_t sc_tx_resp_cons; - - uint32_t sc_rx_resp_cons; - uint32_t sc_rx_bufs_to_notify; - - union xennet_bufarray sc_tx_bufa[NETIF_TX_RING_SIZE]; - union xennet_bufarray sc_rx_bufa[NETIF_TX_RING_SIZE]; - - SLIST_HEAD(, xennet_txbuf) sc_tx_bufs; - -#if NRND > 0 - rndsource_element_t sc_rnd_source; -#endif -}; - -struct xennet_attach_args { - const char *xa_device; - int xa_handle; -}; - -struct nfs_diskless; - -int xennet_scan(struct device *, struct xennet_attach_args *, cfprint_t); -void xennet_scan_finish(struct device *); -void xennet_start(struct ifnet *); -int xennet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); -void xennet_watchdog(struct ifnet *ifp); -int xennet_bootstatic_callback(struct nfs_diskless *); - -#endif /* _XEN_IF_XENNETVAR_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/pmap.h --- 
a/netbsd-2.0-xen-sparse/sys/arch/xen/include/pmap.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,533 +0,0 @@ -/* $NetBSD: pmap.h,v 1.1.2.1 2004/05/22 15:59:58 he Exp $ */ -/* NetBSD: pmap.h,v 1.79 2004/02/20 17:35:01 yamt Exp */ - -/* - * - * Copyright (c) 1997 Charles D. Cranor and Washington University. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgment: - * This product includes software developed by Charles D. Cranor and - * Washington University. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * pmap.h: see pmap.c for the history of this pmap module. - */ - -#ifndef _I386_PMAP_H_ -#define _I386_PMAP_H_ - -#if defined(_KERNEL_OPT) -#include "opt_user_ldt.h" -#include "opt_largepages.h" -#endif - -#include "opt_xen.h" - -#include <machine/cpufunc.h> -#include <machine/pte.h> -#include <machine/xenfunc.h> -#include <machine/xenpmap.h> -#include <machine/segments.h> -#include <uvm/uvm_object.h> - -/* - * see pte.h for a description of i386 MMU terminology and hardware - * interface. - * - * a pmap describes a processes' 4GB virtual address space. this - * virtual address space can be broken up into 1024 4MB regions which - * are described by PDEs in the PDP. the PDEs are defined as follows: - * - * (ranges are inclusive -> exclusive, just like vm_map_entry start/end) - * (the following assumes that KERNBASE is 0xc0000000) - * - * PDE#s VA range usage - * 0->766 0x0 -> 0xbfc00000 user address space - * 767 0xbfc00000-> recursive mapping of PDP (used for - * 0xc0000000 linear mapping of PTPs) - * 768->1023 0xc0000000-> kernel address space (constant - * 0xffc00000 across all pmap's/processes) - * 1023 0xffc00000-> "alternate" recursive PDP mapping - * <end> (for other pmaps) - * - * - * note: a recursive PDP mapping provides a way to map all the PTEs for - * a 4GB address space into a linear chunk of virtual memory. in other - * words, the PTE for page 0 is the first int mapped into the 4MB recursive - * area. the PTE for page 1 is the second int. the very last int in the - * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB - * address). - * - * all pmap's PD's must have the same values in slots 768->1023 so that - * the kernel is always mapped in every process. these values are loaded - * into the PD at pmap creation time. - * - * at any one time only one pmap can be active on a processor. this is - * the pmap whose PDP is pointed to by processor register %cr3. 
this pmap - * will have all its PTEs mapped into memory at the recursive mapping - * point (slot #767 as show above). when the pmap code wants to find the - * PTE for a virtual address, all it has to do is the following: - * - * address of PTE = (767 * 4MB) + (VA / PAGE_SIZE) * sizeof(pt_entry_t) - * = 0xbfc00000 + (VA / 4096) * 4 - * - * what happens if the pmap layer is asked to perform an operation - * on a pmap that is not the one which is currently active? in that - * case we take the PA of the PDP of non-active pmap and put it in - * slot 1023 of the active pmap. this causes the non-active pmap's - * PTEs to get mapped in the final 4MB of the 4GB address space - * (e.g. starting at 0xffc00000). - * - * the following figure shows the effects of the recursive PDP mapping: - * - * PDP (%cr3) - * +----+ - * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 - * | | - * | | - * | 767| -> points back to PDP (%cr3) mapping VA 0xbfc00000 -> 0xc0000000 - * | 768| -> first kernel PTP (maps 0xc0000000 -> 0xf0400000) - * | | - * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) - * +----+ - * - * note that the PDE#767 VA (0xbfc00000) is defined as "PTE_BASE" - * note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE" - * - * starting at VA 0xbfc00000 the current active PDP (%cr3) acts as a - * PTP: - * - * PTP#767 == PDP(%cr3) => maps VA 0xbfc00000 -> 0xc0000000 - * +----+ - * | 0| -> maps the contents of PTP#0 at VA 0xbfc00000->0xbfc01000 - * | | - * | | - * | 767| -> maps contents of PTP#767 (the PDP) at VA 0xbffbf000 - * | 768| -> maps contents of first kernel PTP - * | | - * |1023| - * +----+ - * - * note that mapping of the PDP at PTP#767's VA (0xbffbf000) is - * defined as "PDP_BASE".... within that mapping there are two - * defines: - * "PDP_PDE" (0xbfeffbfc) is the VA of the PDE in the PDP - * which points back to itself. - * "APDP_PDE" (0xbfeffffc) is the VA of the PDE in the PDP which - * establishes the recursive mapping of the alternate pmap. 
- * to set the alternate PDP, one just has to put the correct - * PA info in *APDP_PDE. - * - * note that in the APTE_BASE space, the APDP appears at VA - * "APDP_BASE" (0xfffff000). - */ -/* XXX MP should we allocate one APDP_PDE per processor?? */ - -/* - * the following defines identify the slots used as described above. - */ - -#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 767: for recursive PDP map */ -#define PDSLOT_KERN (KERNBASE/NBPD) /* 768: start of kernel space */ -#define PDSLOT_APTE ((unsigned)1023-16) /* 1023: alternative recursive slot */ - -/* - * the following defines give the virtual addresses of various MMU - * data structures: - * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings - * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD - * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP - */ - -#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) -#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) -#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * PAGE_SIZE))) -#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * PAGE_SIZE))) -#define PDP_PDE (PDP_BASE + PDSLOT_PTE) -#define APDP_PDE (PDP_BASE + PDSLOT_APTE) - -/* - * the follow define determines how many PTPs should be set up for the - * kernel by locore.s at boot time. this should be large enough to - * get the VM system running. once the VM system is running, the - * pmap module can add more PTPs to the kernel area on demand. 
- */ - -#ifndef NKPTP -#define NKPTP 4 /* 16MB to start */ -#endif -#define NKPTP_MIN 4 /* smallest value we allow */ -#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) - /* largest value (-1 for APTP space) */ - -/* - * pdei/ptei: generate index into PDP/PTP from a VA - */ -#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) -#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) - -/* - * PTP macros: - * a PTP's index is the PD index of the PDE that points to it - * a PTP's offset is the byte-offset in the PTE space that this PTP is at - * a PTP's VA is the first VA mapped by that PTP - * - * note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 1024 entries) - * NBPD == number of bytes a PTP can map (4MB) - */ - -#define ptp_i2o(I) ((I) * PAGE_SIZE) /* index => offset */ -#define ptp_o2i(O) ((O) / PAGE_SIZE) /* offset => index */ -#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ -#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ - -/* - * PG_AVAIL usage: we make use of the ignored bits of the PTE - */ - -#define PG_W PG_AVAIL1 /* "wired" mapping */ -#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ -#define PG_X PG_AVAIL3 /* executable mapping */ - -/* - * Number of PTE's per cache line. 4 byte pte, 32-byte cache line - * Used to avoid false sharing of cache lines. - */ -#define NPTECL 8 - -#ifdef _KERNEL -/* - * pmap data structures: see pmap.c for details of locking. - */ - -struct pmap; -typedef struct pmap *pmap_t; - -/* - * we maintain a list of all non-kernel pmaps - */ - -LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ - -/* - * the pmap structure - * - * note that the pm_obj contains the simple_lock, the reference count, - * page list, and number of PTPs within the pmap. - * - * XXX If we ever support processor numbers higher than 31, we'll have - * XXX to rethink the CPU mask. 
- */ - -struct pmap { - struct uvm_object pm_obj; /* object (lck by object lock) */ -#define pm_lock pm_obj.vmobjlock - LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ - pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ - u_int32_t pm_pdirpa; /* PA of PD (read-only after create) */ - struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */ - struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ - - vaddr_t pm_hiexec; /* highest executable mapping */ - int pm_flags; /* see below */ - - union descriptor *pm_ldt; /* user-set LDT */ - int pm_ldt_len; /* number of LDT entries */ - int pm_ldt_sel; /* LDT selector */ - u_int32_t pm_cpus; /* mask of CPUs using pmap */ -}; - -/* pm_flags */ -#define PMF_USER_LDT 0x01 /* pmap has user-set LDT */ - -/* - * for each managed physical page we maintain a list of <PMAP,VA>'s - * which it is mapped at. the list is headed by a pv_head structure. - * there is one pv_head per managed phys page (allocated at boot time). - * the pv_head structure points to a list of pv_entry structures (each - * describes one mapping). - */ - -struct pv_entry { /* locked by its list's pvh_lock */ - SPLAY_ENTRY(pv_entry) pv_node; /* splay-tree node */ - struct pmap *pv_pmap; /* the pmap */ - vaddr_t pv_va; /* the virtual address */ - struct vm_page *pv_ptp; /* the vm_page of the PTP */ -}; - -/* - * pv_entrys are dynamically allocated in chunks from a single page. - * we keep track of how many pv_entrys are in use for each page and - * we can free pv_entry pages if needed. there is one lock for the - * entire allocation system. 
- */ - -struct pv_page_info { - TAILQ_ENTRY(pv_page) pvpi_list; - struct pv_entry *pvpi_pvfree; - int pvpi_nfree; -}; - -/* - * number of pv_entry's in a pv_page - * (note: won't work on systems where NPBG isn't a constant) - */ - -#define PVE_PER_PVPAGE ((PAGE_SIZE - sizeof(struct pv_page_info)) / \ - sizeof(struct pv_entry)) - -/* - * a pv_page: where pv_entrys are allocated from - */ - -struct pv_page { - struct pv_page_info pvinfo; - struct pv_entry pvents[PVE_PER_PVPAGE]; -}; - -/* - * global kernel variables - */ - -/* PTDpaddr: is the physical address of the kernel's PDP */ -extern u_long PTDpaddr; - -extern struct pmap kernel_pmap_store; /* kernel pmap */ -extern int nkpde; /* current # of PDEs for kernel */ -extern int pmap_pg_g; /* do we support PG_G? */ - -/* - * macros - */ - -#define pmap_kernel() (&kernel_pmap_store) -#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) -#define pmap_wired_count(pmap) ((pmap)->pm_stats.wired_count) -#define pmap_update(pmap) /* nothing (yet) */ - -#define pmap_clear_modify(pg) pmap_clear_attrs(pg, PG_M) -#define pmap_clear_reference(pg) pmap_clear_attrs(pg, PG_U) -#define pmap_copy(DP,SP,D,L,S) -#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) -#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) -#define pmap_move(DP,SP,D,L,S) -#define pmap_phys_address(ppn) x86_ptob(ppn) -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? 
*/ - - -/* - * prototypes - */ - -void pmap_activate(struct lwp *); -void pmap_bootstrap(vaddr_t); -boolean_t pmap_clear_attrs(struct vm_page *, int); -void pmap_deactivate(struct lwp *); -void pmap_deactivate2(struct lwp *); -void pmap_page_remove (struct vm_page *); -void pmap_remove(struct pmap *, vaddr_t, vaddr_t); -boolean_t pmap_test_attrs(struct vm_page *, int); -void pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); -int pmap_exec_fixup(struct vm_map *, struct trapframe *, - struct pcb *); -void pmap_load(void); -int pmap_enter_ma(struct pmap *, vaddr_t, paddr_t, vm_prot_t, - int); - -vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */ - -void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *); -void pmap_tlb_shootnow(int32_t); -void pmap_do_tlb_shootdown(struct cpu_info *); - -#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ - -/* - * Do idle page zero'ing uncached to avoid polluting the cache. - */ -boolean_t pmap_pageidlezero(paddr_t); -#define PMAP_PAGEIDLEZERO(pa) pmap_pageidlezero((pa)) - -/* - * inline functions - */ - -/*ARGSUSED*/ -static __inline void -pmap_remove_all(struct pmap *pmap) -{ - /* Nothing. 
*/ -} - -/* - * pmap_update_pg: flush one page from the TLB (or flush the whole thing - * if hardware doesn't support one-page flushing) - */ - -__inline static void __attribute__((__unused__)) -pmap_update_pg(vaddr_t va) -{ -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) - tlbflush(); - else -#endif - invlpg((u_int) va); -} - -/* - * pmap_update_2pg: flush two pages from the TLB - */ - -__inline static void __attribute__((__unused__)) -pmap_update_2pg(vaddr_t va, vaddr_t vb) -{ -#if defined(I386_CPU) - if (cpu_class == CPUCLASS_386) - tlbflush(); - else -#endif - { - invlpg((u_int) va); - invlpg((u_int) vb); - } -} - -/* - * pmap_page_protect: change the protection of all recorded mappings - * of a managed page - * - * => this function is a frontend for pmap_page_remove/pmap_clear_attrs - * => we only have to worry about making the page more protected. - * unprotecting a page is done on-demand at fault time. - */ - -__inline static void __attribute__((__unused__)) -pmap_page_protect(struct vm_page *pg, vm_prot_t prot) -{ - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { - (void) pmap_clear_attrs(pg, PG_RW); - } else { - pmap_page_remove(pg); - } - } -} - -/* - * pmap_protect: change the protection of pages in a pmap - * - * => this function is a frontend for pmap_remove/pmap_write_protect - * => we only have to worry about making the page more protected. - * unprotecting a page is done on-demand at fault time. 
- */ - -__inline static void __attribute__((__unused__)) -pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) -{ - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { - pmap_write_protect(pmap, sva, eva, prot); - } else { - pmap_remove(pmap, sva, eva); - } - } -} - -/* - * various address inlines - * - * vtopte: return a pointer to the PTE mapping a VA, works only for - * user and PT addresses - * - * kvtopte: return a pointer to the PTE mapping a kernel VA - */ - -#include <lib/libkern/libkern.h> - -static __inline pt_entry_t * __attribute__((__unused__)) -vtopte(vaddr_t va) -{ - - KASSERT(va < (PDSLOT_KERN << PDSHIFT)); - - return (PTE_BASE + x86_btop(va)); -} - -static __inline pt_entry_t * __attribute__((__unused__)) -kvtopte(vaddr_t va) -{ - - KASSERT(va >= (PDSLOT_KERN << PDSHIFT)); - -#ifdef LARGEPAGES - { - pd_entry_t *pde; - - pde = PDP_BASE + pdei(va); - if (*pde & PG_PS) - return ((pt_entry_t *)pde); - } -#endif - - return (PTE_BASE + x86_btop(va)); -} - -/* - * vtomach: virtual address to machine address. For use by - * machine-dependent code only. - */ - -static inline paddr_t __attribute__((__unused__)) -vtomach(vaddr_t va) -{ - pt_entry_t pte; - - pte = PTE_GET(&PTE_BASE[x86_btop(va)]); - return xpmap_ptom((pte & PG_FRAME) | (va & ~PG_FRAME)); -} - -#define pmap_cpu_has_pg_n() (cpu_class != CPUCLASS_386) -#define pmap_cpu_has_invlpg() (cpu_class != CPUCLASS_386) - -paddr_t vtophys(vaddr_t); -vaddr_t pmap_map(vaddr_t, paddr_t, paddr_t, vm_prot_t); - -void pmap_kenter_ma(vaddr_t, paddr_t, vm_prot_t); - -#if defined(USER_LDT) -void pmap_ldt_cleanup(struct lwp *); -#define PMAP_FORK -#endif /* USER_LDT */ - -/* - * Hooks for the pool allocator. 
- */ -#define POOL_VTOPHYS(va) vtophys((vaddr_t) (va)) - -#endif /* _KERNEL */ -#endif /* _I386_PMAP_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/xbdvar.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xbdvar.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,60 +0,0 @@ -/* $NetBSD: xbdvar.h,v 1.5 2004/05/07 14:15:11 cl Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef _XEN_XBDVAR_H_ -#define _XEN_XBDVAR_H_ - -struct xbd_softc { - struct device sc_dev; /* base device glue */ - struct dk_softc sc_dksc; /* generic disk interface */ - unsigned long sc_xd_device; /* cookie identifying device */ - struct dk_intf *sc_di; /* pseudo-disk interface */ - struct simplelock sc_slock; /* our lock */ - int sc_shutdown; /* about to be removed */ -#if NRND > 0 - rndsource_element_t sc_rnd_source; -#endif -}; - -struct xbd_attach_args { - const char *xa_device; - vdisk_t *xa_xd; - struct dk_intf *xa_dkintf; - struct sysctlnode *xa_diskcookies; -}; - -int xbd_scan(struct device *, struct xbd_attach_args *, cfprint_t); -void xbd_scan_finish(struct device *); - -#endif /* _XEN_XBDVAR_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,249 +0,0 @@ -/* $NetBSD: xen.h,v 1.1.2.2 2004/06/17 09:23:19 tron Exp $ */ - -/* - * - * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) - * All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - - -#ifndef _XEN_H -#define _XEN_H - -#ifndef _LOCORE - -struct xen_netinfo { - uint32_t xi_ifno; - char *xi_root; - uint32_t xi_ip[5]; -}; - -union xen_cmdline_parseinfo { - char xcp_bootdev[16]; /* sizeof(dv_xname) */ - struct xen_netinfo xcp_netinfo; - char xcp_console[16]; -}; - -#define XEN_PARSE_BOOTDEV 0 -#define XEN_PARSE_NETINFO 1 -#define XEN_PARSE_CONSOLE 2 - -void xen_parse_cmdline(int, union xen_cmdline_parseinfo *); - -void xenconscn_attach(void); - -void xenmachmem_init(void); -void xenprivcmd_init(void); -void xenvfr_init(void); - -void idle_block(void); - -#ifdef XENDEBUG -void printk(const char *, ...); -void vprintk(const char *, va_list); -#endif - -#endif - -#endif /* _XEN_H */ - -/****************************************************************************** - * os.h - * - * random collection of macros and definition - */ - -#ifndef _OS_H_ -#define _OS_H_ - -/* - * These are the segment descriptors provided for us by the hypervisor. - * For now, these are hardwired -- guest OSes cannot update the GDT - * or LDT. - * - * It shouldn't be hard to support descriptor-table frobbing -- let me - * know if the BSD or XP ports require flexibility here. - */ - - -/* - * these are also defined in xen-public/xen.h but can't be pulled in as - * they are used in start of day assembly. Need to clean up the .h files - * a bit more... - */ - -#ifndef FLAT_RING1_CS -#define FLAT_RING1_CS 0x0819 -#define FLAT_RING1_DS 0x0821 -#define FLAT_RING3_CS 0x082b -#define FLAT_RING3_DS 0x0833 -#endif - -#define __KERNEL_CS FLAT_RING1_CS -#define __KERNEL_DS FLAT_RING1_DS - -/* Everything below this point is not included by assembler (.S) files. */ -#ifndef _LOCORE - -/* some function prototypes */ -void trap_init(void); - - -/* - * STI/CLI equivalents. These basically set and clear the virtual - * event_enable flag in the shared_info structure. Note that when - * the enable bit is set, there may be pending events to be handled. 
- * We may therefore call into do_hypervisor_callback() directly. - */ - -#define __save_flags(x) \ -do { \ - (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ -} while (0) - -#define __restore_flags(x) \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - __insn_barrier(); \ - if ((_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0) { \ - __insn_barrier(); \ - if (__predict_false(_shared->vcpu_data[0].evtchn_upcall_pending)) \ - hypervisor_force_callback(); \ - } \ -} while (0) - -#define __cli() \ -do { \ - HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ - __insn_barrier(); \ -} while (0) - -#define __sti() \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - __insn_barrier(); \ - _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ - __insn_barrier(); /* unmask then check (avoid races) */ \ - if (__predict_false(_shared->vcpu_data[0].evtchn_upcall_pending)) \ - hypervisor_force_callback(); \ -} while (0) - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) do { \ - __save_flags(x); \ - __cli(); \ -} while (/* CONSTCOND */ 0) -#define save_and_sti(x) __save_and_sti(x) - -#ifdef MULTIPROCESSOR -#define __LOCK_PREFIX "lock; " -#else -#define __LOCK_PREFIX "" -#endif - -static __inline__ uint32_t -x86_atomic_xchg(uint32_t *ptr, unsigned long val) -{ - unsigned long result; - - __asm __volatile("xchgl %0,%1" - :"=r" (result) - :"m" (*ptr), "0" (val) - :"memory"); - - return result; -} - -static __inline__ int -x86_atomic_test_and_clear_bit(volatile void *ptr, int bitno) -{ - int result; - - __asm __volatile(__LOCK_PREFIX - "btrl %2,%1 ;" - "sbbl %0,%0" - :"=r" (result), "=m" (*(volatile uint32_t *)(ptr)) - :"Ir" (bitno) : "memory"); - return result; -} - -static __inline__ int -x86_atomic_test_and_set_bit(volatile void *ptr, int bitno) -{ - int result; - - __asm __volatile(__LOCK_PREFIX - "btsl %2,%1 ;" - "sbbl 
%0,%0" - :"=r" (result), "=m" (*(volatile uint32_t *)(ptr)) - :"Ir" (bitno) : "memory"); - return result; -} - -static __inline int -x86_constant_test_bit(const volatile void *ptr, int bitno) -{ - return ((1UL << (bitno & 31)) & - (((const volatile uint32_t *) ptr)[bitno >> 5])) != 0; -} - -static __inline int -x86_variable_test_bit(const volatile void *ptr, int bitno) -{ - int result; - - __asm __volatile( - "btl %2,%1 ;" - "sbbl %0,%0" - :"=r" (result) - :"m" (*(volatile uint32_t *)(ptr)), "Ir" (bitno)); - return result; -} - -#define x86_atomic_test_bit(ptr, bitno) \ - (__builtin_constant_p(bitno) ? \ - x86_constant_test_bit((ptr),(bitno)) : \ - x86_variable_test_bit((ptr),(bitno))) - -static __inline void -x86_atomic_set_bit(volatile void *ptr, int bitno) -{ - __asm __volatile(__LOCK_PREFIX - "btsl %1,%0" - :"=m" (*(volatile uint32_t *)(ptr)) - :"Ir" (bitno)); -} - -static __inline void -x86_atomic_clear_bit(volatile void *ptr, int bitno) -{ - __asm __volatile(__LOCK_PREFIX - "btrl %1,%0" - :"=m" (*(volatile uint32_t *)(ptr)) - :"Ir" (bitno)); -} - -#endif /* !__ASSEMBLY__ */ - -#endif /* _OS_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/xenfunc.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xenfunc.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,135 +0,0 @@ -/* $NetBSD: xenfunc.h,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _XEN_XENFUNC_H_ -#define _XEN_XENFUNC_H_ - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/xenpmap.h> -#include <machine/pte.h> - -#ifdef XENDEBUG_LOW -#define __PRINTK(x) printk x -#else -#define __PRINTK(x) -#endif - -void xen_set_ldt(vaddr_t, uint32_t); -void xen_update_descriptor(union descriptor *, union descriptor *); - -static __inline void -invlpg(u_int addr) -{ - xpq_queue_invlpg(addr); - xpq_flush_queue(); -} - -static __inline void -lldt(u_short sel) -{ - - /* __PRINTK(("ldt %x\n", IDXSELN(sel))); */ - if (sel == GSEL(GLDT_SEL, SEL_KPL)) - xen_set_ldt((vaddr_t)ldt, NLDT); - else - xen_set_ldt(cpu_info_primary.ci_gdt[IDXSELN(sel)].ld.ld_base, - cpu_info_primary.ci_gdt[IDXSELN(sel)].ld.ld_entries); -} - -static __inline void -ltr(u_short sel) -{ - __PRINTK(("XXX ltr not supported\n")); -} - -static __inline void -lcr0(u_int val) -{ - __PRINTK(("XXX lcr0 not supported\n")); -} - -static __inline u_int -rcr0(void) -{ - __PRINTK(("XXX rcr0 not supported\n")); - return 0; -} - -#define lcr3(_v) _lcr3((_v), __FILE__, __LINE__) -static __inline void -_lcr3(u_int val, char *file, int line) -{ -/* __PRINTK(("lcr3 %08x at %s:%d\n", val, file, line)); */ - xpq_queue_pt_switch(xpmap_ptom(val) & PG_FRAME); - xpq_flush_queue(); -} - -static __inline void -tlbflush(void) -{ - xpq_queue_tlb_flush(); - xpq_flush_queue(); -} - -static __inline u_int -rdr6(void) -{ - u_int val; - - val = HYPERVISOR_get_debugreg(6); - return val; -} - -static __inline void -ldr6(u_int val) -{ - - HYPERVISOR_set_debugreg(6, val); -} - -static __inline void -disable_intr(void) -{ - __cli(); -} - -static __inline void -enable_intr(void) -{ - __sti(); -} - -#endif /* _XEN_XENFUNC_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/include/xenpmap.h --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xenpmap.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,193 +0,0 @@ -/* 
$NetBSD: xenpmap.h,v 1.1.2.1 2004/05/22 15:59:58 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _XEN_XENPMAP_H_ -#define _XEN_XENPMAP_H_ - -#define INVALID_P2M_ENTRY (~0UL) - -void xpq_queue_invlpg(vaddr_t); -void xpq_queue_pde_update(pd_entry_t *, pd_entry_t); -void xpq_queue_pte_update(pt_entry_t *, pt_entry_t); -void xpq_queue_unchecked_pte_update(pt_entry_t *, pt_entry_t); -void xpq_queue_pt_switch(paddr_t); -void xpq_flush_queue(void); -void xpq_queue_set_ldt(vaddr_t, uint32_t); -void xpq_queue_tlb_flush(void); -void xpq_queue_pin_table(paddr_t, int); -void xpq_queue_unpin_table(paddr_t); - -extern paddr_t *xpmap_phys_to_machine_mapping; - -#define XPQ_PIN_L1_TABLE 1 -#define XPQ_PIN_L2_TABLE 2 - -#ifndef XEN -#define PDE_GET(_pdp) \ - *(_pdp) -#define PDE_SET(_pdp,_mapdp,_npde) \ - *(_mapdp) = (_npde) -#define PDE_CLEAR(_pdp,_mapdp) \ - *(_mapdp) = 0 -#define PTE_SET(_ptp,_maptp,_npte) \ - *(_maptp) = (_npte) -#define PTE_CLEAR(_ptp,_maptp) \ - *(_maptp) = 0 -#define PTE_ATOMIC_SET(_ptp,_maptp,_npte,_opte) \ - (_opte) = x86_atomic_testset_ul((_maptp), (_npte)) -#define PTE_ATOMIC_CLEAR(_ptp,_maptp,_opte) \ - (_opte) = x86_atomic_testset_ul((_maptp), 0) -#define PDE_CLEARBITS(_pdp,_mapdp,_bits) \ - *(_mapdp) &= ~(_bits) -#define PTE_ATOMIC_CLEARBITS(_ptp,_maptp,_bits) \ - x86_atomic_clearbits_l((_maptp), (_bits)) -#define PTE_SETBITS(_ptp,_maptp,_bits) \ - *(_maptp) |= (_bits) -#define PTE_ATOMIC_SETBITS(_ptp,_maptp,_bits) \ - x86_atomic_setbits_l((_maptp), (_bits)) -#else -paddr_t *xpmap_phys_to_machine_mapping; - -#define PDE_GET(_pdp) \ - (pmap_valid_entry(*(_pdp)) ? xpmap_mtop(*(_pdp)) : *(_pdp)) -#define PDE_SET(_pdp,_mapdp,_npde) do { \ - xpq_queue_pde_update((_mapdp), xpmap_ptom((_npde))); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PDE_CLEAR(_pdp,_mapdp) do { \ - xpq_queue_pde_update((_mapdp), 0); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_GET(_ptp) \ - (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : *(_ptp)) -#define PTE_GET_MA(_ptp) \ - *(_ptp) -#define PTE_SET(_ptp,_maptp,_npte) do { \ - xpq_queue_pte_update((_maptp), xpmap_ptom((_npte))); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_SET_MA(_ptp,_maptp,_npte) do { \ - xpq_queue_pte_update((_maptp), (_npte)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_SET_MA_UNCHECKED(_ptp,_maptp,_npte) do { \ - xpq_queue_unchecked_pte_update((_maptp), (_npte)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_CLEAR(_ptp,_maptp) do { \ - xpq_queue_pte_update((_maptp), 0); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_SET(_ptp,_maptp,_npte,_opte) do { \ - (_opte) = PTE_GET(_ptp); \ - xpq_queue_pte_update((_maptp), xpmap_ptom((_npte))); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_SET_MA(_ptp,_maptp,_npte,_opte) do { \ - (_opte) = *(_ptp); \ - xpq_queue_pte_update((_maptp), (_npte)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_CLEAR(_ptp,_maptp,_opte) do { \ - (_opte) = PTE_GET(_ptp); \ - xpq_queue_pte_update((_maptp), 0); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_CLEAR_MA(_ptp,_maptp,_opte) do { \ - (_opte) = *(_ptp); \ - xpq_queue_pte_update((_maptp), 0); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PDE_CLEARBITS(_pdp,_mapdp,_bits) do { \ - xpq_queue_pte_update((_mapdp), *(_pdp) & ~((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_CLEARBITS(_ptp,_maptp,_bits) do { \ - xpq_queue_pte_update((_maptp), *(_ptp) & ~((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PDE_ATOMIC_CLEARBITS(_pdp,_mapdp,_bits) do { \ - xpq_queue_pde_update((_mapdp), *(_pdp) & ~((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_CLEARBITS(_ptp,_maptp,_bits) do { \ - xpq_queue_pte_update((_maptp), *(_ptp) & ~((_bits) & ~PG_FRAME)); \ - 
xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_SETBITS(_ptp,_maptp,_bits) do { \ - xpq_queue_pte_update((_maptp), *(_ptp) | ((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PDE_ATOMIC_SETBITS(_pdp,_mapdp,_bits) do { \ - xpq_queue_pde_update((_mapdp), *(_pdp) | ((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_ATOMIC_SETBITS(_ptp,_maptp,_bits) do { \ - xpq_queue_pte_update((_maptp), *(_ptp) | ((_bits) & ~PG_FRAME)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PDE_COPY(_dpdp,_madpdp,_spdp) do { \ - xpq_queue_pde_update((_madpdp), *(_spdp)); \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PTE_UPDATES_FLUSH() do { \ - xpq_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#endif - -#define XPMAP_OFFSET (KERNTEXTOFF - KERNBASE_LOCORE) -static __inline paddr_t -xpmap_mtop(paddr_t mpa) -{ - return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) + - XPMAP_OFFSET) | (mpa & ~PG_FRAME); -} - -static __inline paddr_t -xpmap_ptom(paddr_t ppa) -{ - return (xpmap_phys_to_machine_mapping[(ppa - - XPMAP_OFFSET) >> PAGE_SHIFT] << PAGE_SHIFT) - | (ppa & ~PG_FRAME); -} - -static __inline paddr_t -xpmap_ptom_masked(paddr_t ppa) -{ - return (xpmap_phys_to_machine_mapping[(ppa - - XPMAP_OFFSET) >> PAGE_SHIFT] << PAGE_SHIFT); -} - -#endif /* _XEN_XENPMAP_H_ */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/x86/bus_space.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/x86/bus_space.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,505 +0,0 @@ -/* $NetBSD: bus_space.c,v 1.2.2.1 2004/05/22 15:57:25 he Exp $ */ -/* NetBSD: bus_space.c,v 1.2 2003/03/14 18:47:53 christos Exp */ - -/*- - * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Charles M. Hannum and by Jason R. 
Thorpe of the Numerical Aerospace - * Simulation Facility, NASA Ames Research Center. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: bus_space.c,v 1.2.2.1 2004/05/22 15:57:25 he Exp $"); - -#include "opt_xen.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/extent.h> - -#include <uvm/uvm_extern.h> - -#include <machine/bus.h> - -#include <dev/isa/isareg.h> -#include <machine/isa_machdep.h> - -#include <machine/hypervisor.h> -#include <machine/xenpmap.h> - -/* - * Extent maps to manage I/O and memory space. Allocate - * storage for 8 regions in each, initially. Later, ioport_malloc_safe - * will indicate that it's safe to use malloc() to dynamically allocate - * region descriptors. - * - * N.B. At least two regions are _always_ allocated from the iomem - * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM). - * - * The extent maps are not static! Machine-dependent ISA and EISA - * routines need access to them for bus address space allocation. - */ -static long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)]; -static long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)]; -struct extent *ioport_ex; -struct extent *iomem_ex; -static int ioport_malloc_safe; - -int x86_mem_add_mapping __P((bus_addr_t, bus_size_t, - int, bus_space_handle_t *)); - -void -x86_bus_space_init() -{ - /* - * Initialize the I/O port and I/O mem extent maps. - * Note: we don't have to check the return value since - * creation of a fixed extent map will never fail (since - * descriptor storage has already been allocated). - * - * N.B. The iomem extent manages _all_ physical addresses - * on the machine. When the amount of RAM is found, the two - * extents of RAM are allocated from the map (0 -> ISA hole - * and end of ISA hole -> end of RAM). 
- */ - ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF, - (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage), - EX_NOCOALESCE|EX_NOWAIT); - iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF, - (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage), - EX_NOCOALESCE|EX_NOWAIT); - - /* We are privileged guest os - should have IO privileges. */ - if (xen_start_info.flags & SIF_PRIVILEGED) { - dom0_op_t op; - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = 1; - if (HYPERVISOR_dom0_op(&op) != 0) - panic("Unable to obtain IOPL, " - "despite being SIF_PRIVILEGED"); - } -} - -void -x86_bus_space_mallocok() -{ - - ioport_malloc_safe = 1; -} - -int -x86_memio_map(t, bpa, size, flags, bshp) - bus_space_tag_t t; - bus_addr_t bpa; - bus_size_t size; - int flags; - bus_space_handle_t *bshp; -{ - int error; - struct extent *ex; - - /* - * Pick the appropriate extent map. - */ - if (t == X86_BUS_SPACE_IO) { - if (flags & BUS_SPACE_MAP_LINEAR) - return (EOPNOTSUPP); - ex = ioport_ex; - } else if (t == X86_BUS_SPACE_MEM) - ex = iomem_ex; - else - panic("x86_memio_map: bad bus space tag"); - - /* - * Before we go any further, let's make sure that this - * region is available. - */ - error = extent_alloc_region(ex, bpa, size, - EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0)); - if (error) - return (error); - - /* - * For I/O space, that's all she wrote. - */ - if (t == X86_BUS_SPACE_IO) { - *bshp = bpa; - return (0); - } - - /* - * For memory space, map the bus physical address to - * a kernel virtual address. - */ - error = x86_mem_add_mapping(bpa, size, - (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp); - if (error) { - if (extent_free(ex, bpa, size, EX_NOWAIT | - (ioport_malloc_safe ? 
EX_MALLOCOK : 0))) { - printf("x86_memio_map: pa 0x%lx, size 0x%lx\n", - bpa, size); - printf("x86_memio_map: can't free region\n"); - } - } - - return (error); -} - -int -_x86_memio_map(t, bpa, size, flags, bshp) - bus_space_tag_t t; - bus_addr_t bpa; - bus_size_t size; - int flags; - bus_space_handle_t *bshp; -{ - - /* - * For I/O space, just fill in the handle. - */ - if (t == X86_BUS_SPACE_IO) { - if (flags & BUS_SPACE_MAP_LINEAR) - return (EOPNOTSUPP); - *bshp = bpa; - return (0); - } - - /* - * For memory space, map the bus physical address to - * a kernel virtual address. - */ - return (x86_mem_add_mapping(bpa, size, - (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp)); -} - -int -x86_memio_alloc(t, rstart, rend, size, alignment, boundary, flags, - bpap, bshp) - bus_space_tag_t t; - bus_addr_t rstart, rend; - bus_size_t size, alignment, boundary; - int flags; - bus_addr_t *bpap; - bus_space_handle_t *bshp; -{ - struct extent *ex; - u_long bpa; - int error; - - /* - * Pick the appropriate extent map. - */ - if (t == X86_BUS_SPACE_IO) { - if (flags & BUS_SPACE_MAP_LINEAR) - return (EOPNOTSUPP); - ex = ioport_ex; - } else if (t == X86_BUS_SPACE_MEM) - ex = iomem_ex; - else - panic("x86_memio_alloc: bad bus space tag"); - - /* - * Sanity check the allocation against the extent's boundaries. - */ - if (rstart < ex->ex_start || rend > ex->ex_end) - panic("x86_memio_alloc: bad region start/end"); - - /* - * Do the requested allocation. - */ - error = extent_alloc_subregion(ex, rstart, rend, size, alignment, - boundary, - EX_FAST | EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0), - &bpa); - - if (error) - return (error); - - /* - * For I/O space, that's all she wrote. - */ - if (t == X86_BUS_SPACE_IO) { - *bshp = *bpap = bpa; - return (0); - } - - /* - * For memory space, map the bus physical address to - * a kernel virtual address. 
- */ - error = x86_mem_add_mapping(bpa, size, - (flags & BUS_SPACE_MAP_CACHEABLE) != 0, bshp); - if (error) { - if (extent_free(iomem_ex, bpa, size, EX_NOWAIT | - (ioport_malloc_safe ? EX_MALLOCOK : 0))) { - printf("x86_memio_alloc: pa 0x%lx, size 0x%lx\n", - bpa, size); - printf("x86_memio_alloc: can't free region\n"); - } - } - - *bpap = bpa; - - return (error); -} - -int -x86_mem_add_mapping(bpa, size, cacheable, bshp) - bus_addr_t bpa; - bus_size_t size; - int cacheable; - bus_space_handle_t *bshp; -{ - u_long pa, endpa; - vaddr_t va; - pt_entry_t *pte; - pt_entry_t *maptp; - int32_t cpumask = 0; - - pa = x86_trunc_page(bpa); - endpa = x86_round_page(bpa + size); - -#ifdef DIAGNOSTIC - if (endpa <= pa) - panic("x86_mem_add_mapping: overflow"); -#endif - - if (bpa >= IOM_BEGIN && (bpa + size) <= IOM_END) { - va = (vaddr_t)ISA_HOLE_VADDR(pa); - } else { - va = uvm_km_valloc(kernel_map, endpa - pa); - if (va == 0) - return (ENOMEM); - } - - *bshp = (bus_space_handle_t)(va + (bpa & PGOFSET)); - - for (; pa < endpa; pa += PAGE_SIZE, va += PAGE_SIZE) { - pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE); - - /* - * PG_N doesn't exist on 386's, so we assume that - * the mainboard has wired up device space non-cacheable - * on those machines. - * - * Note that it's not necessary to use atomic ops to - * fiddle with the PTE here, because we don't care - * about mod/ref information. - * - * XXX should hand this bit to pmap_kenter_pa to - * save the extra invalidate! - * - * XXX extreme paranoia suggests tlb shootdown belongs here. 
- */ - if (pmap_cpu_has_pg_n()) { - pte = kvtopte(va); - maptp = (pt_entry_t *)vtomach((vaddr_t)pte); - if (cacheable) - PTE_CLEARBITS(pte, maptp, PG_N); - else - PTE_SETBITS(pte, maptp, PG_N); - pmap_tlb_shootdown(pmap_kernel(), va, *pte, - &cpumask); - } - } - - pmap_tlb_shootnow(cpumask); - pmap_update(pmap_kernel()); - - return 0; -} - -/* - * void _x86_memio_unmap(bus_space_tag bst, bus_space_handle bsh, - * bus_size_t size, bus_addr_t *adrp) - * - * This function unmaps memory- or io-space mapped by the function - * _x86_memio_map(). This function works nearly as same as - * x86_memio_unmap(), but this function does not ask kernel - * built-in extents and returns physical address of the bus space, - * for the convenience of the extra extent manager. - */ -void -_x86_memio_unmap(t, bsh, size, adrp) - bus_space_tag_t t; - bus_space_handle_t bsh; - bus_size_t size; - bus_addr_t *adrp; -{ - u_long va, endva; - bus_addr_t bpa; - - /* - * Find the correct extent and bus physical address. - */ - if (t == X86_BUS_SPACE_IO) { - bpa = bsh; - } else if (t == X86_BUS_SPACE_MEM) { - if (bsh >= atdevbase && (bsh + size) <= (atdevbase + IOM_SIZE)) { - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); - } else { - - va = x86_trunc_page(bsh); - endva = x86_round_page(bsh + size); - -#ifdef DIAGNOSTIC - if (endva <= va) { - panic("_x86_memio_unmap: overflow"); - } -#endif - -#if __NetBSD_Version__ > 104050000 - if (pmap_extract(pmap_kernel(), va, &bpa) == FALSE) { - panic("_x86_memio_unmap:" - " wrong virtual address"); - } - bpa += (bsh & PGOFSET); -#else - bpa = pmap_extract(pmap_kernel(), va) + (bsh & PGOFSET); -#endif - - pmap_kremove(va, endva - va); - /* - * Free the kernel virtual mapping. 
- */ - uvm_km_free(kernel_map, va, endva - va); - } - } else { - panic("_x86_memio_unmap: bad bus space tag"); - } - - if (adrp != NULL) { - *adrp = bpa; - } -} - -void -x86_memio_unmap(t, bsh, size) - bus_space_tag_t t; - bus_space_handle_t bsh; - bus_size_t size; -{ - struct extent *ex; - u_long va, endva; - bus_addr_t bpa; - - /* - * Find the correct extent and bus physical address. - */ - if (t == X86_BUS_SPACE_IO) { - ex = ioport_ex; - bpa = bsh; - } else if (t == X86_BUS_SPACE_MEM) { - ex = iomem_ex; - - if (bsh >= atdevbase && - (bsh + size) <= (atdevbase + IOM_SIZE)) { - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); - goto ok; - } - - va = x86_trunc_page(bsh); - endva = x86_round_page(bsh + size); - -#ifdef DIAGNOSTIC - if (endva <= va) - panic("x86_memio_unmap: overflow"); -#endif - - (void) pmap_extract(pmap_kernel(), va, &bpa); - bpa += (bsh & PGOFSET); - - pmap_kremove(va, endva - va); - /* - * Free the kernel virtual mapping. - */ - uvm_km_free(kernel_map, va, endva - va); - } else - panic("x86_memio_unmap: bad bus space tag"); - -ok: - if (extent_free(ex, bpa, size, - EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) { - printf("x86_memio_unmap: %s 0x%lx, size 0x%lx\n", - (t == X86_BUS_SPACE_IO) ? "port" : "pa", bpa, size); - printf("x86_memio_unmap: can't free region\n"); - } -} - -void -x86_memio_free(t, bsh, size) - bus_space_tag_t t; - bus_space_handle_t bsh; - bus_size_t size; -{ - - /* x86_memio_unmap() does all that we need to do. */ - x86_memio_unmap(t, bsh, size); -} - -int -x86_memio_subregion(t, bsh, offset, size, nbshp) - bus_space_tag_t t; - bus_space_handle_t bsh; - bus_size_t offset, size; - bus_space_handle_t *nbshp; -{ - - *nbshp = bsh + offset; - return (0); -} - -paddr_t -x86_memio_mmap(t, addr, off, prot, flags) - bus_space_tag_t t; - bus_addr_t addr; - off_t off; - int prot; - int flags; -{ - - /* Can't mmap I/O space. */ - if (t == X86_BUS_SPACE_IO) - return (-1); - - /* - * "addr" is the base address of the device we're mapping. 
- * "off" is the offset into that device. - * - * Note we are called for each "page" in the device that - * the upper layers want to map. - */ - return (x86_btop(addr + off)); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,273 +0,0 @@ -/* $NetBSD: clock.c,v 1.1.2.2 2004/07/17 16:43:56 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "opt_xen.h" - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: clock.c,v 1.1.2.2 2004/07/17 16:43:56 he Exp $"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/time.h> -#include <sys/kernel.h> -#include <sys/device.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/cpu_counter.h> - -#include <dev/clock_subr.h> - -#include "config_time.h" /* for CONFIG_TIME */ - -static int xen_timer_handler(void *, struct intrframe *); - -/* These are peridically updated in shared_info, and then copied here. */ -static uint64_t shadow_tsc_stamp; -static uint64_t shadow_system_time; -static unsigned long shadow_time_version; -static struct timeval shadow_tv; - -static int timeset; - -static uint64_t processed_system_time; - -#define NS_PER_TICK (1000000000ULL/hz) - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called at splclock. 
- */ -static void -get_time_values_from_xen(void) -{ - do { - shadow_time_version = HYPERVISOR_shared_info->time_version2; - __insn_barrier(); - shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; - shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; - shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp; - shadow_system_time = HYPERVISOR_shared_info->system_time; - __insn_barrier(); - } while (shadow_time_version != HYPERVISOR_shared_info->time_version1); -} - -static uint64_t -get_tsc_offset_ns(void) -{ - uint32_t tsc_delta; - struct cpu_info *ci = curcpu(); - - tsc_delta = cpu_counter32() - shadow_tsc_stamp; - return tsc_delta * 1000000000 / cpu_frequency(ci); -} - -void -inittodr(time_t base) -{ - int s; - - /* - * if the file system time is more than a year older than the - * kernel, warn and then set the base time to the CONFIG_TIME. - */ - if (base && base < (CONFIG_TIME-SECYR)) { - printf("WARNING: preposterous time in file system\n"); - base = CONFIG_TIME; - } - - s = splclock(); - get_time_values_from_xen(); - splx(s); - - time.tv_usec = shadow_tv.tv_usec; - time.tv_sec = shadow_tv.tv_sec + rtc_offset * 60; -#ifdef DEBUG_CLOCK - printf("readclock: %ld (%ld)\n", time.tv_sec, base); -#endif - if (base != 0 && base < time.tv_sec - 5*SECYR) - printf("WARNING: file system time much less than clock time\n"); - else if (base > time.tv_sec + 5*SECYR) { - printf("WARNING: clock time much less than file system time\n"); - printf("WARNING: using file system time\n"); - goto fstime; - } - - timeset = 1; - return; - -fstime: - timeset = 1; - time.tv_sec = base; - printf("WARNING: CHECK AND RESET THE DATE!\n"); -} - -void -resettodr() -{ -#ifdef DOM0OPS - dom0_op_t op; - int s; -#endif -#ifdef DEBUG_CLOCK - struct clock_ymdhms dt; -#endif - - /* - * We might have been called by boot() due to a crash early - * on. Don't reset the clock chip in this case. 
- */ - if (!timeset) - return; - -#ifdef DEBUG_CLOCK - clock_secs_to_ymdhms(time.tv_sec - rtc_offset * 60, &dt); - - printf("setclock: %d/%d/%d %02d:%02d:%02d\n", dt.dt_year, - dt.dt_mon, dt.dt_day, dt.dt_hour, dt.dt_min, dt.dt_sec); -#endif -#ifdef DOM0OPS - if (xen_start_info.dom_id == 0) { - s = splclock(); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = time.tv_sec - rtc_offset * 60; - op.u.settime.usecs = time.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - splx(s); - } -#endif -} - -void -startrtclock() -{ - -} - -/* - * Wait approximately `n' microseconds. - */ -void -xen_delay(int n) -{ - uint64_t when; - - get_time_values_from_xen(); - when = shadow_system_time + n * 1000; - while (shadow_system_time < when) - get_time_values_from_xen(); -} - -void -xen_microtime(struct timeval *tv) -{ - - *tv = time; -} - -void -xen_initclocks() -{ - int irq = bind_virq_to_irq(VIRQ_TIMER); - - get_time_values_from_xen(); - processed_system_time = shadow_system_time; - - event_set_handler(irq, (int (*)(void *))xen_timer_handler, - NULL, IPL_CLOCK); - hypervisor_enable_irq(irq); -} - -static int -xen_timer_handler(void *arg, struct intrframe *regs) -{ - int64_t delta; - -#if defined(I586_CPU) || defined(I686_CPU) - static int microset_iter; /* call cc_microset once/sec */ - struct cpu_info *ci = curcpu(); - - /* - * If we have a cycle counter, do the microset thing. 
- */ - if (ci->ci_feature_flags & CPUID_TSC) { - if ( -#if defined(MULTIPROCESSOR) - CPU_IS_PRIMARY(ci) && -#endif - (microset_iter--) == 0) { - microset_iter = hz - 1; -#if defined(MULTIPROCESSOR) - x86_broadcast_ipi(X86_IPI_MICROSET); -#endif - cc_microset_time = time; - cc_microset(ci); - } - } -#endif - - get_time_values_from_xen(); - - delta = (int64_t)(shadow_system_time + get_tsc_offset_ns() - - processed_system_time); - while (delta >= NS_PER_TICK) { - hardclock(regs); - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - } - - return 0; -} - -void -setstatclockrate(int arg) -{ -} - -void -idle_block(void) -{ - - /* - * We set the timer to when we expect the next timer - * interrupt. We could set the timer to later if we could - * easily find out when we will have more work (callouts) to - * process from hardclock. - */ - if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0) - HYPERVISOR_block(); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/ctrl_if.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/ctrl_if.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,550 +0,0 @@ -/****************************************************************************** - * ctrl_if.c - * - * Management functions for special interface to the domain controller. - * - * Copyright (c) 2004, K A Fraser - */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/malloc.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/ctrl_if.h> -#include <machine/evtchn.h> - -void printk(char *, ...); -#if 0 -#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -/* - * Only used by initial domain which must create its own control-interface - * event channel. 
This value is picked up by the user-space domain controller - * via an ioctl. - */ -int initdom_ctrlif_domcontroller_port = -1; - -/* static */ int ctrl_if_evtchn = -1; -static int ctrl_if_irq; -static struct simplelock ctrl_if_lock; - -static CONTROL_RING_IDX ctrl_if_tx_resp_cons; -static CONTROL_RING_IDX ctrl_if_rx_req_cons; - -/* Incoming message requests. */ - /* Primary message type -> message handler. */ -static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256]; - /* Primary message type -> callback in process context? */ -static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)]; -#if 0 - /* Is it late enough during bootstrap to use schedule_task()? */ -static int safe_to_schedule_task; -#endif - /* Queue up messages to be handled in process context. */ -static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE]; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons; - -/* Incoming message responses: message identifier -> message handler/id. */ -static struct { - ctrl_msg_handler_t fn; - unsigned long id; -} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE]; - -/* For received messages that must be deferred to process context. */ -static void __ctrl_if_rxmsg_deferred(void *unused); - -#ifdef notyet -/* Deferred callbacks for people waiting for space in the transmit ring. 
*/ -static int DECLARE_TASK_QUEUE(ctrl_if_tx_tq); -#endif - -static void *ctrl_if_softintr = NULL; - -static int ctrl_if_tx_wait; -static void __ctrl_if_tx_tasklet(unsigned long data); - -static void __ctrl_if_rx_tasklet(unsigned long data); - -#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048)) -#define TX_FULL(_c) \ - (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE) - -static void ctrl_if_notify_controller(void) -{ - hypervisor_notify_via_evtchn(ctrl_if_evtchn); -} - -static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id) -{ - msg->length = 0; - ctrl_if_send_response(msg); -} - -static void __ctrl_if_tx_tasklet(unsigned long data) -{ - control_if_t *ctrl_if = get_ctrl_if(); - ctrl_msg_t *msg; - int was_full = TX_FULL(ctrl_if); - CONTROL_RING_IDX rp; - - rp = ctrl_if->tx_resp_prod; - __insn_barrier(); /* Ensure we see all requests up to 'rp'. */ - - while ( ctrl_if_tx_resp_cons != rp ) - { - msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)]; - - DPRINTK("Rx-Rsp %u/%u :: %d/%d\n", - ctrl_if_tx_resp_cons, - ctrl_if->tx_resp_prod, - msg->type, msg->subtype); - - /* Execute the callback handler, if one was specified. */ - if ( msg->id != 0xFF ) - { - (*ctrl_if_txmsg_id_mapping[msg->id].fn)( - msg, ctrl_if_txmsg_id_mapping[msg->id].id); - __insn_barrier(); /* Execute, /then/ free. */ - ctrl_if_txmsg_id_mapping[msg->id].fn = NULL; - } - - /* - * Step over the message in the ring /after/ finishing reading it. As - * soon as the index is updated then the message may get blown away. - */ - __insn_barrier(); - ctrl_if_tx_resp_cons++; - } - - if ( was_full && !TX_FULL(ctrl_if) ) - { - wakeup(&ctrl_if_tx_wait); -#ifdef notyet - run_task_queue(&ctrl_if_tx_tq); -#endif - } -} - -static void __ctrl_if_rxmsg_deferred(void *unused) -{ - ctrl_msg_t *msg; - CONTROL_RING_IDX dp; - - dp = ctrl_if_rxmsg_deferred_prod; - __insn_barrier(); /* Ensure we see all deferred requests up to 'dp'. 
*/ - - while ( ctrl_if_rxmsg_deferred_cons != dp ) - { - msg = &ctrl_if_rxmsg_deferred[ - MASK_CONTROL_IDX(ctrl_if_rxmsg_deferred_cons)]; - (*ctrl_if_rxmsg_handler[msg->type])(msg, 0); - ctrl_if_rxmsg_deferred_cons++; - } -} - -static void __ctrl_if_rx_tasklet(unsigned long data) -{ - control_if_t *ctrl_if = get_ctrl_if(); - ctrl_msg_t msg, *pmsg; - CONTROL_RING_IDX rp, dp; - - dp = ctrl_if_rxmsg_deferred_prod; - rp = ctrl_if->rx_req_prod; - __insn_barrier(); /* Ensure we see all requests up to 'rp'. */ - - while ( ctrl_if_rx_req_cons != rp ) - { - pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons)]; - memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg)); - - DPRINTK("Rx-Req %u/%u :: %d/%d\n", - ctrl_if_rx_req_cons-1, - ctrl_if->rx_req_prod, - msg.type, msg.subtype); - - if ( msg.length != 0 ) - memcpy(msg.msg, pmsg->msg, msg.length); - - if ( x86_atomic_test_bit( - (unsigned long *)&ctrl_if_rxmsg_blocking_context, - msg.type) ) - memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)], - &msg, offsetof(ctrl_msg_t, msg) + msg.length); - else - (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0); - - ctrl_if_rx_req_cons++; - } - - if ( dp != ctrl_if_rxmsg_deferred_prod ) - { - __insn_barrier(); - ctrl_if_rxmsg_deferred_prod = dp; - if (ctrl_if_softintr) - softintr_schedule(ctrl_if_softintr); - } -} - -static int ctrl_if_interrupt(void *arg) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) - __ctrl_if_tx_tasklet(0); - - if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod ) - __ctrl_if_rx_tasklet(0); - - return 0; -} - -int -ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id) -{ - control_if_t *ctrl_if = get_ctrl_if(); - unsigned long flags; - int i; - int s; - - save_and_cli(flags); - simple_lock(&ctrl_if_lock); - - if ( TX_FULL(ctrl_if) ) - { - simple_unlock(&ctrl_if_lock); - restore_flags(flags); - s = splhigh(); - if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod ) - 
__ctrl_if_tx_tasklet(0); - splx(s); - return EAGAIN; - } - - msg->id = 0xFF; - if ( hnd != NULL ) - { - for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ ) - continue; - ctrl_if_txmsg_id_mapping[i].fn = hnd; - ctrl_if_txmsg_id_mapping[i].id = id; - msg->id = i; - } - - DPRINTK("Tx-Req %u/%u :: %d/%d\n", - ctrl_if->tx_req_prod, - ctrl_if_tx_resp_cons, - msg->type, msg->subtype); - - memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)], - msg, sizeof(*msg)); - __insn_barrier(); /* Write the message before letting the controller peek at it. */ - ctrl_if->tx_req_prod++; - - simple_unlock(&ctrl_if_lock); - restore_flags(flags); - - ctrl_if_notify_controller(); - - return 0; -} - -int -ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state) -{ - int rc; - - while ((rc = ctrl_if_send_message_noblock(msg, hnd, id)) == EAGAIN) { - /* XXXcl possible race -> add a lock and ltsleep */ -#if 1 - HYPERVISOR_yield(); -#else - rc = tsleep((caddr_t) &ctrl_if_tx_wait, PUSER | PCATCH, - "ctrl_if", 0); - if (rc) - break; -#endif - } - - return rc; -} - -/* Allow a reponse-callback handler to find context of a blocked requester. */ -struct rsp_wait { - ctrl_msg_t *msg; /* Buffer for the response message. */ - struct task_struct *task; /* The task that is blocked on the response. */ - int done; /* Indicate to 'task' that response is rcv'ed. 
*/ -}; - -static void __ctrl_if_get_response(ctrl_msg_t *msg, unsigned long id) -{ - struct rsp_wait *wait = (struct rsp_wait *)id; - - memcpy(wait->msg, msg, sizeof(*msg)); - __insn_barrier(); - wait->done = 1; - - wakeup(wait); -} - -int -ctrl_if_send_message_and_get_response( - ctrl_msg_t *msg, - ctrl_msg_t *rmsg, - long wait_state) -{ - struct rsp_wait wait; - int rc; - - wait.msg = rmsg; - wait.done = 0; - - if ( (rc = ctrl_if_send_message_block(msg, __ctrl_if_get_response, - (unsigned long)&wait, - wait_state)) != 0 ) - return rc; - - for ( ; ; ) - { - if ( wait.done ) - break; - tsleep((caddr_t)&wait, PUSER | PCATCH, "ctrl_if", 0); - } - - return 0; -} - -#ifdef notyet -int -ctrl_if_enqueue_space_callback( - struct tq_struct *task) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - /* Fast path. */ - if ( !TX_FULL(ctrl_if) ) - return 0; - - (void)queue_task(task, &ctrl_if_tx_tq); - - /* - * We may race execution of the task queue, so return re-checked status. If - * the task is not executed despite the ring being non-full then we will - * certainly return 'not full'. - */ - __insn_barrier(); - return TX_FULL(ctrl_if); -} -#endif - -void -ctrl_if_send_response( - ctrl_msg_t *msg) -{ - control_if_t *ctrl_if = get_ctrl_if(); - unsigned long flags; - ctrl_msg_t *dmsg; - - /* - * NB. The response may the original request message, modified in-place. - * In this situation we may have src==dst, so no copying is required. - */ - save_and_cli(flags); - simple_lock(&ctrl_if_lock); - - DPRINTK("Tx-Rsp %u :: %d/%d\n", - ctrl_if->rx_resp_prod, - msg->type, msg->subtype); - - dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)]; - if ( dmsg != msg ) - memcpy(dmsg, msg, sizeof(*msg)); - - __insn_barrier(); /* Write the message before letting the controller peek at it. 
*/ - ctrl_if->rx_resp_prod++; - - simple_unlock(&ctrl_if_lock); - restore_flags(flags); - - ctrl_if_notify_controller(); -} - -int -ctrl_if_register_receiver( - uint8_t type, - ctrl_msg_handler_t hnd, - unsigned int flags) -{ - unsigned long _flags; - int inuse; - - save_and_cli(_flags); - simple_lock(&ctrl_if_lock); - - inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler); - - if ( inuse ) - { - printf("Receiver %p already established for control " - "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type); - } - else - { - ctrl_if_rxmsg_handler[type] = hnd; - x86_atomic_clear_bit((unsigned long *)&ctrl_if_rxmsg_blocking_context, type); - if ( flags == CALLBACK_IN_BLOCKING_CONTEXT ) - { - x86_atomic_set_bit((unsigned long *)&ctrl_if_rxmsg_blocking_context, type); -#if 0 - if ( !safe_to_schedule_task ) - BUG(); -#endif - } - } - - simple_unlock(&ctrl_if_lock); - restore_flags(_flags); - - return !inuse; -} - -void -ctrl_if_unregister_receiver( - uint8_t type, - ctrl_msg_handler_t hnd) -{ - unsigned long flags; - - save_and_cli(flags); - simple_lock(&ctrl_if_lock); - - if ( ctrl_if_rxmsg_handler[type] != hnd ) - printf("Receiver %p is not registered for control " - "messages of type %d.\n", hnd, type); - else - ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler; - - simple_unlock(&ctrl_if_lock); - restore_flags(flags); - - /* Ensure that @hnd will not be executed after this function returns. 
*/ - if (ctrl_if_softintr) - softintr_schedule(ctrl_if_softintr); -} - -static void -ctrl_if_softintr_handler(void *arg) -{ - - if ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod ) - __ctrl_if_rxmsg_deferred(NULL); -} - -#ifdef notyet -void ctrl_if_suspend(void) -{ - free_irq(ctrl_if_irq, NULL); - unbind_evtchn_from_irq(ctrl_if_evtchn); -} -#endif - -void ctrl_if_resume(void) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - if ( xen_start_info.flags & SIF_INITDOMAIN ) - { - /* - * The initial domain must create its own domain-controller link. - * The controller is probably not running at this point, but will - * pick up its end of the event channel from - */ - evtchn_op_t op; - op.cmd = EVTCHNOP_bind_interdomain; - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = DOMID_SELF; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("EVTCHNOP_bind_interdomain"); - xen_start_info.domain_controller_evtchn = op.u.bind_interdomain.port1; - initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; - } - - /* Sync up with shared indexes. */ - ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod; - ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod; - - ctrl_if_evtchn = xen_start_info.domain_controller_evtchn; - ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn); - - event_set_handler(ctrl_if_irq, &ctrl_if_interrupt, NULL, IPL_HIGH); - hypervisor_enable_irq(ctrl_if_irq); -} - -void ctrl_if_early_init(void) -{ - - simple_lock_init(&ctrl_if_lock); - - ctrl_if_evtchn = xen_start_info.domain_controller_evtchn; -} - -void ctrl_if_init(void) -{ - int i; - - for ( i = 0; i < 256; i++ ) - ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler; - - if (ctrl_if_evtchn == -1) - ctrl_if_early_init(); - - ctrl_if_softintr = softintr_establish(IPL_SOFTNET, - ctrl_if_softintr_handler, NULL); - - ctrl_if_resume(); -} - - -#if 0 -/* This is called after it is safe to call schedule_task(). 
*/ -static int __init ctrl_if_late_setup(void) -{ - safe_to_schedule_task = 1; - return 0; -} -__initcall(ctrl_if_late_setup); -#endif - - -/* - * !! The following are DANGEROUS FUNCTIONS !! - * Use with care [for example, see xencons_force_flush()]. - */ - -int ctrl_if_transmitter_empty(void) -{ - return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons); -} - -void ctrl_if_discard_responses(void) -{ - ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod; -} - diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,367 +0,0 @@ -/* $NetBSD$ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004, K A Fraser. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/malloc.h> -#include <sys/reboot.h> - -#include <uvm/uvm.h> - -#include <machine/intrdefs.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/ctrl_if.h> -#include <machine/xenfunc.h> - -struct pic xenev_pic = { - .pic_dev = { - .dv_xname = "xen_fakepic", - }, - .pic_type = PIC_XEN, - .pic_lock = __SIMPLELOCK_UNLOCKED, -}; - -/* - * This lock protects updates to the following mapping and reference-count - * arrays. The lock does not need to be acquired to read the mapping tables. - */ -static struct simplelock irq_mapping_update_lock = SIMPLELOCK_INITIALIZER; - -/* IRQ <-> event-channel mappings. */ -int evtchn_to_irq[NR_EVENT_CHANNELS]; -int irq_to_evtchn[NR_IRQS]; - -/* IRQ <-> VIRQ mapping. */ -static int virq_to_irq[NR_VIRQS]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -#if 0 -static int xen_die_handler(void *); -#endif -static int xen_debug_handler(void *); - -void -events_default_setup() -{ - int i; - - /* No VIRQ -> IRQ mappings. */ - for (i = 0; i < NR_VIRQS; i++) - virq_to_irq[i] = -1; - - /* No event-channel -> IRQ mappings. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - evtchn_to_irq[i] = -1; - hypervisor_mask_event(i); /* No event channels are 'live' right now. 
*/ - } - - /* No IRQ -> event-channel mappings. */ - for (i = 0; i < NR_IRQS; i++) - irq_to_evtchn[i] = -1; -} - -void -init_events() -{ - int irq; - - irq = bind_virq_to_irq(VIRQ_DEBUG); - event_set_handler(irq, &xen_debug_handler, NULL, IPL_DEBUG); - hypervisor_enable_irq(irq); - - /* This needs to be done early, but after the IRQ subsystem is - * alive. */ - ctrl_if_init(); - - enable_intr(); /* at long last... */ -} - -unsigned int -do_event(int irq, struct intrframe *regs) -{ - struct cpu_info *ci; - int ilevel; - struct intrhand *ih; - int (*ih_fun)(void *, void *); - extern struct uvmexp uvmexp; - - if (irq >= NR_IRQS) { -#ifdef DIAGNOSTIC - printf("event irq number %d > NR_IRQS\n", irq); -#endif - return ENOENT; - } - - if (0 && irq == 4) { - ci = &cpu_info_primary; - printf("do_event %d/%d called, ilevel %d\n", irq, - irq_to_evtchn[irq], ci->ci_ilevel); - } - - ci = &cpu_info_primary; - - hypervisor_acknowledge_irq(irq); - if (ci->ci_isources[irq] == NULL) { - hypervisor_enable_irq(irq); - return 0; - } - ilevel = ci->ci_ilevel; - if (ci->ci_isources[irq]->is_maxlevel <= ilevel) { - ci->ci_ipending |= 1 << irq; - /* leave masked */ - return 0; - } - uvmexp.intrs++; - ci->ci_isources[irq]->is_evcnt.ev_count++; - ci->ci_ilevel = ci->ci_isources[irq]->is_maxlevel; - /* sti */ - ci->ci_idepth++; -#ifdef MULTIPROCESSOR - x86_intlock(regs); -#endif - ih = ci->ci_isources[irq]->is_handlers; - while (ih != NULL) { - if (ih->ih_level <= ilevel) { -#ifdef MULTIPROCESSOR - x86_intunlock(regs); -#endif - ci->ci_ipending |= 1 << irq; - /* leave masked */ - ci->ci_idepth--; - splx(ilevel); - return 0; - } - ci->ci_ilevel = ih->ih_level; - ih_fun = (void *)ih->ih_fun; - ih_fun(ih->ih_arg, regs); - ih = ih->ih_next; - } -#ifdef MULTIPROCESSOR - x86_intunlock(regs); -#endif - hypervisor_enable_irq(irq); - ci->ci_idepth--; - splx(ilevel); - - if (0 && irq == 4) - printf("do_event %d done, ipending %08x\n", irq, - ci->ci_ipending); - - return 0; -} - -static int 
-find_unbound_irq(void) -{ - int irq; - - for (irq = 0; irq < NR_IRQS; irq++) - if (irq_bindcount[irq] == 0) - break; - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return irq; -} - -int -bind_virq_to_irq(int virq) -{ - evtchn_op_t op; - int evtchn, irq; - - simple_lock(&irq_mapping_update_lock); - - irq = virq_to_irq[virq]; - if (irq == -1) { - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - if (HYPERVISOR_event_channel_op(&op) != 0) - panic("Failed to bind virtual IRQ %d\n", virq); - evtchn = op.u.bind_virq.port; - - irq = find_unbound_irq(); - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - virq_to_irq[virq] = irq; - } - - irq_bindcount[irq]++; - - simple_unlock(&irq_mapping_update_lock); - - return irq; -} - -void -unbind_virq_from_irq(int virq) -{ - evtchn_op_t op; - int irq = virq_to_irq[virq]; - int evtchn = irq_to_evtchn[irq]; - - simple_lock(&irq_mapping_update_lock); - - irq_bindcount[irq]--; - if (irq_bindcount[irq] == 0) { - op.cmd = EVTCHNOP_close; - op.u.close.dom = DOMID_SELF; - op.u.close.port = evtchn; - if (HYPERVISOR_event_channel_op(&op) != 0) - panic("Failed to unbind virtual IRQ %d\n", virq); - - evtchn_to_irq[evtchn] = -1; - irq_to_evtchn[irq] = -1; - virq_to_irq[virq] = -1; - } - - simple_unlock(&irq_mapping_update_lock); -} - -int bind_evtchn_to_irq(int evtchn) -{ - int irq; - - simple_lock(&irq_mapping_update_lock); - - irq = evtchn_to_irq[evtchn]; - if (irq == -1) { - irq = find_unbound_irq(); - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - } - - irq_bindcount[irq]++; - - simple_unlock(&irq_mapping_update_lock); - - return irq; -} - -int -event_set_handler(int irq, ev_handler_t handler, void *arg, int level) -{ - struct intrsource *isp; - struct intrhand *ih; - struct cpu_info *ci; - - if (irq >= NR_IRQS) { -#ifdef DIAGNOSTIC - printf("irq number %d > NR_IRQS\n", irq); -#endif - return ENOENT; - } - -#if 0 - printf("event_set_handler irq %d/%d handler 
%p level %d\n", irq, - irq_to_evtchn[irq], handler, level); -#endif - /* XXXcl handle already bound irq */ - - MALLOC(isp, struct intrsource *, sizeof (struct intrsource), M_DEVBUF, - M_WAITOK|M_ZERO); - if (isp == NULL) - panic("can't allocate fixed interrupt source"); - MALLOC(ih, struct intrhand *, sizeof (struct intrhand), M_DEVBUF, - M_WAITOK|M_ZERO); - if (ih == NULL) - panic("can't allocate fixed interrupt source"); - - ci = &cpu_info_primary; - - isp->is_recurse = xenev_stubs[irq].ist_recurse; - isp->is_resume = xenev_stubs[irq].ist_resume; - ih->ih_level = level; - ih->ih_fun = handler; - ih->ih_arg = arg; - ih->ih_next = NULL; - isp->is_handlers = ih; - isp->is_pic = &xenev_pic; - ci->ci_isources[irq] = isp; - evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL, - ci->ci_dev->dv_xname, "xenev"); - - intr_calculatemasks(ci); - - return 0; -} - -void hypervisor_enable_irq(unsigned int irq) -{ - - hypervisor_unmask_event(irq_to_evtchn[irq]); -} - -void hypervisor_disable_irq(unsigned int irq) -{ - - hypervisor_mask_event(irq_to_evtchn[irq]); -} - -void hypervisor_acknowledge_irq(unsigned int irq) -{ - - hypervisor_mask_event(irq_to_evtchn[irq]); - hypervisor_clear_event(irq_to_evtchn[irq]); -} - -#if 0 -static int -xen_die_handler(void *arg) -{ - printf("hypervisor: DIE event received...\n"); - cpu_reboot(0, NULL); - /* NOTREACHED */ - return 0; -} -#endif - -static int -xen_debug_handler(void *arg) -{ - printf("debug event\n"); - return 0; -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,242 +0,0 @@ -/* $NetBSD: hypervisor.c,v 1.7.2.1 2004/05/22 15:58:54 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.7.2.1 2004/05/22 15:58:54 he Exp $"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/device.h> - -#include "xencons.h" -#include "xennet.h" -#include "xbd.h" -#include "xenkbc.h" -#include "vga_xen.h" -#include "npx.h" - -#include "opt_xen.h" - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> - -#include <sys/dirent.h> -#include <sys/stat.h> -#include <sys/tree.h> -#include <sys/vnode.h> -#include <miscfs/specfs/specdev.h> -#include <miscfs/kernfs/kernfs.h> -#include <machine/kernfs_machdep.h> - -#if NXENNET > 0 -#include <net/if.h> -#include <net/if_ether.h> -#include <net/if_media.h> -#include <machine/if_xennetvar.h> -#endif - -#if NXBD > 0 -#include <sys/buf.h> -#include <sys/disk.h> -#include <dev/dkvar.h> -#include <machine/xbdvar.h> -#endif - -#if NXENKBC > 0 -#include <dev/pckbport/pckbportvar.h> -#include <machine/xenkbcvar.h> -#endif - -#if NVGA_XEN > 0 -#include <machine/bus.h> -#include <machine/vga_xenvar.h> -#endif - -int hypervisor_match(struct device *, struct cfdata *, void *); -void hypervisor_attach(struct device *, struct device *, void *); - -CFATTACH_DECL(hypervisor, sizeof(struct device), - hypervisor_match, hypervisor_attach, NULL, NULL); - -int hypervisor_print(void *, const char *); - -union hypervisor_attach_cookie { - const char *hac_device; /* first elem of all */ -#if NXENKBC > 0 - struct xenkbc_attach_args hac_xenkbc; -#endif -#if NVGA_XEN > 0 - struct xen_vga_attach_args hac_vga_xen; -#endif -#if NXENCONS > 0 - struct xencons_attach_args hac_xencons; -#endif -#if NXENNET > 0 - struct xennet_attach_args hac_xennet; -#endif -#if NXBD > 0 - struct xbd_attach_args hac_xbd; -#endif -#if NNPX > 0 - struct xen_npx_attach_args hac_xennpx; -#endif -}; - - -/* - * Probe for the hypervisor; always succeeds. 
- */ -int -hypervisor_match(parent, match, aux) - struct device *parent; - struct cfdata *match; - void *aux; -{ - struct hypervisor_attach_args *haa = aux; - - if (strcmp(haa->haa_busname, "hypervisor") == 0) - return 1; - return 0; -} - -static void -scan_finish(struct device *parent) -{ - -#if NXENNET > 0 - xennet_scan_finish(parent); -#endif -#if NXBD > 0 - xbd_scan_finish(parent); -#endif -} - -/* - * Attach the hypervisor. - */ -void -hypervisor_attach(parent, self, aux) - struct device *parent, *self; - void *aux; -{ - union hypervisor_attach_cookie hac; - - printf("\n"); - - init_events(); - -#if NXENKBC > 0 - hac.hac_xenkbc.xa_device = "xenkbc"; - config_found(self, &hac.hac_xenkbc, hypervisor_print); -#endif - -#if NVGA_XEN > 0 - hac.hac_vga_xen.xa_device = "vga_xen"; - hac.hac_vga_xen.xa_iot = X86_BUS_SPACE_IO; - hac.hac_vga_xen.xa_memt = X86_BUS_SPACE_MEM; - config_found(self, &hac.hac_vga_xen, hypervisor_print); -#endif - -#if NXENCONS > 0 - hac.hac_xencons.xa_device = "xencons"; - config_found(self, &hac.hac_xencons, hypervisor_print); -#endif -#if NXENNET > 0 - hac.hac_xennet.xa_device = "xennet"; - xennet_scan(self, &hac.hac_xennet, hypervisor_print); -#endif -#if NXBD > 0 - hac.hac_xbd.xa_device = "xbd"; - xbd_scan(self, &hac.hac_xbd, hypervisor_print); -#endif -#if NNPX > 0 - hac.hac_xennpx.xa_device = "npx"; - config_found(self, &hac.hac_xennpx, hypervisor_print); -#endif - xenkernfs_init(); -#ifdef DOM0OPS - if (xen_start_info.flags & SIF_PRIVILEGED) { - xenprivcmd_init(); - xenmachmem_init(); - xenvfr_init(); - } -#endif -#if NXENNET > 0 || NXBD > 0 - config_interrupts(self, scan_finish); -#endif -} - -int -hypervisor_print(aux, parent) - void *aux; - const char *parent; -{ - union hypervisor_attach_cookie *hac = aux; - - if (parent) - aprint_normal("%s at %s", hac->hac_device, parent); - return (UNCONF); -} - -void -hypervisor_notify_via_evtchn(unsigned int port) -{ - evtchn_op_t op; - - op.cmd = EVTCHNOP_send; - op.u.send.local_port = port; - 
(void)HYPERVISOR_event_channel_op(&op); -} - -#define READ_MODE (S_IRUSR|S_IRGRP|S_IROTH) -#define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) - -kernfs_parentdir_t *kernxen_pkt; - -void -xenkernfs_init() -{ - kernfs_entry_t *dkt; - - KERNFS_ALLOCENTRY(dkt, M_TEMP, M_WAITOK); - KERNFS_INITENTRY(dkt, DT_DIR, "xen", NULL, KFSsubdir, VDIR, DIR_MODE); - kernfs_addentry(NULL, dkt); - kernxen_pkt = KERNFS_ENTOPARENTDIR(dkt); - - KERNFS_ALLOCENTRY(dkt, M_TEMP, M_WAITOK); - KERNFS_INITENTRY(dkt, DT_REG, "cmdline", xen_start_info.cmd_line, - KFSstring, VREG, READ_MODE); - kernfs_addentry(kernxen_pkt, dkt); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1284 +0,0 @@ -/* $NetBSD: if_xennet.c,v 1.1.2.1 2004/05/22 15:58:29 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: if_xennet.c,v 1.1.2.1 2004/05/22 15:58:29 he Exp $"); - -#include "opt_inet.h" -#include "rnd.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/syslog.h> -#include <sys/mount.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/device.h> -#include <sys/ioctl.h> -#include <sys/errno.h> -#if NRND > 0 -#include <sys/rnd.h> -#endif - -#include <net/if.h> -#include <net/if_types.h> -#include <net/if_dl.h> -#include <net/if_ether.h> - -#ifdef mediacode -#include <net/if_media.h> -#endif - -#ifdef INET -#include <netinet/in.h> -#include <netinet/if_inarp.h> -#include <netinet/in_systm.h> -#include <netinet/in_var.h> -#include <netinet/ip.h> -#endif - -#include <nfs/rpcv2.h> - -#include <nfs/nfsproto.h> -#include <nfs/nfs.h> -#include <nfs/nfsmount.h> -#include <nfs/nfsdiskless.h> - -#include "bpfilter.h" -#if NBPFILTER > 0 -#include <net/bpf.h> -#include <net/bpfdesc.h> -#endif - -#include <uvm/uvm_extern.h> -#include <uvm/uvm_page.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/ctrl_if.h> - -#include <machine/if_xennetvar.h> - -#ifdef DEBUG -#define 
XENNET_DEBUG -#endif -#if defined(XENNET_DEBUG) && !defined(DEBUG) -#define DEBUG -#endif -/* #define XENNET_DEBUG_DUMP */ - -#ifdef XENNET_DEBUG -#define XEDB_FOLLOW 0x01 -#define XEDB_INIT 0x02 -#define XEDB_EVENT 0x04 -#define XEDB_MBUF 0x08 -#define XEDB_MEM 0x10 -int xennet_debug = 0x0; -void printk(char *, ...); -#define DPRINTF(x) if (xennet_debug) printk x; -#define DPRINTFN(n,x) if (xennet_debug & (n)) printk x; -#else -#define DPRINTF(x) -#define DPRINTFN(n,x) -#endif -#define PRINTF(x) printf x; - -#ifdef XENNET_DEBUG_DUMP -static void xennet_hex_dump(unsigned char *, size_t, char *, int); -#endif - -int xennet_match (struct device *, struct cfdata *, void *); -void xennet_attach (struct device *, struct device *, void *); -static void xennet_ctrlif_rx(ctrl_msg_t *, unsigned long); -static int xennet_driver_count_connected(void); -static void xennet_driver_status_change(netif_fe_driver_status_t *); -static void xennet_interface_status_change(netif_fe_interface_status_t *); -static void xennet_tx_mbuf_free(struct mbuf *, caddr_t, size_t, void *); -static void xennet_rx_mbuf_free(struct mbuf *, caddr_t, size_t, void *); -static int xen_network_handler(void *); -static void network_tx_buf_gc(struct xennet_softc *); -static void network_alloc_rx_buffers(struct xennet_softc *); -static void network_alloc_tx_buffers(struct xennet_softc *); -void xennet_init(struct xennet_softc *); -void xennet_reset(struct xennet_softc *); -#ifdef mediacode -static int xennet_mediachange (struct ifnet *); -static void xennet_mediastatus(struct ifnet *, struct ifmediareq *); -#endif - -CFATTACH_DECL(xennet, sizeof(struct xennet_softc), - xennet_match, xennet_attach, NULL, NULL); - -#define TX_MAX_ENTRIES (NETIF_TX_RING_SIZE - 2) -#define RX_MAX_ENTRIES (NETIF_RX_RING_SIZE - 2) -#define TX_ENTRIES 128 -#define RX_ENTRIES 128 - -static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE]; -static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1]; -static mmu_update_t 
rx_mmu[NETIF_RX_RING_SIZE]; - -/** Network interface info. */ -struct xennet_ctrl { - /** Number of interfaces. */ - int xc_interfaces; - /** Number of connected interfaces. */ - int xc_connected; - /** Error code. */ - int xc_err; - /** Driver status. */ - int xc_up; - - cfprint_t xc_cfprint; - struct device *xc_parent; -}; - -static struct xennet_ctrl netctrl = { -1, 0, 0 }; - -#ifdef mediacode -static int xennet_media[] = { - IFM_ETHER|IFM_AUTO, -}; -static int nxennet_media = (sizeof(xennet_media)/sizeof(xennet_media[0])); -#endif - - -static int -xennet_wait_for_interfaces(void) -{ - - while (netctrl.xc_interfaces != netctrl.xc_connected) - HYPERVISOR_yield(); - return 0; -} - -int -xennet_scan(struct device *self, struct xennet_attach_args *xneta, - cfprint_t print) -{ - ctrl_msg_t cmsg; - netif_fe_driver_status_t st; - - if ((xen_start_info.flags & SIF_INITDOMAIN) || - (xen_start_info.flags & SIF_NET_BE_DOMAIN)) - return 0; - - netctrl.xc_parent = self; - netctrl.xc_cfprint = print; - - printf("Initialising Xen virtual ethernet frontend driver.\n"); - - (void)ctrl_if_register_receiver(CMSG_NETIF_FE, xennet_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. 
*/ - cmsg.type = CMSG_NETIF_FE; - cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS; - cmsg.length = sizeof(netif_fe_driver_status_t); - st.status = NETIF_DRIVER_STATUS_UP; - st.max_handle = 0; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, 0); - - return 0; -} - -void -xennet_scan_finish(struct device *parent) -{ - int err; - - err = xennet_wait_for_interfaces(); - if (err) - ctrl_if_unregister_receiver(CMSG_NETIF_FE, xennet_ctrlif_rx); -} - -int -xennet_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xennet_attach_args *xa = (struct xennet_attach_args *)aux; - - if (strcmp(xa->xa_device, "xennet") == 0) - return 1; - return 0; -} - -void -xennet_attach(struct device *parent, struct device *self, void *aux) -{ - struct xennet_attach_args *xneta = (struct xennet_attach_args *)aux; - struct xennet_softc *sc = (struct xennet_softc *)self; - struct ifnet *ifp = &sc->sc_ethercom.ec_if; - int idx; - - aprint_normal(": Xen Virtual Network Interface\n"); - - sc->sc_ifno = xneta->xa_handle; - - /* Initialize ifnet structure. 
*/ - memcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ); - ifp->if_softc = sc; - ifp->if_start = xennet_start; - ifp->if_ioctl = xennet_ioctl; - ifp->if_watchdog = xennet_watchdog; - ifp->if_flags = IFF_BROADCAST | IFF_NOTRAILERS; - -#ifdef mediacode - ifmedia_init(&sc->sc_media, 0, xennet_mediachange, - xennet_mediastatus); - for (idx = 0; idx < nxennet_media; idx++) - ifmedia_add(&sc->sc_media, xennet_media[idx], 0, NULL); - ifmedia_set(&sc->sc_media, xennet_media[0]); -#endif - - for (idx = 0; idx < NETIF_TX_RING_SIZE; idx++) - sc->sc_tx_bufa[idx].xb_next = idx + 1; - for (idx = 0; idx < NETIF_RX_RING_SIZE; idx++) - sc->sc_rx_bufa[idx].xb_next = idx + 1; -} - -static struct xennet_softc * -find_device(int handle) -{ - struct device *dv; - struct xennet_softc *xs = NULL; - - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (dv->dv_cfattach == NULL || - dv->dv_cfattach->ca_attach != xennet_attach) - continue; - xs = (struct xennet_softc *)dv; - if (xs->sc_ifno == handle) - break; - } - return dv ? 
xs : NULL; -} - -static void -xennet_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - int respond = 1; - - DPRINTFN(XEDB_EVENT, ("> ctrlif_rx=%d\n", msg->subtype)); - switch (msg->subtype) { - case CMSG_NETIF_FE_INTERFACE_STATUS: - if (msg->length != sizeof(netif_fe_interface_status_t)) - goto error; - xennet_interface_status_change( - (netif_fe_interface_status_t *)&msg->msg[0]); - break; - - case CMSG_NETIF_FE_DRIVER_STATUS: - if (msg->length != sizeof(netif_fe_driver_status_t)) - goto error; - xennet_driver_status_change( - (netif_fe_driver_status_t *)&msg->msg[0]); - break; - - error: - default: - msg->length = 0; - break; - } - - if (respond) - ctrl_if_send_response(msg); -} - -static void -xennet_driver_status_change(netif_fe_driver_status_t *status) -{ - - DPRINTFN(XEDB_EVENT, ("xennet_driver_status_change(%d)\n", - status->status)); - - netctrl.xc_up = status->status; - xennet_driver_count_connected(); -} - -static int -xennet_driver_count_connected(void) -{ - struct device *dv; - struct xennet_softc *xs = NULL; - - netctrl.xc_interfaces = netctrl.xc_connected = 0; - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (dv->dv_cfattach == NULL || - dv->dv_cfattach->ca_attach != xennet_attach) - continue; - xs = (struct xennet_softc *)dv; - netctrl.xc_interfaces++; - if (xs->sc_backend_state == BEST_CONNECTED) - netctrl.xc_connected++; - } - - return netctrl.xc_connected; -} - -static void -xennet_interface_status_change(netif_fe_interface_status_t *status) -{ - ctrl_msg_t cmsg; - netif_fe_interface_connect_t up; - struct xennet_softc *sc; - struct ifnet *ifp; - struct xennet_attach_args xneta; - - DPRINTFN(XEDB_EVENT, ("xennet_interface_status_change(%d,%d,%02x:%02x:%02x:%02x:%02x:%02x)\n", - status->status, - status->handle, - status->mac[0], status->mac[1], status->mac[2], - status->mac[3], status->mac[4], status->mac[5])); - - sc = find_device(status->handle); - if (sc == NULL) { - xneta.xa_device = "xennet"; - xneta.xa_handle = 
status->handle; - config_found(netctrl.xc_parent, &xneta, netctrl.xc_cfprint); - sc = find_device(status->handle); - if (sc == NULL) { - printf("Status change: invalid netif handle %u\n", - status->handle); - return; - } - } - ifp = &sc->sc_ethercom.ec_if; - - DPRINTFN(XEDB_EVENT, ("xennet_interface_status_change(%d,%p,%02x:%02x:%02x:%02x:%02x:%02x)\n", - status->handle, sc, - status->mac[0], status->mac[1], status->mac[2], - status->mac[3], status->mac[4], status->mac[5])); - - switch (status->status) { - case NETIF_INTERFACE_STATUS_CLOSED: - printf("Unexpected netif-CLOSED message in state %d\n", - sc->sc_backend_state); - break; - - case NETIF_INTERFACE_STATUS_DISCONNECTED: -#if 0 - if (sc->sc_backend_state != BEST_CLOSED) { - printk("Unexpected netif-DISCONNECTED message" - " in state %d\n", sc->sc_backend_state); - printk("Attempting to reconnect network interface\n"); - - /* Begin interface recovery. - * - * NB. Whilst we're recovering, we turn the - * carrier state off. We take measures to - * ensure that this device isn't used for - * anything. We also stop the queue for this - * device. Various different approaches - * (e.g. continuing to buffer packets) have - * been tested but don't appear to improve the - * overall impact on TCP connections. - * - * TODO: (MAW) Change the Xend<->Guest - * protocol so that a recovery is initiated by - * a special "RESET" message - disconnect - * could just mean we're not allowed to use - * this interface any more. - */ - - /* Stop old i/f to prevent errors whilst we - * rebuild the state. */ - spin_lock_irq(&np->tx_lock); - spin_lock(&np->rx_lock); - netif_stop_queue(dev); - np->backend_state = BEST_DISCONNECTED; - spin_unlock(&np->rx_lock); - spin_unlock_irq(&np->tx_lock); - - /* Free resources. 
*/ - free_irq(np->irq, dev); - unbind_evtchn_from_irq(np->evtchn); - free_page((unsigned long)np->tx); - free_page((unsigned long)np->rx); - } -#endif - - if (sc->sc_backend_state == BEST_CLOSED) { - /* Move from CLOSED to DISCONNECTED state. */ - sc->sc_tx = (netif_tx_interface_t *) - uvm_km_valloc_align(kernel_map, PAGE_SIZE, PAGE_SIZE); - if (sc->sc_tx == NULL) - panic("netif: no tx va"); - sc->sc_rx = (netif_rx_interface_t *) - uvm_km_valloc_align(kernel_map, PAGE_SIZE, PAGE_SIZE); - if (sc->sc_rx == NULL) - panic("netif: no rx va"); - sc->sc_pg_tx = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); - if (sc->sc_pg_tx == NULL) { - panic("netif: no tx pages"); - } - pmap_kenter_pa((vaddr_t)sc->sc_tx, VM_PAGE_TO_PHYS(sc->sc_pg_tx), - VM_PROT_READ | VM_PROT_WRITE); - sc->sc_pg_rx = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); - if (sc->sc_pg_rx == NULL) { - panic("netif: no rx pages"); - } - pmap_kenter_pa((vaddr_t)sc->sc_rx, VM_PAGE_TO_PHYS(sc->sc_pg_rx), - VM_PROT_READ | VM_PROT_WRITE); - sc->sc_backend_state = BEST_DISCONNECTED; - } - - /* Construct an interface-CONNECT message for the - * domain controller. */ - cmsg.type = CMSG_NETIF_FE; - cmsg.subtype = CMSG_NETIF_FE_INTERFACE_CONNECT; - cmsg.length = sizeof(netif_fe_interface_connect_t); - up.handle = status->handle; - up.tx_shmem_frame = xpmap_ptom(VM_PAGE_TO_PHYS(sc->sc_pg_tx)) >> PAGE_SHIFT; - up.rx_shmem_frame = xpmap_ptom(VM_PAGE_TO_PHYS(sc->sc_pg_rx)) >> PAGE_SHIFT; - memcpy(cmsg.msg, &up, sizeof(up)); - - /* Tell the controller to bring up the interface. 
*/ - ctrl_if_send_message_block(&cmsg, NULL, 0, 0); - break; - - case NETIF_INTERFACE_STATUS_CONNECTED: - if (sc->sc_backend_state == BEST_CLOSED) { - printf("Unexpected netif-CONNECTED message" - " in state %d\n", sc->sc_backend_state); - break; - } - - memcpy(sc->sc_enaddr, status->mac, ETHER_ADDR_LEN); -#if 0 - if (xen_start_info.flags & SIF_PRIVILEGED) { - /* XXX for domain-0 change out ethernet address to be - * different than the physical address since arp - * replies from other domains will report the physical - * address. - */ - if (sc->sc_enaddr[0] != 0xaa) - sc->sc_enaddr[0] = 0xaa; - else - sc->sc_enaddr[0] = 0xab; - } -#endif - - /* Recovery procedure: */ - - /* Step 1: Reinitialise variables. */ - sc->sc_rx_resp_cons = sc->sc_tx_resp_cons = /* sc->sc_tx_full = */ 0; - sc->sc_rx->event = sc->sc_tx->event = 1; - - /* Step 2: Rebuild the RX and TX ring contents. */ - network_alloc_rx_buffers(sc); - SLIST_INIT(&sc->sc_tx_bufs); - network_alloc_tx_buffers(sc); - - /* Step 3: All public and private state should now be - * sane. Get ready to start sending and receiving - * packets and give the driver domain a kick because - * we've probably just requeued some packets. 
- */ - sc->sc_backend_state = BEST_CONNECTED; - __insn_barrier(); - hypervisor_notify_via_evtchn(status->evtchn); - network_tx_buf_gc(sc); - - if_attach(ifp); - ether_ifattach(ifp, sc->sc_enaddr); - - sc->sc_evtchn = status->evtchn; - sc->sc_irq = bind_evtchn_to_irq(sc->sc_evtchn); - event_set_handler(sc->sc_irq, &xen_network_handler, sc, IPL_NET); - hypervisor_enable_irq(sc->sc_irq); - xennet_driver_count_connected(); - - aprint_normal("%s: MAC address %s\n", sc->sc_dev.dv_xname, - ether_sprintf(sc->sc_enaddr)); - -#if NRND > 0 - rnd_attach_source(&sc->sc_rnd_source, sc->sc_dev.dv_xname, - RND_TYPE_NET, 0); -#endif - break; - - default: - printf("Status change to unknown value %d\n", - status->status); - break; - } - DPRINTFN(XEDB_EVENT, ("xennet_interface_status_change()\n")); -} - -static void -xennet_tx_mbuf_free(struct mbuf *m, caddr_t buf, size_t size, void *arg) -{ - struct xennet_txbuf *txbuf = (struct xennet_txbuf *)arg; - - DPRINTFN(XEDB_MBUF, ("xennet_tx_mbuf_free %p pa %p\n", txbuf, - (void *)txbuf->xt_pa)); - SLIST_INSERT_HEAD(&txbuf->xt_sc->sc_tx_bufs, txbuf, xt_next); - pool_cache_put(&mbpool_cache, m); -} - -static void -xennet_rx_push_buffer(struct xennet_softc *sc, int id) -{ - NETIF_RING_IDX ringidx; - int nr_pfns; - - ringidx = sc->sc_rx->req_prod; - nr_pfns = 0; - - DPRINTFN(XEDB_MEM, ("readding page va %p pa %p ma %p/%p to rx_ring " - "at %d with id %d\n", - (void *)sc->sc_rx_bufa[id].xb_rx.xbrx_va, - (void *)sc->sc_rx_bufa[id].xb_rx.xbrx_pa, - (void *)(PTE_BASE[x86_btop - (sc->sc_rx_bufa[id].xb_rx.xbrx_va)] & - PG_FRAME), - (void *)xpmap_ptom(sc->sc_rx_bufa[id].xb_rx.xbrx_pa), - ringidx, id)); - - sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].req.id = id; - - rx_pfn_array[nr_pfns] = xpmap_ptom(sc->sc_rx_bufa[id].xb_rx.xbrx_pa) - >> PAGE_SHIFT; - - /* Remove this page from pseudo phys map before - * passing back to Xen. 
*/ - xpmap_phys_to_machine_mapping[(sc->sc_rx_bufa[id].xb_rx.xbrx_pa - XPMAP_OFFSET) >> PAGE_SHIFT] = - INVALID_P2M_ENTRY; - - rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; - rx_mcl[nr_pfns].args[0] = sc->sc_rx_bufa[id].xb_rx.xbrx_va; - rx_mcl[nr_pfns].args[1] = 0; - rx_mcl[nr_pfns].args[2] = 0; - - nr_pfns++; - - sc->sc_rx_bufs_to_notify++; - - ringidx++; - - /* - * We may have allocated buffers which have entries - * outstanding in the page update queue -- make sure we flush - * those first! - */ - xpq_flush_queue(); - - /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; - - /* Give away a batch of pages. */ - rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; - rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation; - rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array; - rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns; - rx_mcl[nr_pfns].args[3] = 0; - rx_mcl[nr_pfns].args[4] = DOMID_SELF; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1); - - /* Check return status of HYPERVISOR_dom_mem_op(). */ - if ( rx_mcl[nr_pfns].result != nr_pfns ) - panic("Unable to reduce memory reservation\n"); - - /* Above is a suitable barrier to ensure backend will see requests. 
*/ - sc->sc_rx->req_prod = ringidx; -} - -static void -xennet_rx_mbuf_free(struct mbuf *m, caddr_t buf, size_t size, void *arg) -{ - union xennet_bufarray *xb = (union xennet_bufarray *)arg; - struct xennet_softc *sc = xb->xb_rx.xbrx_sc; - int id = (xb - sc->sc_rx_bufa); - - DPRINTFN(XEDB_MBUF, ("xennet_rx_mbuf_free id %d, mbuf %p, buf %p, " - "size %d\n", id, m, buf, size)); - - xennet_rx_push_buffer(sc, id); - - pool_cache_put(&mbpool_cache, m); -} - -static int -xen_network_handler(void *arg) -{ - struct xennet_softc *sc = arg; - struct ifnet *ifp = &sc->sc_ethercom.ec_if; - netif_rx_response_t *rx; - paddr_t pa; - NETIF_RING_IDX ringidx; - mmu_update_t *mmu = rx_mmu; - multicall_entry_t *mcl = rx_mcl; - struct mbuf *m; - - network_tx_buf_gc(sc); - -#if NRND > 0 - rnd_add_uint32(&sc->sc_rnd_source, sc->sc_rx_resp_cons); -#endif - - again: - for (ringidx = sc->sc_rx_resp_cons; - ringidx != sc->sc_rx->resp_prod; - ringidx++) { - rx = &sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].resp; - - if (rx->status < 0) - panic("rx->status < 0"); - /* XXXcl check rx->status for error */ - - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - printf("xennet: rx no mbuf\n"); - break; - } - - pa = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_pa; - - DPRINTFN(XEDB_EVENT, ("rx event %d for id %d, size %d, " - "status %d, ma %08lx, pa %08lx\n", ringidx, - rx->id, rx->status, rx->status, rx->addr, pa)); - - /* Remap the page. */ - mmu->ptr = (rx->addr & PG_FRAME) | MMU_MACHPHYS_UPDATE; - mmu->val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT; - mmu++; - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va; - mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW; - mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0; - mcl++; - - xpmap_phys_to_machine_mapping - [(pa - XPMAP_OFFSET) >> PAGE_SHIFT] = - rx->addr >> PAGE_SHIFT; - - /* Do all the remapping work, and M->P updates, in one - * big hypercall. 
*/ - if ((mcl - rx_mcl) != 0) { - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)rx_mmu; - mcl->args[1] = mmu - rx_mmu; - mcl->args[2] = 0; - mcl++; - (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - } - if (0) - printf("page mapped at va %08lx -> %08x/%08lx\n", - sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va, - PTE_BASE[x86_btop(sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)], - rx->addr); - mmu = rx_mmu; - mcl = rx_mcl; - - DPRINTFN(XEDB_MBUF, ("rx packet mbuf %p va %p pa %p/%p " - "ma %p\n", m, - (void *)sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va, - (void *)(xpmap_mtop(PTE_BASE[x86_btop - (sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME)), (void *)pa, - (void *)(PTE_BASE[x86_btop - (sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va)] & PG_FRAME))); - - m->m_len = m->m_pkthdr.len = rx->status; - m->m_pkthdr.rcvif = ifp; - if (sc->sc_rx->req_prod != sc->sc_rx->resp_prod) { - MEXTADD(m, (void *)(sc->sc_rx_bufa[rx->id].xb_rx. - xbrx_va + (rx->addr & PAGE_MASK)), rx->status, M_DEVBUF, - xennet_rx_mbuf_free, - &sc->sc_rx_bufa[rx->id]); - } else { - /* - * This was our last receive buffer, allocate - * memory, copy data and push the receive - * buffer back to the hypervisor. - */ - MEXTMALLOC(m, rx->status, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - printf("xennet: rx no mbuf 2\n"); - m_free(m); - break; - } - memcpy(m->m_data, (void *)(sc->sc_rx_bufa[rx->id]. - xb_rx.xbrx_va + (rx->addr & PAGE_MASK)), rx->status); - xennet_rx_push_buffer(sc, rx->id); - } - -#ifdef XENNET_DEBUG_DUMP - xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "r", rx->id); -#endif - -#if NBPFILTER > 0 - /* - * Pass packet to bpf if there is a listener. - */ - if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m); -#endif - - ifp->if_ipackets++; - - /* Pass the packet up. 
*/ - (*ifp->if_input)(ifp, m); - } - - sc->sc_rx_resp_cons = ringidx; - sc->sc_rx->event = sc->sc_rx_resp_cons + 1; - - if (sc->sc_rx->resp_prod != ringidx) - goto again; - - return 0; -} - -static inline int -get_bufarray_entry(union xennet_bufarray *a) -{ - int idx; - - idx = a[0].xb_next; - a[0].xb_next = a[idx].xb_next; - return idx; -} - -static inline void -put_bufarray_entry(union xennet_bufarray *a, int idx) -{ - - a[idx].xb_next = a[0].xb_next; - a[0].xb_next = idx; -} - -static void -network_tx_buf_gc(struct xennet_softc *sc) -{ - struct ifnet *ifp = &sc->sc_ethercom.ec_if; - NETIF_RING_IDX idx, prod; - - do { - prod = sc->sc_tx->resp_prod; - - for (idx = sc->sc_tx_resp_cons; idx != prod; idx++) { - DPRINTFN(XEDB_EVENT, ("tx event at pos %d, status: " - "%d, id: %d, mbuf %p, buf %p\n", idx, - sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.status, - sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id, - sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m, - mtod(sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m, void *))); - m_freem(sc->sc_tx_bufa[sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id].xb_tx.xbtx_m); - put_bufarray_entry(sc->sc_tx_bufa, - sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].resp.id); - sc->sc_tx_entries--; /* atomic */ - } - - sc->sc_tx_resp_cons = prod; - - /* - * Set a new event, then check for race with update of - * tx_cons. 
- */ - sc->sc_tx->event = /* atomic */ - prod + (sc->sc_tx_entries >> 1) + 1; - __insn_barrier(); - } while (prod != sc->sc_tx->resp_prod); - - if (sc->sc_tx->resp_prod == sc->sc_tx->req_prod) - ifp->if_timer = 0; - /* KDASSERT(sc->sc_net_idx->tx_req_prod == */ - /* TX_RING_ADD(sc->sc_net_idx->tx_resp_prod, sc->sc_tx_entries)); */ -} - -static void -network_alloc_rx_buffers(struct xennet_softc *sc) -{ - vaddr_t rxpages, va; - paddr_t pa; - struct vm_page *pg; - int id, nr_pfns; - NETIF_RING_IDX ringidx; - int s; - - ringidx = sc->sc_rx->req_prod; - if ((ringidx - sc->sc_rx_resp_cons) > (RX_MAX_ENTRIES / 2)) - return; - - nr_pfns = 0; - - rxpages = uvm_km_valloc_align(kernel_map, RX_ENTRIES * PAGE_SIZE, - PAGE_SIZE); - - s = splnet(); - for (va = rxpages; va < rxpages + RX_ENTRIES * PAGE_SIZE; - va += PAGE_SIZE) { - pg = uvm_pagealloc(NULL, 0, NULL, 0); - if (pg == NULL) - panic("network_alloc_rx_buffers: no pages"); - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - - id = get_bufarray_entry(sc->sc_rx_bufa); - sc->sc_rx_bufa[id].xb_rx.xbrx_va = va; - sc->sc_rx_bufa[id].xb_rx.xbrx_sc = sc; - - pa = VM_PAGE_TO_PHYS(pg); - DPRINTFN(XEDB_MEM, ("adding page va %p pa %p/%p " - "ma %p/%p to rx_ring at %d with id %d\n", (void *)va, - (void *)(VM_PAGE_TO_PHYS(pg) & PG_FRAME), (void *)xpmap_mtop(PTE_BASE[x86_btop(va)]), - (void *)(PTE_BASE[x86_btop(va)] & PG_FRAME), - (void *)xpmap_ptom(VM_PAGE_TO_PHYS(pg)), - ringidx, id)); - sc->sc_rx_bufa[id].xb_rx.xbrx_pa = pa; - sc->sc_rx->ring[MASK_NETIF_RX_IDX(ringidx)].req.id = id; - - rx_pfn_array[nr_pfns] = xpmap_ptom(pa) >> PAGE_SHIFT; - - /* Remove this page from pseudo phys map before - * passing back to Xen. 
*/ - xpmap_phys_to_machine_mapping[(pa - XPMAP_OFFSET) >> PAGE_SHIFT] = - INVALID_P2M_ENTRY; - - rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; - rx_mcl[nr_pfns].args[0] = va; - rx_mcl[nr_pfns].args[1] = 0; - rx_mcl[nr_pfns].args[2] = 0; - - nr_pfns++; - - sc->sc_rx_bufs_to_notify++; - - ringidx++; - if ((ringidx - sc->sc_rx_resp_cons) == RX_MAX_ENTRIES) - break; - } - - if (nr_pfns == 0) { - splx(s); - return; - } - - /* - * We may have allocated buffers which have entries - * outstanding in the page update queue -- make sure we flush - * those first! - */ - xpq_flush_queue(); - - /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; - - /* Give away a batch of pages. */ - rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; - rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation; - rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array; - rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns; - rx_mcl[nr_pfns].args[3] = 0; - rx_mcl[nr_pfns].args[4] = DOMID_SELF; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1); - - /* Check return status of HYPERVISOR_dom_mem_op(). */ - if (rx_mcl[nr_pfns].result != nr_pfns) - panic("Unable to reduce memory reservation\n"); - - /* Above is a suitable barrier to ensure backend will see requests. 
*/ - sc->sc_rx->req_prod = ringidx; - - splx(s); - -} - -static void -network_alloc_tx_buffers(struct xennet_softc *sc) -{ - vaddr_t txpages, va; - struct vm_page *pg; - struct xennet_txbuf *txbuf; - int i; - - txpages = uvm_km_valloc_align(kernel_map, - (TX_ENTRIES / TXBUF_PER_PAGE) * PAGE_SIZE, PAGE_SIZE); - for (va = txpages; - va < txpages + (TX_ENTRIES / TXBUF_PER_PAGE) * PAGE_SIZE; - va += PAGE_SIZE) { - pg = uvm_pagealloc(NULL, 0, NULL, 0); - if (pg == NULL) - panic("network_alloc_tx_buffers: no pages"); - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE); - - for (i = 0; i < TXBUF_PER_PAGE; i++) { - txbuf = (struct xennet_txbuf *) - (va + i * (PAGE_SIZE / TXBUF_PER_PAGE)); - txbuf->xt_sc = sc; - txbuf->xt_pa = VM_PAGE_TO_PHYS(pg) + - i * (PAGE_SIZE / TXBUF_PER_PAGE) + - sizeof(struct xennet_txbuf); - SLIST_INSERT_HEAD(&sc->sc_tx_bufs, txbuf, xt_next); - } - } -} - -/* - * Called at splnet. - */ -void -xennet_start(struct ifnet *ifp) -{ - struct xennet_softc *sc = ifp->if_softc; - struct mbuf *m, *new_m; - struct xennet_txbuf *txbuf; - netif_tx_request_t *txreq; - NETIF_RING_IDX idx; - paddr_t pa; - int bufid; - - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", sc->sc_dev.dv_xname)); - -#ifdef DIAGNOSTIC - IFQ_POLL(&ifp->if_snd, m); - if (m == 0) - panic("%s: No packet to start", sc->sc_dev.dv_xname); -#endif - -#if NRND > 0 - rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx->req_prod); -#endif - - if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING) - return; - - idx = sc->sc_tx->req_prod; - while (/*CONSTCOND*/1) { - - IFQ_POLL(&ifp->if_snd, m); - if (m == NULL) - break; - - switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) { - case M_EXT|M_EXT_CLUSTER: - pa = m->m_ext.ext_paddr + - (m->m_data - m->m_ext.ext_buf); - break; - default: - case 0: - pa = m->m_paddr + M_BUFOFFSET(m) + - (m->m_data - M_BUFADDR(m)); - break; - } - - if (m->m_pkthdr.len != m->m_len || - (pa ^ (pa + m->m_pkthdr.len)) & PG_FRAME) { - txbuf = 
SLIST_FIRST(&sc->sc_tx_bufs); - if (txbuf == NULL) { - // printf("xennet: no tx bufs\n"); - break; - } - - MGETHDR(new_m, M_DONTWAIT, MT_DATA); - if (new_m == NULL) { - printf("xennet: no mbuf\n"); - break; - } - - SLIST_REMOVE_HEAD(&sc->sc_tx_bufs, xt_next); - IFQ_DEQUEUE(&ifp->if_snd, m); - - KASSERT(m->m_flags & M_PKTHDR); - M_COPY_PKTHDR(new_m, m); - m_copydata(m, 0, m->m_pkthdr.len, txbuf->xt_buf); - MEXTADD(new_m, txbuf->xt_buf, m->m_pkthdr.len, - M_DEVBUF, xennet_tx_mbuf_free, txbuf); - new_m->m_ext.ext_paddr = txbuf->xt_pa; - new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len; - - m_freem(m); - m = new_m; - - pa = m->m_ext.ext_paddr + - (m->m_data - m->m_ext.ext_buf); - } else - IFQ_DEQUEUE(&ifp->if_snd, m); - - bufid = get_bufarray_entry(sc->sc_tx_bufa); - sc->sc_tx_bufa[bufid].xb_tx.xbtx_m = m; - - DPRINTFN(XEDB_MBUF, ("xennet_start id %d, mbuf %p, buf %p/%p, " - "size %d\n", bufid, m, mtod(m, void *), - (void *)pa, m->m_pkthdr.len)); -#ifdef XENNET_DEBUG_DUMP - xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s", bufid); -#endif - - txreq = &sc->sc_tx->ring[MASK_NETIF_TX_IDX(idx)].req; - txreq->id = bufid; - txreq->addr = xpmap_ptom(pa); - txreq->size = m->m_pkthdr.len; - - __insn_barrier(); - idx++; - sc->sc_tx->req_prod = idx; - - sc->sc_tx_entries++; /* XXX atomic */ - -#ifdef XENNET_DEBUG - DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, " - "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *), - (void *)*kvtopte(mtod(m, vaddr_t)), - (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))), - (void *)m->m_paddr, m->m_pkthdr.len, m->m_len)); -#endif - -#if NBPFILTER > 0 - /* - * Pass packet to bpf if there is a listener. 
- */ - if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m); -#endif - } - - ifp->if_flags &= ~IFF_OACTIVE; - - network_tx_buf_gc(sc); - - __insn_barrier(); - if (sc->sc_tx->resp_prod != idx) - hypervisor_notify_via_evtchn(sc->sc_evtchn); - - ifp->if_timer = 5; - - ifp->if_opackets++; - - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n", - sc->sc_dev.dv_xname)); -} - -int -xennet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct xennet_softc *sc = ifp->if_softc; - struct ifaddr *ifa = (struct ifaddr *)data; -#ifdef mediacode - struct ifreq *ifr = (struct ifreq *)data; -#endif - int s, error = 0; - - s = splnet(); - - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n", sc->sc_dev.dv_xname)); - - switch(cmd) { - case SIOCSIFADDR: - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOCSIFADDR\n", - sc->sc_dev.dv_xname)); - ifp->if_flags |= IFF_UP; - switch (ifa->ifa_addr->sa_family) { -#ifdef INET - case AF_INET: - xennet_init(sc); - arp_ifinit(ifp, ifa); - break; -#endif - default: - xennet_init(sc); - break; - } - break; - - case SIOCSIFFLAGS: - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOCSIFFLAGS\n", - sc->sc_dev.dv_xname)); - break; - - case SIOCADDMULTI: - case SIOCDELMULTI: - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOC*MULTI\n", - sc->sc_dev.dv_xname)); - break; - -#ifdef mediacode - case SIOCGIFMEDIA: - case SIOCSIFMEDIA: - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() SIOC*IFMEDIA\n", - sc->sc_dev.dv_xname)); - error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); - break; -#endif - - default: - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl(0x%lx) unknown cmd\n", - sc->sc_dev.dv_xname, cmd)); - error = EINVAL; - break; - } - - splx(s); - - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n", - sc->sc_dev.dv_xname, error)); - - return error; -} - -void -xennet_watchdog(struct ifnet *ifp) -{ - - panic("xennet_watchdog\n"); -} - -void -xennet_init(struct xennet_softc *sc) -{ - struct ifnet *ifp = &sc->sc_ethercom.ec_if; - - DPRINTFN(XEDB_FOLLOW, ("%s: 
xennet_init()\n", sc->sc_dev.dv_xname)); - - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_flags & IFF_RUNNING) == 0) - xennet_reset(sc); - - ifp->if_flags |= IFF_RUNNING; - ifp->if_flags &= ~IFF_OACTIVE; - ifp->if_timer = 0; - } else { - ifp->if_flags &= ~IFF_RUNNING; - xennet_reset(sc); - } -} - -void -xennet_reset(struct xennet_softc *sc) -{ - - DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n", sc->sc_dev.dv_xname)); -} - -#ifdef mediacode -/* - * Media change callback. - */ -static int -xennet_mediachange(struct ifnet *ifp) -{ - struct xennet_softc *sc = ifp->if_softc; - - switch IFM_SUBTYPE(sc->sc_media.ifm_media) { - case IFM_AUTO: - break; - default: - return (1); - break; - } - - return (0); -} - -/* - * Media status callback. - */ -static void -xennet_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) -{ - struct xennet_softc *sc = ifp->if_softc; - - if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_AUTO) - ifmr->ifm_active = sc->sc_media.ifm_cur->ifm_data; - - ifmr->ifm_status &= ~IFM_AVALID; -} -#endif - -int -xennet_bootstatic_callback(struct nfs_diskless *nd) -{ - struct ifnet *ifp = nd->nd_ifp; - struct xennet_softc *sc = (struct xennet_softc *)ifp->if_softc; - union xen_cmdline_parseinfo xcp; - struct sockaddr_in *sin; - - memset(&xcp, 0, sizeof(xcp.xcp_netinfo)); - xcp.xcp_netinfo.xi_ifno = sc->sc_ifno; - xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host; - xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp); - - nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]); - nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]); - nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]); - - sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr; - memset((caddr_t)sin, 0, sizeof(*sin)); - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]); - - return (NFS_BOOTSTATIC_HAS_MYIP|NFS_BOOTSTATIC_HAS_GWIP| - NFS_BOOTSTATIC_HAS_MASK|NFS_BOOTSTATIC_HAS_SERVADDR| - NFS_BOOTSTATIC_HAS_SERVER); -} - - -#ifdef XENNET_DEBUG_DUMP 
-#define XCHR(x) "0123456789abcdef"[(x) & 0xf] -static void -xennet_hex_dump(unsigned char *pkt, size_t len, char *type, int id) -{ - size_t i, j; - - printf("pkt %p len %d/%x type %s id %d\n", pkt, len, len, type, id); - printf("00000000 "); - for(i=0; i<len; i++) { - printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i])); - if ((i+1) % 16 == 8) - printf(" "); - if ((i+1) % 16 == 0) { - printf(" %c", '|'); - for(j=0; j<16; j++) - printf("%c", pkt[i-15+j]>=32 && - pkt[i-15+j]<127?pkt[i-15+j]:'.'); - printf("%c\n%c%c%c%c%c%c%c%c ", '|', - XCHR((i+1)>>28), XCHR((i+1)>>24), - XCHR((i+1)>>20), XCHR((i+1)>>16), - XCHR((i+1)>>12), XCHR((i+1)>>8), - XCHR((i+1)>>4), XCHR(i+1)); - } - } - printf("\n"); -} -#undef XCHR -#endif diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1689 +0,0 @@ -/* $NetBSD: xbd.c,v 1.9.2.1 2004/05/22 15:59:11 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xbd.c,v 1.9.2.1 2004/05/22 15:59:11 he Exp $"); - -#include "xbd.h" -#include "rnd.h" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/errno.h> -#include <sys/buf.h> -#include <sys/malloc.h> -#include <sys/pool.h> -#include <sys/ioctl.h> -#include <sys/device.h> -#include <sys/disk.h> -#include <sys/disklabel.h> -#include <sys/fcntl.h> -#include <sys/vnode.h> -#include <sys/lock.h> -#include <sys/conf.h> -#include <sys/queue.h> -#include <sys/stat.h> -#include <sys/sysctl.h> -#include <sys/kernel.h> -#include <sys/kthread.h> - -#include <uvm/uvm.h> - -#if NRND > 0 -#include <sys/rnd.h> -#endif - -#include <dev/dkvar.h> -#include <machine/xbdvar.h> - -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/ctrl_if.h> - - -static void control_send(blkif_request_t *, blkif_response_t *); -static void send_interface_connect(void); - -static void xbd_attach(struct device *, struct device *, void *); -static int xbd_detach(struct device *, int); - -#if NXBD > 0 -int xbd_match(struct device *, struct cfdata *, void *); -CFATTACH_DECL(xbd, sizeof(struct xbd_softc), - 
xbd_match, xbd_attach, xbd_detach, NULL); - -extern struct cfdriver xbd_cd; -#endif - -#if NWD > 0 -int xbd_wd_match(struct device *, struct cfdata *, void *); -CFATTACH_DECL(wd, sizeof(struct xbd_softc), - xbd_wd_match, xbd_attach, xbd_detach, NULL); - -extern struct cfdriver wd_cd; -#endif - -#if NSD > 0 -int xbd_sd_match(struct device *, struct cfdata *, void *); -CFATTACH_DECL(sd, sizeof(struct xbd_softc), - xbd_sd_match, xbd_attach, xbd_detach, NULL); - -extern struct cfdriver sd_cd; -#endif - -#if NCD > 0 -int xbd_cd_match(struct device *, struct cfdata *, void *); -CFATTACH_DECL(cd, sizeof(struct xbd_softc), - xbd_cd_match, xbd_attach, xbd_detach, NULL); - -extern struct cfdriver cd_cd; -#endif - - -dev_type_open(xbdopen); -dev_type_close(xbdclose); -dev_type_read(xbdread); -dev_type_write(xbdwrite); -dev_type_ioctl(xbdioctl); -dev_type_ioctl(xbdioctl_cdev); -dev_type_strategy(xbdstrategy); -dev_type_dump(xbddump); -dev_type_size(xbdsize); - -#if NXBD > 0 -const struct bdevsw xbd_bdevsw = { - xbdopen, xbdclose, xbdstrategy, xbdioctl, - xbddump, xbdsize, D_DISK -}; - -const struct cdevsw xbd_cdevsw = { - xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev, - nostop, notty, nopoll, nommap, nokqfilter, D_DISK -}; - -static dev_t xbd_major; -#endif - -#if NWD > 0 -const struct bdevsw wd_bdevsw = { - xbdopen, xbdclose, xbdstrategy, xbdioctl, - xbddump, xbdsize, D_DISK -}; - -const struct cdevsw wd_cdevsw = { - xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev, - nostop, notty, nopoll, nommap, nokqfilter, D_DISK -}; - -static dev_t xbd_wd_major; -static dev_t xbd_wd_cdev_major; -#endif - -#if NSD > 0 -const struct bdevsw sd_bdevsw = { - xbdopen, xbdclose, xbdstrategy, xbdioctl, - xbddump, xbdsize, D_DISK -}; - -const struct cdevsw sd_cdevsw = { - xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev, - nostop, notty, nopoll, nommap, nokqfilter, D_DISK -}; - -static dev_t xbd_sd_major; -static dev_t xbd_sd_cdev_major; -#endif - -#if NCD > 0 -const struct bdevsw 
cd_bdevsw = { - xbdopen, xbdclose, xbdstrategy, xbdioctl, - xbddump, xbdsize, D_DISK -}; - -const struct cdevsw cd_cdevsw = { - xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl_cdev, - nostop, notty, nopoll, nommap, nokqfilter, D_DISK -}; - -static dev_t xbd_cd_major; -static dev_t xbd_cd_cdev_major; -#endif - - -static int xbdstart(struct dk_softc *, struct buf *); -static int xbd_response_handler(void *); -#if 0 -static void xbd_update_create_kthread(void *); -static void xbd_update_kthread(void *); -static int xbd_update_handler(void *); -#endif - -static int xbdinit(struct xbd_softc *, vdisk_t *, struct dk_intf *); - -/* Pseudo-disk Interface */ -static struct dk_intf dkintf_esdi = { - DTYPE_ESDI, - "Xen Virtual ESDI", - xbdopen, - xbdclose, - xbdstrategy, - xbdstart, -}; -#if NSD > 0 -static struct dk_intf dkintf_scsi = { - DTYPE_SCSI, - "Xen Virtual SCSI", - xbdopen, - xbdclose, - xbdstrategy, - xbdstart, -}; -#endif - -#if NXBD > 0 -static struct xbd_attach_args xbd_ata = { - .xa_device = "xbd", - .xa_dkintf = &dkintf_esdi, -}; -#endif - -#if NWD > 0 -static struct xbd_attach_args wd_ata = { - .xa_device = "wd", - .xa_dkintf = &dkintf_esdi, -}; -#endif - -#if NSD > 0 -static struct xbd_attach_args sd_ata = { - .xa_device = "sd", - .xa_dkintf = &dkintf_scsi, -}; -#endif - -#if NCD > 0 -static struct xbd_attach_args cd_ata = { - .xa_device = "cd", - .xa_dkintf = &dkintf_esdi, -}; -#endif - -static struct sysctlnode *diskcookies; - - -#if defined(XBDDEBUG) && !defined(DEBUG) -#define DEBUG -#endif - -#ifdef DEBUG -int xbddebug = 0; - -#define XBDB_FOLLOW 0x1 -#define XBDB_IO 0x2 -#define XBDB_SETUP 0x4 -#define XBDB_HOTPLUG 0x8 - -#define IFDEBUG(x,y) if (xbddebug & (x)) y -#define DPRINTF(x,y) IFDEBUG(x, printf y) -#define DPRINTF_FOLLOW(y) DPRINTF(XBDB_FOLLOW, y) -#define DEBUG_MARK_UNUSED(_xr) (_xr)->xr_sc = (void *)0xdeadbeef - -struct xbdreq *xbd_allxr; -#else -#define IFDEBUG(x,y) -#define DPRINTF(x,y) -#define DPRINTF_FOLLOW(y) -#define 
DEBUG_MARK_UNUSED(_xr) -#endif - -#ifdef DIAGNOSTIC -#define DIAGPANIC(x) panic x -#define DIAGCONDPANIC(x,y) if (x) panic y -#else -#define DIAGPANIC(x) -#define DIAGCONDPANIC(x,y) -#endif - - -struct xbdreq { - union { - SLIST_ENTRY(xbdreq) _unused; /* ptr. to next free xbdreq */ - SIMPLEQ_ENTRY(xbdreq) _suspended; - /* link when on suspended queue. */ - } _link; - struct xbdreq *xr_parent; /* ptr. to parent xbdreq */ - struct buf *xr_bp; /* ptr. to original I/O buf */ - daddr_t xr_bn; /* block no. to process */ - long xr_bqueue; /* bytes left to queue */ - long xr_bdone; /* bytes left */ - vaddr_t xr_data; /* ptr. to data to be proc. */ - vaddr_t xr_aligned; /* ptr. to aligned data */ - long xr_breq; /* bytes in this req. */ - struct xbd_softc *xr_sc; /* ptr. to xbd softc */ -}; -#define xr_unused _link._unused -#define xr_suspended _link._suspended - -SLIST_HEAD(,xbdreq) xbdreqs = - SLIST_HEAD_INITIALIZER(xbdreqs); -static SIMPLEQ_HEAD(, xbdreq) xbdr_suspended = - SIMPLEQ_HEAD_INITIALIZER(xbdr_suspended); - -#define CANGET_XBDREQ() (!SLIST_EMPTY(&xbdreqs)) - -#define GET_XBDREQ(_xr) do { \ - (_xr) = SLIST_FIRST(&xbdreqs); \ - if (__predict_true(_xr)) \ - SLIST_REMOVE_HEAD(&xbdreqs, xr_unused); \ -} while (/*CONSTCOND*/0) - -#define PUT_XBDREQ(_xr) do { \ - DEBUG_MARK_UNUSED(_xr); \ - SLIST_INSERT_HEAD(&xbdreqs, _xr, xr_unused); \ -} while (/*CONSTCOND*/0) - -static struct bufq_state bufq; -static int bufq_users = 0; - -#define XEN_MAJOR(_dev) ((_dev) >> 8) -#define XEN_MINOR(_dev) ((_dev) & 0xff) - -#define XEN_SCSI_DISK0_MAJOR 8 -#define XEN_SCSI_DISK1_MAJOR 65 -#define XEN_SCSI_DISK2_MAJOR 66 -#define XEN_SCSI_DISK3_MAJOR 67 -#define XEN_SCSI_DISK4_MAJOR 68 -#define XEN_SCSI_DISK5_MAJOR 69 -#define XEN_SCSI_DISK6_MAJOR 70 -#define XEN_SCSI_DISK7_MAJOR 71 -#define XEN_SCSI_DISK8_MAJOR 128 -#define XEN_SCSI_DISK9_MAJOR 129 -#define XEN_SCSI_DISK10_MAJOR 130 -#define XEN_SCSI_DISK11_MAJOR 131 -#define XEN_SCSI_DISK12_MAJOR 132 -#define XEN_SCSI_DISK13_MAJOR 133 
-#define XEN_SCSI_DISK14_MAJOR 134 -#define XEN_SCSI_DISK15_MAJOR 135 -#define XEN_SCSI_CDROM_MAJOR 11 - -#define XEN_IDE0_MAJOR 3 -#define XEN_IDE1_MAJOR 22 -#define XEN_IDE2_MAJOR 33 -#define XEN_IDE3_MAJOR 34 -#define XEN_IDE4_MAJOR 56 -#define XEN_IDE5_MAJOR 57 -#define XEN_IDE6_MAJOR 88 -#define XEN_IDE7_MAJOR 89 -#define XEN_IDE8_MAJOR 90 -#define XEN_IDE9_MAJOR 91 - -#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */ -#define XEN_BSIZE (1 << XEN_BSHIFT) - -#define MAX_VBDS 64 -static int nr_vbds; -static vdisk_t *vbd_info; - -static blkif_ring_t *blk_ring = NULL; -static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */ -static BLKIF_RING_IDX req_prod; /* Private request producer. */ -static BLKIF_RING_IDX last_req_prod; /* Request producer at last trap. */ - -#define STATE_CLOSED 0 -#define STATE_DISCONNECTED 1 -#define STATE_CONNECTED 2 -static unsigned int state = STATE_CLOSED; -static unsigned int blkif_evtchn = 0; -static unsigned int blkif_irq = 0; -static unsigned int blkif_handle = 0; - -static int blkif_control_rsp_valid = 0; -static blkif_response_t blkif_control_rsp; - -/** Network interface info. 
*/ -struct xbd_ctrl { - - cfprint_t xc_cfprint; - struct device *xc_parent; -}; - -static struct xbd_ctrl blkctrl; - -#define XBDUNIT(x) DISKUNIT(x) -#define GETXBD_SOFTC(_xs, x) if (!((_xs) = getxbd_softc(x))) return ENXIO -#define GETXBD_SOFTC_CDEV(_xs, x) do { \ - dev_t bx = devsw_chr2blk((x)); \ - if (bx == NODEV) \ - return ENXIO; \ - if (!((_xs) = getxbd_softc(bx))) \ - return ENXIO; \ -} while (/*CONSTCOND*/0) - -static struct xbd_softc * -getxbd_softc(dev_t dev) -{ - int unit = XBDUNIT(dev); - - DPRINTF_FOLLOW(("getxbd_softc(0x%x): major = %d unit = %d\n", dev, - major(dev), unit)); -#if NXBD > 0 - if (major(dev) == xbd_major) - return device_lookup(&xbd_cd, unit); -#endif -#if NWD > 0 - if (major(dev) == xbd_wd_major || major(dev) == xbd_wd_cdev_major) - return device_lookup(&wd_cd, unit); -#endif -#if NSD > 0 - if (major(dev) == xbd_sd_major || major(dev) == xbd_sd_cdev_major) - return device_lookup(&sd_cd, unit); -#endif -#if NCD > 0 - if (major(dev) == xbd_cd_major || major(dev) == xbd_cd_cdev_major) - return device_lookup(&cd_cd, unit); -#endif - return NULL; -} - -static int -get_vbd_info(vdisk_t *disk_info) -{ - vdisk_t *buf; - int nr; - blkif_request_t req; - blkif_response_t rsp; - paddr_t pa; - - buf = (vdisk_t *)uvm_km_kmemalloc1(kmem_map, NULL, - PAGE_SIZE, PAGE_SIZE, UVM_UNKNOWN_OFFSET, 0); - pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa); - /* Probe for disk information. */ - memset(&req, 0, sizeof(req)); - req.operation = BLKIF_OP_PROBE; - req.nr_segments = 1; - req.frame_and_sects[0] = xpmap_ptom_masked(pa) | 7; - - control_send(&req, &rsp); - nr = rsp.status > MAX_VBDS ? 
MAX_VBDS : rsp.status; - - if (rsp.status < 0) - printf("WARNING: Could not probe disks (%d)\n", rsp.status); - - memcpy(disk_info, buf, nr * sizeof(vdisk_t)); - - uvm_km_free(kmem_map, (vaddr_t)buf, PAGE_SIZE); - - return nr; -} - -static struct xbd_attach_args * -get_xbda(vdisk_t *xd) -{ - - switch (XEN_MAJOR(xd->device)) { -#if NSD > 0 - case XEN_SCSI_DISK0_MAJOR: - case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR: - case XEN_SCSI_DISK8_MAJOR ... XEN_SCSI_DISK15_MAJOR: - if (xd->capacity == 0) - return NULL; - return &sd_ata; - case XEN_SCSI_CDROM_MAJOR: - return &cd_ata; -#endif -#if NWD > 0 - case XEN_IDE0_MAJOR: - case XEN_IDE1_MAJOR: - case XEN_IDE2_MAJOR: - case XEN_IDE3_MAJOR: - case XEN_IDE4_MAJOR: - case XEN_IDE5_MAJOR: - case XEN_IDE6_MAJOR: - case XEN_IDE7_MAJOR: - case XEN_IDE8_MAJOR: - case XEN_IDE9_MAJOR: - if (xd->info & VDISK_CDROM) - return &cd_ata; - if (xd->capacity == 0) - return NULL; - return &wd_ata; -#endif - default: - if (xd->capacity == 0) - return NULL; - return &xbd_ata; - } - return NULL; -} - -static void -free_interface(void) -{ - - /* Prevent new requests being issued until we fix things up. */ - // simple_lock(&blkif_io_lock); - // recovery = 1; - state = STATE_DISCONNECTED; - // simple_unlock(&blkif_io_lock); - - /* Free resources associated with old device channel. 
*/ - if (blk_ring) { - uvm_km_free(kmem_map, (vaddr_t)blk_ring, PAGE_SIZE); - blk_ring = NULL; - } - - if (blkif_irq) { -#if 0 - free_irq(blkif_irq, NULL); -#endif - blkif_irq = 0; - } - - if (blkif_evtchn) { -#if 0 - unbind_evtchn_from_irq(blkif_evtchn); -#endif - blkif_evtchn = 0; - } -} - -static void -close_interface(void){ -} - -static void -disconnect_interface(void) -{ - - if (blk_ring == NULL) - blk_ring = (blkif_ring_t *)uvm_km_kmemalloc1(kmem_map, NULL, - PAGE_SIZE, PAGE_SIZE, UVM_UNKNOWN_OFFSET, 0); - memset(blk_ring, 0, PAGE_SIZE); - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = - last_req_prod = 0; - state = STATE_DISCONNECTED; - send_interface_connect(); -} - -static void -reset_interface(void) -{ - - printf("Recovering virtual block device driver\n"); - free_interface(); - disconnect_interface(); -} - -static void -connect_interface(blkif_fe_interface_status_t *status) -{ - // unsigned long flags; - struct xbd_attach_args *xbda; - vdisk_t *xd; - int i; - - blkif_evtchn = status->evtchn; - blkif_irq = bind_evtchn_to_irq(blkif_evtchn); - - event_set_handler(blkif_irq, &xbd_response_handler, NULL, IPL_BIO); - hypervisor_enable_irq(blkif_irq); - - /* Transition to connected in case we need to do - * a partition probe on a whole disk. */ - state = STATE_CONNECTED; - - /* Probe for discs attached to the interface. */ - // xlvbd_init(); - MALLOC(vbd_info, vdisk_t *, MAX_VBDS * sizeof(vdisk_t), - M_DEVBUF, M_WAITOK); - memset(vbd_info, 0, MAX_VBDS * sizeof(vdisk_t)); - nr_vbds = get_vbd_info(vbd_info); - if (nr_vbds <= 0) - goto out; - - for (i = 0; i < nr_vbds; i++) { - xd = &vbd_info[i]; - xbda = get_xbda(xd); - if (xbda) { - xbda->xa_xd = xd; - config_found(blkctrl.xc_parent, xbda, - blkctrl.xc_cfprint); - } - } - -#if 0 - /* Kick pending requests. 
*/ - save_and_cli(flags); - // simple_lock(&blkif_io_lock); - kick_pending_request_queues(); - // simple_unlock(&blkif_io_lock); - restore_flags(flags); -#endif - return; - - out: - FREE(vbd_info, M_DEVBUF); - vbd_info = NULL; - return; -} - -static void -unexpected(blkif_fe_interface_status_t *status) -{ - - printf("Unexpected blkif status %d in state %d\n", - status->status, state); -} - -#if 0 -static struct device * -find_device(vdisk_t *xd) -{ - struct device *dv; - struct xbd_softc *xs = NULL; - - for (dv = alldevs.tqh_first; dv != NULL; dv = dv->dv_list.tqe_next) { - if (dv->dv_cfattach == NULL || - dv->dv_cfattach->ca_attach != xbd_attach) - continue; - xs = (struct xbd_softc *)dv; - if (xd == NULL || xs->sc_xd_device == xd->device) - break; - } - return dv; -} -#endif - -static void -blkif_status(blkif_fe_interface_status_t *status) -{ - - if (status->handle != blkif_handle) { - printf("Invalid blkif: handle=%u", status->handle); - return; - } - - switch (status->status) { - case BLKIF_INTERFACE_STATUS_CLOSED: - switch (state) { - case STATE_CLOSED: - unexpected(status); - break; - case STATE_DISCONNECTED: - case STATE_CONNECTED: - unexpected(status); - close_interface(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch (state) { - case STATE_CLOSED: - disconnect_interface(); - break; - case STATE_DISCONNECTED: - case STATE_CONNECTED: - unexpected(status); - reset_interface(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch (state) { - case STATE_CLOSED: - unexpected(status); - disconnect_interface(); - connect_interface(status); - break; - case STATE_DISCONNECTED: - connect_interface(status); - break; - case STATE_CONNECTED: - unexpected(status); - connect_interface(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch (state) { - case STATE_CLOSED: - case STATE_DISCONNECTED: - unexpected(status); - break; - case STATE_CONNECTED: -#if 0 - vbd_update(); -#endif - break; - } - 
break; - - default: - printf(" Invalid blkif status: %d\n", status->status); - break; - } -} - - -static void -xbd_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch (msg->subtype) { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - if (msg->length != sizeof(blkif_fe_interface_status_t)) - goto parse_error; - blkif_status((blkif_fe_interface_status_t *) - &msg->msg[0]); - break; - default: - goto parse_error; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - -#if 0 -static void -enable_update_events(struct device *self) -{ - - kthread_create(xbd_update_create_kthread, self); - event_set_handler(_EVENT_VBD_UPD, &xbd_update_handler, self, IPL_BIO); - hypervisor_enable_event(_EVENT_VBD_UPD); -} -#endif - -static void -signal_requests_to_xen(void) -{ - - DPRINTF(XBDB_IO, ("signal_requests_to_xen: %x -> %x\n", - blk_ring->req_prod, req_prod)); - blk_ring->req_prod = req_prod; - last_req_prod = req_prod; - - hypervisor_notify_via_evtchn(blkif_evtchn); - return; -} - -static void -control_send(blkif_request_t *req, blkif_response_t *rsp) -{ - unsigned long flags; - struct xbdreq *xr; - - retry: - while ((req_prod - resp_cons) == BLKIF_RING_SIZE) { - tsleep((caddr_t) &req_prod, PUSER | PCATCH, - "blkfront", 0); - } - - save_and_cli(flags); - // simple_lock(&blkif_io_lock); - if ((req_prod - resp_cons) == BLKIF_RING_SIZE) { - // simple_unlock(&blkif_io_lock); - restore_flags(flags); - goto retry; - } - - blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req; - - GET_XBDREQ(xr); - blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = (unsigned long)xr; - // rec_ring[id].id = (unsigned long) req; - - // translate_req_to_pfn( &rec_ring[id], req ); - - req_prod++; - signal_requests_to_xen(); - - // simple_unlock(&blkif_io_lock); - restore_flags(flags); - - while (!blkif_control_rsp_valid) { - /* XXXcl: sleep/wakeup not ready yet - busy wait for now. 
- * interrupts are still of, so we pick up the control - * channel response on return from HYPERVISOR_yield(). - */ -#if 0 - tsleep((caddr_t)&blkif_control_rsp_valid, PUSER | PCATCH, - "blkfront", 0); -#else - HYPERVISOR_yield(); -#endif - } - - memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); - blkif_control_rsp_valid = 0; -} - -/* Send a driver status notification to the domain controller. */ -static void -send_driver_status(int ok) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_DRIVER_STATUS, - .length = sizeof(blkif_fe_driver_status_t), - }; - blkif_fe_driver_status_t *msg = (blkif_fe_driver_status_t *)cmsg.msg; - - msg->status = ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN; - - ctrl_if_send_message_block(&cmsg, NULL, 0, 0); -} - -/* Tell the controller to bring up the interface. */ -static void -send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = - (blkif_fe_interface_connect_t *)cmsg.msg; - paddr_t pa; - - pmap_extract(pmap_kernel(), (vaddr_t)blk_ring, &pa); - - msg->handle = 0; - msg->shmem_frame = xpmap_ptom_masked(pa) >> PAGE_SHIFT; - - ctrl_if_send_message_block(&cmsg, NULL, 0, 0); -} - -static void -setup_sysctl(void) -{ - struct sysctlnode *pnode; - - sysctl_createv(NULL, 0, NULL, NULL, - 0, - CTLTYPE_NODE, "machdep", NULL, - NULL, 0, NULL, 0, - CTL_MACHDEP, CTL_EOL); - - sysctl_createv(NULL, 0, NULL, &pnode, - 0, - CTLTYPE_NODE, "domain0", NULL, - NULL, 0, NULL, 0, - CTL_MACHDEP, CTL_CREATE, CTL_EOL); - - if (pnode == NULL) - return; - - sysctl_createv(NULL, 0, &pnode, &pnode, - 0, - CTLTYPE_NODE, "diskcookie", NULL, - NULL, 0, NULL, 0, - CTL_CREATE, CTL_EOL); - - if (pnode) - diskcookies = pnode; -} - -static int -xbd_wait_for_interfaces(void) -{ - - while (state != STATE_CONNECTED) - HYPERVISOR_yield(); - return 0; -} - -int -xbd_scan(struct device 
*self, struct xbd_attach_args *mainbus_xbda, - cfprint_t print) -{ - struct xbdreq *xr; - int i; - - blkctrl.xc_parent = self; - blkctrl.xc_cfprint = print; - - if (xen_start_info.flags & SIF_PRIVILEGED) - setup_sysctl(); - -#if NXBD > 0 - xbd_major = devsw_name2blk("xbd", NULL, 0); -#endif -#if NWD > 0 - xbd_wd_major = devsw_name2blk("wd", NULL, 0); - /* XXX Also handle the cdev majors since stuff like - * read_sector calls strategy on the cdev. This only works if - * all the majors we care about are different. - */ - xbd_wd_cdev_major = major(devsw_blk2chr(makedev(xbd_wd_major, 0))); -#endif -#if NSD > 0 - xbd_sd_major = devsw_name2blk("sd", NULL, 0); - xbd_sd_cdev_major = major(devsw_blk2chr(makedev(xbd_sd_major, 0))); -#endif -#if NCD > 0 - xbd_cd_major = devsw_name2blk("cd", NULL, 0); - xbd_cd_cdev_major = major(devsw_blk2chr(makedev(xbd_cd_major, 0))); -#endif - - MALLOC(xr, struct xbdreq *, BLKIF_RING_SIZE * sizeof(struct xbdreq), - M_DEVBUF, M_WAITOK | M_ZERO); -#ifdef DEBUG - xbd_allxr = xr; -#endif - for (i = 0; i < BLKIF_RING_SIZE - 1; i++) - PUT_XBDREQ(&xr[i]); - - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, xbd_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - send_driver_status(1); - - return 0; -} - -void -xbd_scan_finish(struct device *parent) -{ - int err; - - err = xbd_wait_for_interfaces(); - if (err) - ctrl_if_unregister_receiver(CMSG_NETIF_FE, xbd_ctrlif_rx); -} - -#if NXBD > 0 -int -xbd_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xbd_attach_args *xa = (struct xbd_attach_args *)aux; - - if (strcmp(xa->xa_device, "xbd") == 0) - return 1; - return 0; -} -#endif - -#if NWD > 0 -int -xbd_wd_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xbd_attach_args *xa = (struct xbd_attach_args *)aux; - - if (strcmp(xa->xa_device, "wd") == 0) - return 1; - return 0; -} -#endif - -#if NSD > 0 -int -xbd_sd_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xbd_attach_args *xa = 
(struct xbd_attach_args *)aux; - - if (strcmp(xa->xa_device, "sd") == 0) - return 1; - return 0; -} -#endif - -#if NCD > 0 -int -xbd_cd_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xbd_attach_args *xa = (struct xbd_attach_args *)aux; - - if (strcmp(xa->xa_device, "cd") == 0) - return 1; - return 0; -} -#endif - -static void -xbd_attach(struct device *parent, struct device *self, void *aux) -{ - struct xbd_attach_args *xbda = (struct xbd_attach_args *)aux; - struct xbd_softc *xs = (struct xbd_softc *)self; - - aprint_normal(": Xen Virtual Block Device"); - - simple_lock_init(&xs->sc_slock); - dk_sc_init(&xs->sc_dksc, xs, xs->sc_dev.dv_xname); - xbdinit(xs, xbda->xa_xd, xbda->xa_dkintf); - if (diskcookies) { - /* XXX beware that xs->sc_xd_device is a long */ - sysctl_createv(NULL, 0, &diskcookies, NULL, - 0, - CTLTYPE_INT, xs->sc_dev.dv_xname, NULL, - NULL, 0, &xs->sc_xd_device, 0, - CTL_CREATE, CTL_EOL); - } - -#if NRND > 0 - rnd_attach_source(&xs->sc_rnd_source, xs->sc_dev.dv_xname, - RND_TYPE_DISK, 0); -#endif -} - -static int -xbd_detach(struct device *dv, int flags) -{ - struct xbd_softc *xs = (struct xbd_softc *)dv; - - /* - * Mark disk about to be removed (between now and when the xs - * will be freed). - */ - xs->sc_shutdown = 1; - - /* And give it some time to settle if it's busy. */ - if (xs->sc_dksc.sc_dkdev.dk_busy > 0) - tsleep(&xs, PWAIT, "xbdetach", hz); - - /* Detach the disk. */ - disk_detach(&xs->sc_dksc.sc_dkdev); - - /* XXX decrement bufq_users and free? */ - - /* XXX no need to remove sysctl nodes since they only exist - * in domain0 and domain0's devices are never removed. 
- */ - - return 0; -} - -int -xbdopen(dev_t dev, int flags, int fmt, struct proc *p) -{ - struct xbd_softc *xs; - - DPRINTF_FOLLOW(("xbdopen(0x%04x, %d)\n", dev, flags)); - switch (fmt) { - case S_IFCHR: - GETXBD_SOFTC_CDEV(xs, dev); - break; - case S_IFBLK: - GETXBD_SOFTC(xs, dev); - break; - default: - return ENXIO; - } - return dk_open(xs->sc_di, &xs->sc_dksc, dev, flags, fmt, p); -} - -int -xbdclose(dev_t dev, int flags, int fmt, struct proc *p) -{ - struct xbd_softc *xs; - - DPRINTF_FOLLOW(("xbdclose(%d, %d)\n", dev, flags)); - switch (fmt) { - case S_IFCHR: - GETXBD_SOFTC_CDEV(xs, dev); - break; - case S_IFBLK: - GETXBD_SOFTC(xs, dev); - break; - default: - return ENXIO; - } - return dk_close(xs->sc_di, &xs->sc_dksc, dev, flags, fmt, p); -} - -void -xbdstrategy(struct buf *bp) -{ - struct xbd_softc *xs = getxbd_softc(bp->b_dev); - - DPRINTF_FOLLOW(("xbdstrategy(%p): b_bcount = %ld\n", bp, - (long)bp->b_bcount)); - - if (xs == NULL || xs->sc_shutdown) { - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - biodone(bp); - return; - } - - dk_strategy(xs->sc_di, &xs->sc_dksc, bp); - return; -} - -int -xbdsize(dev_t dev) -{ - struct xbd_softc *xs = getxbd_softc(dev); - - DPRINTF_FOLLOW(("xbdsize(%d)\n", dev)); - if (xs == NULL || xs->sc_shutdown) - return -1; - return dk_size(xs->sc_di, &xs->sc_dksc, dev); -} - -static void -map_align(struct xbdreq *xr) -{ - int s; - - s = splvm(); - xr->xr_aligned = uvm_km_kmemalloc1(kmem_map, NULL, - xr->xr_bqueue, XEN_BSIZE, UVM_UNKNOWN_OFFSET, - 0/* UVM_KMF_NOWAIT */); - splx(s); - DPRINTF(XBDB_IO, ("map_align(%p): bp %p addr %p align 0x%08lx " - "size 0x%04lx\n", xr, xr->xr_bp, xr->xr_bp->b_data, - xr->xr_aligned, xr->xr_bqueue)); - xr->xr_data = xr->xr_aligned; - if ((xr->xr_bp->b_flags & B_READ) == 0) - memcpy((void *)xr->xr_aligned, xr->xr_bp->b_data, - xr->xr_bqueue); -} - -static void -unmap_align(struct xbdreq *xr) -{ - int s; - - if (xr->xr_bp->b_flags & B_READ) - memcpy(xr->xr_bp->b_data, (void *)xr->xr_aligned, - 
xr->xr_bp->b_bcount); - DPRINTF(XBDB_IO, ("unmap_align(%p): bp %p addr %p align 0x%08lx " - "size 0x%04lx\n", xr, xr->xr_bp, xr->xr_bp->b_data, - xr->xr_aligned, xr->xr_bp->b_bcount)); - s = splvm(); - uvm_km_free(kmem_map, xr->xr_aligned, xr->xr_bp->b_bcount); - splx(s); - xr->xr_aligned = (vaddr_t)0; -} - -static void -fill_ring(struct xbdreq *xr) -{ - struct xbdreq *pxr = xr->xr_parent; - paddr_t pa; - unsigned long ma; - vaddr_t addr, off; - blkif_request_t *ring_req; - int breq, nr_sectors, fsect, lsect; - - /* Fill out a communications ring structure. */ - ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req; - ring_req->id = (unsigned long)xr; - ring_req->operation = pxr->xr_bp->b_flags & B_READ ? BLKIF_OP_READ : - BLKIF_OP_WRITE; - ring_req->sector_number = pxr->xr_bn; - ring_req->device = pxr->xr_sc->sc_xd_device; - - DPRINTF(XBDB_IO, ("fill_ring(%d): bp %p sector %llu pxr %p xr %p\n", - MASK_BLKIF_IDX(req_prod), pxr->xr_bp, - (unsigned long long)pxr->xr_bn, - pxr, xr)); - - xr->xr_breq = 0; - ring_req->nr_segments = 0; - addr = trunc_page(pxr->xr_data); - off = pxr->xr_data - addr; - while (pxr->xr_bqueue > 0) { -#if 0 - pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), - addr, &pa); -#else - pmap_extract(pmap_kernel(), addr, &pa); -#endif - ma = xpmap_ptom_masked(pa); - DIAGCONDPANIC((ma & (XEN_BSIZE - 1)) != 0, - ("xbd request ma not sector aligned")); - - if (pxr->xr_bqueue > PAGE_SIZE - off) - breq = PAGE_SIZE - off; - else - breq = pxr->xr_bqueue; - - nr_sectors = breq >> XEN_BSHIFT; - DIAGCONDPANIC(nr_sectors >= XEN_BSIZE, - ("xbd request nr_sectors >= XEN_BSIZE")); - - fsect = off >> XEN_BSHIFT; - lsect = fsect + nr_sectors - 1; - DIAGCONDPANIC(fsect > 7, ("xbd request fsect > 7")); - DIAGCONDPANIC(lsect > 7, ("xbd request lsect > 7")); - - DPRINTF(XBDB_IO, ("fill_ring(%d): va 0x%08lx pa 0x%08lx " - "ma 0x%08lx, sectors %d, left %ld/%ld\n", - MASK_BLKIF_IDX(req_prod), addr, pa, ma, nr_sectors, - pxr->xr_bqueue >> XEN_BSHIFT, 
pxr->xr_bqueue)); - - ring_req->frame_and_sects[ring_req->nr_segments++] = - ma | (fsect<<3) | lsect; - addr += PAGE_SIZE; - pxr->xr_bqueue -= breq; - pxr->xr_bn += nr_sectors; - xr->xr_breq += breq; - off = 0; - if (ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST) - break; - } - pxr->xr_data = addr; - - req_prod++; -} - -static void -xbdresume(void) -{ - struct xbdreq *pxr, *xr; - struct xbd_softc *xs; - struct buf *bp; - - while ((pxr = SIMPLEQ_FIRST(&xbdr_suspended)) != NULL) { - DPRINTF(XBDB_IO, ("xbdstart: resuming xbdreq %p for bp %p\n", - pxr, pxr->xr_bp)); - bp = pxr->xr_bp; - xs = getxbd_softc(bp->b_dev); - if (xs == NULL || xs->sc_shutdown) { - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - } - if (bp->b_flags & B_ERROR) { - pxr->xr_bdone -= pxr->xr_bqueue; - pxr->xr_bqueue = 0; - if (pxr->xr_bdone == 0) { - bp->b_resid = bp->b_bcount; - if (pxr->xr_aligned) - unmap_align(pxr); - PUT_XBDREQ(pxr); - if (xs) { - disk_unbusy(&xs->sc_dksc.sc_dkdev, - (bp->b_bcount - bp->b_resid), - (bp->b_flags & B_READ)); -#if NRND > 0 - rnd_add_uint32(&xs->sc_rnd_source, - bp->b_blkno); -#endif - } - biodone(bp); - } - continue; - } - while (__predict_true(pxr->xr_bqueue > 0)) { - GET_XBDREQ(xr); - if (__predict_false(xr == NULL)) - goto out; - xr->xr_parent = pxr; - fill_ring(xr); - } - DPRINTF(XBDB_IO, ("xbdstart: resumed xbdreq %p for bp %p\n", - pxr, bp)); - SIMPLEQ_REMOVE_HEAD(&xbdr_suspended, xr_suspended); - } - - out: - return; -} - -static int -xbdstart(struct dk_softc *dksc, struct buf *bp) -{ - struct xbd_softc *xs; - struct xbdreq *pxr, *xr; - struct partition *pp; - daddr_t bn; - int ret, runqueue; - - DPRINTF_FOLLOW(("xbdstart(%p, %p)\n", dksc, bp)); - - runqueue = 1; - ret = -1; - - xs = getxbd_softc(bp->b_dev); - if (xs == NULL || xs->sc_shutdown) { - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - biodone(bp); - return 0; - } - dksc = &xs->sc_dksc; - - /* XXXrcd: - * Translate partition relative blocks to absolute blocks, - * this probably belongs 
(somehow) in dksubr.c, since it - * is independant of the underlying code... This will require - * that the interface be expanded slightly, though. - */ - bn = bp->b_blkno; - if (DISKPART(bp->b_dev) != RAW_PART) { - pp = &xs->sc_dksc.sc_dkdev.dk_label-> - d_partitions[DISKPART(bp->b_dev)]; - bn += pp->p_offset; - } - - DPRINTF(XBDB_IO, ("xbdstart: addr %p, sector %llu, " - "count %ld [%s]\n", bp->b_data, (unsigned long long)bn, - bp->b_bcount, bp->b_flags & B_READ ? "read" : "write")); - - GET_XBDREQ(pxr); - if (__predict_false(pxr == NULL)) - goto out; - - disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */ - /* - * We have a request slot, return 0 to make dk_start remove - * the bp from the work queue. - */ - ret = 0; - - pxr->xr_bp = bp; - pxr->xr_parent = pxr; - pxr->xr_bn = bn; - pxr->xr_bqueue = bp->b_bcount; - pxr->xr_bdone = bp->b_bcount; - pxr->xr_data = (vaddr_t)bp->b_data; - pxr->xr_sc = xs; - - if (pxr->xr_data & (XEN_BSIZE - 1)) - map_align(pxr); - - fill_ring(pxr); - - while (__predict_false(pxr->xr_bqueue > 0)) { - GET_XBDREQ(xr); - if (__predict_false(xr == NULL)) - break; - xr->xr_parent = pxr; - fill_ring(xr); - } - - if (__predict_false(pxr->xr_bqueue > 0)) { - SIMPLEQ_INSERT_TAIL(&xbdr_suspended, pxr, - xr_suspended); - DPRINTF(XBDB_IO, ("xbdstart: suspended xbdreq %p " - "for bp %p\n", pxr, bp)); - } else if (CANGET_XBDREQ() && BUFQ_PEEK(&bufq) != NULL) { - /* - * We have enough resources to start another bp and - * there are additional bps on the queue, dk_start - * will call us again and we'll run the queue then. - */ - runqueue = 0; - } - - out: - if (runqueue && last_req_prod != req_prod) - signal_requests_to_xen(); - - return ret; -} - -static int -xbd_response_handler(void *arg) -{ - struct buf *bp; - struct xbd_softc *xs; - blkif_response_t *ring_resp; - struct xbdreq *pxr, *xr; - BLKIF_RING_IDX i, rp; - - rp = blk_ring->resp_prod; - __insn_barrier(); /* Ensure we see queued responses up to 'rp'. 
*/ - - for (i = resp_cons; i != rp; i++) { - ring_resp = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp; - xr = (struct xbdreq *)ring_resp->id; - - switch (ring_resp->operation) { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - pxr = xr->xr_parent; - - DPRINTF(XBDB_IO, ("xbd_response_handler(%d): pxr %p " - "xr %p bdone %04lx breq %04lx\n", i, pxr, - xr, pxr->xr_bdone, xr->xr_breq)); - pxr->xr_bdone -= xr->xr_breq; - DIAGCONDPANIC(pxr->xr_bdone < 0, - ("xbd_response_handler: pxr->xr_bdone < 0")); - - if (__predict_false(ring_resp->status)) { - pxr->xr_bp->b_flags |= B_ERROR; - pxr->xr_bp->b_error = EIO; - } - - if (xr != pxr) { - PUT_XBDREQ(xr); - if (!SIMPLEQ_EMPTY(&xbdr_suspended)) - xbdresume(); - } - - if (pxr->xr_bdone == 0) { - bp = pxr->xr_bp; - xs = getxbd_softc(bp->b_dev); - if (xs == NULL) { /* don't fail bp if we're shutdown */ - bp->b_flags |= B_ERROR; - bp->b_error = EIO; - } - DPRINTF(XBDB_IO, ("xbd_response_handler(%d): " - "completed bp %p\n", i, bp)); - if (bp->b_flags & B_ERROR) - bp->b_resid = bp->b_bcount; - else - bp->b_resid = 0; - - if (pxr->xr_aligned) - unmap_align(pxr); - - PUT_XBDREQ(pxr); - if (xs) { - disk_unbusy(&xs->sc_dksc.sc_dkdev, - (bp->b_bcount - bp->b_resid), - (bp->b_flags & B_READ)); -#if NRND > 0 - rnd_add_uint32(&xs->sc_rnd_source, - bp->b_blkno); -#endif - } - biodone(bp); - if (!SIMPLEQ_EMPTY(&xbdr_suspended)) - xbdresume(); - /* XXX possible lockup if this was the only - * active device and requests were held back in - * the queue. 
- */ - if (xs) - dk_iodone(xs->sc_di, &xs->sc_dksc); - } - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, ring_resp, - sizeof(*ring_resp)); - blkif_control_rsp_valid = 1; - wakeup((caddr_t)&blkif_control_rsp_valid); - break; - default: - panic("unknown response"); - } - } - resp_cons = i; - /* check if xbdresume queued any requests */ - if (last_req_prod != req_prod) - signal_requests_to_xen(); - return 0; -} - -#if 0 -static void -xbd_update_create_kthread(void *arg) -{ - - kthread_create1(xbd_update_kthread, arg, NULL, "xbdupdate"); -} - -static void -xbd_update_kthread(void *arg) -{ - struct device *parent = arg; - struct xbd_attach_args *xbda; - struct device *dev; - vdisk_t *xd; - vdisk_t *vbd_info_update, *vbd_info_old; - int i, j, new_nr_vbds; - extern int hypervisor_print(void *, const char *); - - MALLOC(vbd_info_update, vdisk_t *, MAX_VBDS * - sizeof(vdisk_t), M_DEVBUF, M_WAITOK); - - for (;;) { - memset(vbd_info_update, 0, MAX_VBDS * sizeof(vdisk_t)); - new_nr_vbds = get_vbd_info(vbd_info_update); - - if (memcmp(vbd_info, vbd_info_update, MAX_VBDS * - sizeof(vdisk_t)) == 0) { - FREE(vbd_info_update, M_DEVBUF); - tsleep(parent, PWAIT, "xbdupd", 0); - MALLOC(vbd_info_update, vdisk_t *, MAX_VBDS * - sizeof(vdisk_t), M_DEVBUF, M_WAITOK); - continue; - } - - j = 0; - for (i = 0; i < new_nr_vbds; i++) { - while (j < nr_vbds && - vbd_info[j].device < vbd_info_update[i].device) { - DPRINTF(XBDB_HOTPLUG, - ("delete device %x size %lx\n", - vbd_info[j].device, - vbd_info[j].capacity)); - xd = &vbd_info[j]; - dev = find_device(xd); - if (dev) - config_detach(dev, DETACH_FORCE); - j++; - } - if (j < nr_vbds && - vbd_info[j].device == vbd_info_update[i].device) { - DPRINTF(XBDB_HOTPLUG, - ("update device %x size %lx size %lx\n", - vbd_info_update[i].device, - vbd_info[j].capacity, - vbd_info_update[i].capacity)); - j++; - } else { - DPRINTF(XBDB_HOTPLUG, - ("add device %x size %lx\n", - vbd_info_update[i].device, - vbd_info_update[i].capacity)); - xd = 
&vbd_info_update[i]; - xbda = get_xbda(xd); - if (xbda) { - xbda->xa_xd = xd; - config_found(parent, xbda, hypervisor_print); - } - } - } - - while (j < nr_vbds) { - DPRINTF(XBDB_HOTPLUG, ("delete device %x\n", - vbd_info[j].device)); - xd = &vbd_info[j]; - dev = find_device(xd); - if (dev) - config_detach(dev, DETACH_FORCE); - j++; - } - - nr_vbds = new_nr_vbds; - - vbd_info_old = vbd_info; - vbd_info = vbd_info_update; - vbd_info_update = vbd_info_old; - } -} - -static int -xbd_update_handler(void *arg) -{ - - wakeup(arg); - - return 0; -} -#endif - -/* XXX: we should probably put these into dksubr.c, mostly */ -int -xbdread(dev_t dev, struct uio *uio, int flags) -{ - struct xbd_softc *xs; - struct dk_softc *dksc; - - DPRINTF_FOLLOW(("xbdread(%d, %p, %d)\n", dev, uio, flags)); - GETXBD_SOFTC_CDEV(xs, dev); - dksc = &xs->sc_dksc; - if ((dksc->sc_flags & DKF_INITED) == 0) - return ENXIO; - /* XXX see the comments about minphys in ccd.c */ - return physio(xbdstrategy, NULL, dev, B_READ, minphys, uio); -} - -/* XXX: we should probably put these into dksubr.c, mostly */ -int -xbdwrite(dev_t dev, struct uio *uio, int flags) -{ - struct xbd_softc *xs; - struct dk_softc *dksc; - - DPRINTF_FOLLOW(("xbdwrite(%d, %p, %d)\n", dev, uio, flags)); - GETXBD_SOFTC_CDEV(xs, dev); - dksc = &xs->sc_dksc; - if ((dksc->sc_flags & DKF_INITED) == 0) - return ENXIO; - /* XXX see the comments about minphys in ccd.c */ - return physio(xbdstrategy, NULL, dev, B_WRITE, minphys, uio); -} - -int -xbdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) -{ - struct xbd_softc *xs; - struct dk_softc *dksc; - int ret; - - DPRINTF_FOLLOW(("xbdioctl(%d, %08lx, %p, %d, %p)\n", - dev, cmd, data, flag, p)); - GETXBD_SOFTC(xs, dev); - dksc = &xs->sc_dksc; - - if ((ret = lockmgr(&dksc->sc_lock, LK_EXCLUSIVE, NULL)) != 0) - return ret; - - switch (cmd) { - default: - ret = dk_ioctl(xs->sc_di, dksc, dev, cmd, data, flag, p); - break; - } - - lockmgr(&dksc->sc_lock, LK_RELEASE, NULL); - 
return ret; -} - -int -xbdioctl_cdev(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) -{ - dev_t bdev; - - bdev = devsw_chr2blk(dev); - if (bdev == NODEV) - return ENXIO; - return xbdioctl(bdev, cmd, data, flag, p); -} - -int -xbddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) -{ - struct xbd_softc *xs; - - DPRINTF_FOLLOW(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va, - (unsigned long)size)); - GETXBD_SOFTC(xs, dev); - return dk_dump(xs->sc_di, &xs->sc_dksc, dev, blkno, va, size); -} - -static int -xbdinit(struct xbd_softc *xs, vdisk_t *xd, struct dk_intf *dkintf) -{ - struct dk_geom *pdg; - char buf[9]; - int ret; - - ret = 0; - - xs->sc_dksc.sc_size = xd->capacity; - xs->sc_xd_device = xd->device; - xs->sc_di = dkintf; - xs->sc_shutdown = 0; - - /* - * XXX here we should probe the underlying device. If we - * are accessing a partition of type RAW_PART, then - * we should populate our initial geometry with the - * geometry that we discover from the device. - */ - pdg = &xs->sc_dksc.sc_geom; - pdg->pdg_secsize = DEV_BSIZE; - pdg->pdg_ntracks = 1; - pdg->pdg_nsectors = 1024 * (1024 / pdg->pdg_secsize); - pdg->pdg_ncylinders = xs->sc_dksc.sc_size / pdg->pdg_nsectors; - - /* - * We have one shared bufq for all devices because otherwise - * requests can stall if there were no free request slots - * available in xbdstart and this device had no requests - * in-flight which would trigger a dk_start from the interrupt - * handler. - * XXX this assumes that we can just memcpy struct bufq_state - * to share it between devices. - * XXX we reference count the usage in case so we can de-alloc - * the bufq if all devices are deconfigured. - */ - if (bufq_users == 0) { - bufq_alloc(&bufq, BUFQ_FCFS); - bufq_users = 1; - } - memcpy(&xs->sc_dksc.sc_bufq, &bufq, sizeof(struct bufq_state)); - - xs->sc_dksc.sc_flags |= DKF_INITED; - - /* Attach the disk. */ - disk_attach(&xs->sc_dksc.sc_dkdev); - - /* Try and read the disklabel. 
*/ - dk_getdisklabel(xs->sc_di, &xs->sc_dksc, 0 /* XXX ? */); - - format_bytes(buf, sizeof(buf), (uint64_t)xs->sc_dksc.sc_size * - pdg->pdg_secsize); - printf(" %s\n", buf); - -/* out: */ - return ret; -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/xen_debug.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xen_debug.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,444 +0,0 @@ -/* $NetBSD: xen_debug.c,v 1.1.2.1 2004/05/22 15:59:31 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * - * Copyright (c) 2002-2003, K A Fraser & R Neugebauer - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xen_debug.c,v 1.1.2.1 2004/05/22 15:59:31 he Exp $"); - -#define XENDEBUG - -#include <sys/param.h> -#include <sys/systm.h> - -#include <machine/stdarg.h> -#include <machine/xen.h> -#include <machine/hypervisor.h> - -#ifdef XENDEBUG - -#define PRINTK_BUFSIZE 1024 -void -printk(const char *fmt, ...) -{ - va_list ap; - int ret; - static char buf[PRINTK_BUFSIZE]; - - va_start(ap, fmt); - ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); - va_end(ap); - buf[ret] = 0; - (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf); -} - -void -vprintk(const char *fmt, va_list ap) -{ - int ret; - static char buf[PRINTK_BUFSIZE]; - - ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); - buf[ret] = 0; - (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf); -} - -#endif - -#ifdef XENDEBUG_LOW - -int xen_once = 0; - -void hypervisor_callback(void); -void failsafe_callback(void); - -void xen_dbglow_init(void); -void -xen_dbglow_init() -{ - start_info_t *si; -#if 0 - int i; -#endif - - si = &xen_start_info; - - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); - - trap_init(); - - /* __sti(); */ - - /* print out some useful information */ - printk(version); - printk("start_info: %p\n", si); - printk(" nr_pages: %lu", si->nr_pages); - printk(" shared_inf: %p (was %p)\n", HYPERVISOR_shared_info, - si->shared_info); - printk(" pt_base: %p", (void *)si->pt_base); - printk(" mod_start: 0x%lx\n", si->mod_start); - printk(" mod_len: %lu\n", si->mod_len); -#if 0 - printk(" net_rings: "); - for (i = 0; i < MAX_DOMAIN_VIFS; i++) { - if (si->net_rings[i] == 0) - break; - printk(" %lx", si->net_rings[i]); - }; - printk("\n"); - printk(" blk_ring: 0x%lx\n", si->blk_ring); -#endif - printk(" dom_id: %d\n", si->dom_id); - printk(" flags: 0x%lx\n", si->flags); - printk(" cmd_line: %s\n", si->cmd_line ? 
- (const char *)si->cmd_line : "NULL"); -} - - -void xen_dbg0(char *); -void -xen_dbg0(char *end) -{ - struct cpu_info *ci; - - ci = &cpu_info_primary; - if (xen_once) - printk("xencpu level %d ipending %08x master %08x\n", - ci->ci_ilevel, ci->ci_ipending, - HYPERVISOR_shared_info->events_mask); - /* ipending %08x imask %08x iunmask %08x */ - /* ci->ci_imask[IPL_NET], ci->ci_iunmask[IPL_NET]); */ -} - -void xen_dbg1(void *esp, int ss); -void -xen_dbg1(void *esp, int ss) -{ -#if 1 - struct cpu_info *ci; - - ci = &cpu_info_primary; - if (xen_once) - printk("xenhighlevel %d ipending %08x master %08x events %08x\n", - ci->ci_ilevel, ci->ci_ipending, - HYPERVISOR_shared_info->events_mask, HYPERVISOR_shared_info->events); -#else - printk("stack switch %p %d/%d, sp %p\n", esp, ss, IDXSEL(ss), &ss); -#endif -} - -void xen_dbg2(void); -void -xen_dbg2(void) -{ - if (xen_once) - printk("xen_dbg2\n"); -} - -void xen_dbg3(void *, void *); -void -xen_dbg3(void *ss, void *esp) -{ - if (xen_once) - printk("xen_dbg3 %p %p\n", ss, esp); -} - -void xen_dbg4(void *); -void -xen_dbg4(void *esi) -{ - - printk("xen_dbg4 %p\n", esi); - for(;;); -} - - - - -static void do_exit(void); - -/* - * These are assembler stubs in vector.S. - * They are the actual entry points for virtual exceptions. 
- */ -void divide_error(void); -void debug(void); -void int3(void); -void overflow(void); -void bounds(void); -void invalid_op(void); -void device_not_available(void); -void double_fault(void); -void coprocessor_segment_overrun(void); -void invalid_TSS(void); -void segment_not_present(void); -void stack_segment(void); -void general_protection(void); -void page_fault(void); -void coprocessor_error(void); -void simd_coprocessor_error(void); -void alignment_check(void); -void spurious_interrupt_bug(void); -void machine_check(void); - -static void -dump_regs(struct pt_regs *regs) -{ - int in_kernel = 1; - unsigned long esp; - unsigned short ss; - - esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; - if (regs->xcs & 2) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } - printf("EIP: %04x:[<%08lx>]\n", - 0xffff & regs->xcs, regs->eip); - printf("EFLAGS: %08lx\n",regs->eflags); - printf("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printf("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printf("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printf("\n"); -} - - -static inline void -dump_code(unsigned eip) -{ - unsigned *ptr = (unsigned *)eip; - int x; - - printk("Bytes at eip:\n"); - for (x = -4; x < 5; x++) - printf("%x", ptr[x]); -} - - -/* - * C handlers here have their parameter-list constructed by the - * assembler stubs above. Each one gets a pointer to a list - * of register values (to be restored at end of exception). - * Some will also receive an error code -- this is the code that - * was generated by the processor for the underlying real exception. - * - * Note that the page-fault exception is special. It also receives - * the faulting linear address. Normally this would be found in - * register CR2, but that is not accessible in a virtualised OS. 
- */ - -static void inline -do_trap(int trapnr, char *str, struct pt_regs *regs, long error_code) -{ - - printk("FATAL: Unhandled Trap (see mini-os:traps.c)"); - printf("%d %s", trapnr, str); - dump_regs(regs); - dump_code(regs->eip); - - do_exit(); -} - -#define DO_ERROR(trapnr, str, name) \ -void do_##name(struct pt_regs *regs, long error_code); \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_trap(trapnr, str, regs, error_code); \ -} - -#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \ -void do_##name(struct pt_regs *regs, long error_code); \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_trap(trapnr, str, regs, error_code); \ -} - -DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip) -DO_ERROR( 3, "int3", int3) -DO_ERROR( 4, "overflow", overflow) -DO_ERROR( 5, "bounds", bounds) -DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) -DO_ERROR( 7, "device not available", device_not_available) -DO_ERROR( 8, "double fault", double_fault) -DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, "invalid TSS", invalid_TSS) -DO_ERROR(11, "segment not present", segment_not_present) -DO_ERROR(12, "stack segment", stack_segment) -DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, "machine check", machine_check) - -void do_page_fault(struct pt_regs *, long, unsigned long); -void -do_page_fault(struct pt_regs *regs, long error_code, unsigned long address) -{ - - printk("Page fault\n"); - printk("Address: 0x%lx", address); - printk("Error Code: 0x%lx", error_code); - printk("eip: \t 0x%lx", regs->eip); - do_exit(); -} - -void do_general_protection(struct pt_regs *, long); -void -do_general_protection(struct pt_regs *regs, long error_code) -{ - - HYPERVISOR_shared_info->events_mask = 0; - printk("GPF\n"); - printk("Error Code: 0x%lx", error_code); - dump_regs(regs); - dump_code(regs->eip); - do_exit(); -} - - -void 
do_debug(struct pt_regs *, long); -void -do_debug(struct pt_regs *regs, long error_code) -{ - - printk("Debug exception\n"); -#define TF_MASK 0x100 - regs->eflags &= ~TF_MASK; - dump_regs(regs); - do_exit(); -} - - - -void do_coprocessor_error(struct pt_regs *, long); -void -do_coprocessor_error(struct pt_regs *regs, long error_code) -{ - - printk("Copro error\n"); - dump_regs(regs); - dump_code(regs->eip); - do_exit(); -} - -void simd_math_error(void *); -void -simd_math_error(void *eip) -{ - - printk("SIMD error\n"); -} - -void do_simd_coprocessor_error(struct pt_regs *, long); -void -do_simd_coprocessor_error(struct pt_regs *regs, long error_code) -{ - - printk("SIMD copro error\n"); -} - -void do_spurious_interrupt_bug(struct pt_regs *, long); -void -do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) -{ -} - -static void -do_exit(void) -{ - - HYPERVISOR_exit(); -} - -/* - * Submit a virtual IDT to teh hypervisor. This consists of tuples - * (interrupt vector, privilege ring, CS:EIP of handler). - * The 'privilege ring' field specifies the least-privileged ring that - * can trap to that vector using a software-interrupt instruction (INT). 
- */ -static trap_info_t trap_table[] = { - { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, - { 1, 0, __KERNEL_CS, (unsigned long)debug }, - { 3, 3, __KERNEL_CS, (unsigned long)int3 }, - { 4, 3, __KERNEL_CS, (unsigned long)overflow }, - { 5, 3, __KERNEL_CS, (unsigned long)bounds }, - { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, - { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, - { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, - { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, - { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, - { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, - { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, - { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, - { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, - { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, - { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, - { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, - { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, - { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, - { 0, 0, 0, 0 } -}; - -void -trap_init(void) -{ - - HYPERVISOR_set_trap_table(trap_table); -} -#endif diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/xencons.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xencons.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,414 +0,0 @@ -/* $NetBSD: xencons.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xencons.c,v 1.1.2.1 2004/05/22 15:59:21 he Exp $"); - -#include <sys/param.h> -#include <sys/ioctl.h> -#include <sys/proc.h> -#include <sys/tty.h> -#include <sys/systm.h> -#include <sys/device.h> -#include <sys/conf.h> - -#include <machine/stdarg.h> -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/evtchn.h> -#include <machine/ctrl_if.h> - -#include <dev/cons.h> - -#include <ddb/db_output.h> /* XXX for db_max_line */ - -static int xencons_isconsole = 0; - -#define XENCONS_UNIT(x) (minor(x)) -#define XENCONS_BURST 128 - -int xencons_match (struct device *, struct cfdata *, void *); -void xencons_attach (struct device *, struct device *, void *); -/* int xencons_intr (void *); */ -void xencons_init (void); - -struct xencons_softc { - struct device sc_dev; - struct tty *sc_tty; -}; - -CFATTACH_DECL(xencons, sizeof(struct xencons_softc), - xencons_match, xencons_attach, NULL, NULL); - -extern struct cfdriver xencons_cd; - -dev_type_open(xencons_open); -dev_type_close(xencons_close); -dev_type_read(xencons_read); -dev_type_write(xencons_write); -dev_type_ioctl(xencons_ioctl); -dev_type_stop(xencons_stop); -dev_type_tty(xencons_tty); -dev_type_poll(xencons_poll); - -const struct cdevsw xencons_cdevsw = { - xencons_open, xencons_close, xencons_read, xencons_write, - xencons_ioctl, xencons_stop, xencons_tty, xencons_poll, - NULL, ttykqfilter, D_TTY -}; - - -static void xencons_rx(ctrl_msg_t *, unsigned long); -void xenconscn_attach(void); -int xenconscn_getc(dev_t); -void xenconscn_putc(dev_t, int); -void xenconscn_pollc(dev_t, int); - -static struct consdev xencons = { - NULL, NULL, xenconscn_getc, xenconscn_putc, xenconscn_pollc, - NULL, NULL, NULL, NODEV, CN_NORMAL -}; - -void xencons_start (struct tty *); -int xencons_param (struct tty *, struct termios *); - -int -xencons_match(struct device *parent, struct cfdata *match, void *aux) -{ - struct xencons_attach_args *xa = (struct 
xencons_attach_args *)aux; - - if (strcmp(xa->xa_device, "xencons") == 0) - return 1; - return 0; -} - -void -xencons_attach(struct device *parent, struct device *self, void *aux) -{ - struct xencons_softc *sc = (void *)self; - - aprint_normal(": Xen Virtual Console Driver\n"); - - if (xencons_isconsole) { - int maj; - - /* Locate the major number. */ - maj = cdevsw_lookup_major(&xencons_cdevsw); - - /* There can be only one, but it can have any unit number. */ - cn_tab->cn_dev = makedev(maj, sc->sc_dev.dv_unit); - - aprint_verbose("%s: console major %d, unit %d\n", - sc->sc_dev.dv_xname, maj, sc->sc_dev.dv_unit); - - /* Set db_max_line to avoid paging. */ - db_max_line = 0x7fffffff; - - (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0); - } -} - -int -xencons_open(dev_t dev, int flag, int mode, struct proc *p) -{ - struct xencons_softc *sc; - int unit = XENCONS_UNIT(dev); - struct tty *tp; - - sc = device_lookup(&xencons_cd, unit); - if (sc == NULL) - return (ENXIO); - - if (!sc->sc_tty) { - tp = sc->sc_tty = ttymalloc(); - tty_attach(tp); - } else - tp = sc->sc_tty; - - tp->t_oproc = xencons_start; - tp->t_param = xencons_param; - tp->t_dev = dev; - if ((tp->t_state & TS_ISOPEN) == 0) { - ttychars(tp); - tp->t_iflag = TTYDEF_IFLAG; - tp->t_oflag = TTYDEF_OFLAG; - tp->t_cflag = TTYDEF_CFLAG; - tp->t_lflag = TTYDEF_LFLAG; - tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; - xencons_param(tp, &tp->t_termios); - ttsetwater(tp); - } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0) - return (EBUSY); - tp->t_state |= TS_CARR_ON; - - return ((*tp->t_linesw->l_open)(dev, tp)); -} - -int -xencons_close(dev_t dev, int flag, int mode, struct proc *p) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - - if (tp == NULL) - return (0); - (*tp->t_linesw->l_close)(tp, flag); - ttyclose(tp); -#ifdef notyet /* XXX */ - ttyfree(tp); -#endif - return (0); -} - -int -xencons_read(dev_t dev, struct uio *uio, 
int flag) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - - return ((*tp->t_linesw->l_read)(tp, uio, flag)); -} - -int -xencons_write(dev_t dev, struct uio *uio, int flag) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - - return ((*tp->t_linesw->l_write)(tp, uio, flag)); -} - -int -xencons_poll(dev_t dev, int events, struct proc *p) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - - return ((*tp->t_linesw->l_poll)(tp, events, p)); -} - -struct tty * -xencons_tty(dev_t dev) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - - return (tp); -} - -int -xencons_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) -{ - struct xencons_softc *sc = device_lookup(&xencons_cd, - XENCONS_UNIT(dev)); - struct tty *tp = sc->sc_tty; - int error; - - error = (*tp->t_linesw->l_ioctl)(tp, cmd, data, flag, p); - if (error != EPASSTHROUGH) - return (error); - - error = ttioctl(tp, cmd, data, flag, p); - if (error != EPASSTHROUGH) - return (error); - - switch (cmd) { - default: - return (EPASSTHROUGH); - } - -#ifdef DIAGNOSTIC - panic("xencons_ioctl: impossible"); -#endif -} - -void -xencons_start(struct tty *tp) -{ - struct clist *cl; - int s, len; - - s = spltty(); - if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) - goto out; - tp->t_state |= TS_BUSY; - splx(s); - - /* - * We need to do this outside spl since it could be fairly - * expensive and we don't want our serial ports to overflow. 
- */ - cl = &tp->t_outq; - if (xen_start_info.flags & SIF_INITDOMAIN) { - u_char buf[XENCONS_BURST+1]; - - len = q_to_b(cl, buf, XENCONS_BURST); - (void)HYPERVISOR_console_io(CONSOLEIO_write, len, buf); - } else { - ctrl_msg_t msg; - - len = q_to_b(cl, msg.msg, sizeof(msg.msg)); - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = len; - ctrl_if_send_message_noblock(&msg, NULL, 0); - /* XXX check return value and queue wait for space - * thread/softint */ - } - - s = spltty(); - tp->t_state &= ~TS_BUSY; - if (cl->c_cc) { - tp->t_state |= TS_TIMEOUT; - callout_reset(&tp->t_rstrt_ch, 1, ttrstrt, tp); - } - if (cl->c_cc <= tp->t_lowat) { - if (tp->t_state & TS_ASLEEP) { - tp->t_state &= ~TS_ASLEEP; - wakeup(cl); - } - selwakeup(&tp->t_wsel); - } -out: - splx(s); -} - -void -xencons_stop(struct tty *tp, int flag) -{ - -} - - -/* Non-privileged receive callback. */ -static void -xencons_rx(ctrl_msg_t *msg, unsigned long id) -{ - int i; - int s; - // unsigned long flags; - struct xencons_softc *sc; - struct tty *tp; - - sc = device_lookup(&xencons_cd, XENCONS_UNIT(cn_tab->cn_dev)); - if (sc == NULL) - goto out; - - tp = sc->sc_tty; - if (tp == NULL) - goto out; - - s = spltty(); - // save_and_cli(flags); - // simple_lock(&xencons_lock); - for (i = 0; i < msg->length; i++) - (*tp->t_linesw->l_rint)(msg->msg[i], tp); - // simple_unlock(&xencons_lock); - // restore_flags(flags); - splx(s); - - out: - msg->length = 0; - ctrl_if_send_response(msg); -} - -void -xenconscn_attach() -{ - - cn_tab = &xencons; - - ctrl_if_early_init(); - - xencons_isconsole = 1; -} - -int -xenconscn_getc(dev_t dev) -{ - - printf("\n"); - for (;;); -} - -void -xenconscn_putc(dev_t dev, int c) -{ - extern int ctrl_if_evtchn; - - if (xen_start_info.flags & SIF_INITDOMAIN || - ctrl_if_evtchn == -1) { - u_char buf[1]; - - buf[0] = c; - (void)HYPERVISOR_console_io(CONSOLEIO_write, 1, buf); - } else { - ctrl_msg_t msg; - - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; 
- msg.length = 1; - msg.msg[0] = c; - while (ctrl_if_send_message_noblock(&msg, NULL, 0) == EAGAIN) { - HYPERVISOR_yield(); - /* XXX check return value and queue wait for space - * thread/softint */ - } - } -} - -void -xenconscn_pollc(dev_t dev, int on) -{ - -} - -/* - * Set line parameters. - */ -int -xencons_param(struct tty *tp, struct termios *t) -{ - - tp->t_ispeed = t->c_ispeed; - tp->t_ospeed = t->c_ospeed; - tp->t_cflag = t->c_cflag; - return (0); -} - diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/arch/xen/xen/xenkbc.c --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xenkbc.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,600 +0,0 @@ -/* $NetBSD: xenkbc.c,v 1.3.2.1 2004/05/22 15:57:43 he Exp $ */ - -/* - * - * Copyright (c) 2004 Christian Limpach. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Copyright (c) 2004 Ben Harris. - * Copyright (c) 1998 - * Matthias Drochner. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xenkbc.c,v 1.3.2.1 2004/05/22 15:57:43 he Exp $"); - -#include <sys/param.h> -#include <sys/device.h> -#include <sys/malloc.h> -#include <sys/systm.h> - -#include <dev/pckbport/pckbportvar.h> -#include <dev/ic/i8042reg.h> - -#include <machine/intr.h> - -#include <machine/xenkbcvar.h> -#include <machine/xen.h> -#include <machine/hypervisor.h> -#include <machine/xen-public/kbd.h> -#include <machine/evtchn.h> - -#define KBC_DELAY DELAY(1000) -#define KBC_TIMEOUT 250 - -#define XENKBC_NSLOTS 2 - -/* data per slave device */ -struct xenkbc_slotdata { - int xsd_polling; /* don't process data in interrupt handler */ - int xsd_poll_data; /* data read from inr handler if polling */ - int xsd_poll_stat; /* status read from inr handler if polling */ -#if NRND > 0 - rndsource_element_t xsd_rnd_source; -#endif -}; - -struct xenkbc_internal { - struct xenkbc_softc *xi_sc; - struct pckbport_tag *xi_pt; - struct xenkbc_slotdata *xi_slotdata[XENKBC_NSLOTS]; - int xi_flags; - int xi_data; - int xi_8042cmdbyte; -}; - -#define XI_CONSOLE_FLAG 0x01 -#define XI_HASAUX_FLAG 0x02 - -#define XI_CONSOLE(xi) ((xi)->xi_flags & XI_CONSOLE_FLAG) -#define XI_HASAUX(xi) ((xi)->xi_flags & XI_HASAUX_FLAG) - -#define XI_SETCONSOLE(xi,on) \ - ((on) ? ((xi)->xi_flags |= XI_CONSOLE_FLAG) : \ - ((xi)->xi_flags &= ~XI_CONSOLE_FLAG)) -#define XI_SETHASAUX(xi,on) \ - ((on) ? 
((xi)->xi_flags |= XI_HASAUX_FLAG) : \ - ((xi)->xi_flags &= ~XI_HASAUX_FLAG)) - -static int xenkbc_match(struct device *, struct cfdata *, void *); -static void xenkbc_attach(struct device *, struct device *, void *); - -static int xenkbc_xt_translation(void *, pckbport_slot_t, int); -static void xenkbc_init_slotdata(struct xenkbc_slotdata *); - -static int xenkbc_get8042cmd (struct xenkbc_internal *); -static int xenkbc_put8042cmd (struct xenkbc_internal *); -static int xenkbc_send_devcmd(void *, pckbport_slot_t, u_char); -static int xenkbc_send_cmd(void *, u_char); -static int xenkbc_send_data(void *, u_char); -static int xenkbc_poll_data1(void *, pckbport_slot_t); - -static void xenkbc_slot_enable(void *, pckbport_slot_t, int); -static void xenkbc_intr_establish(void *, pckbport_slot_t); -static void xenkbc_set_poll(void *, pckbport_slot_t, int); - -static int xenkbc_intr(void *); - -CFATTACH_DECL(xenkbc, sizeof(struct xenkbc_softc), - xenkbc_match, xenkbc_attach, NULL, NULL); - -static struct pckbport_accessops const xenkbc_ops = { - xenkbc_xt_translation, - xenkbc_send_devcmd, - xenkbc_poll_data1, - xenkbc_slot_enable, - xenkbc_intr_establish, - xenkbc_set_poll -}; - -static struct xenkbc_internal xenkbc_consdata; -static struct xenkbc_slotdata xenkbc_cons_slotdata; - -/* #define XENKBCDEBUG */ -#ifdef XENKBCDEBUG -#define DPRINTF(x) printf x -#else -#define DPRINTF(x) -#endif - - -static int -xenkbc_getstatus(struct xenkbc_internal *xi) -{ - long res; - - res = HYPERVISOR_kbd_op(KBD_OP_READ, 0); - if (res < 0) { - xi->xi_data = 0; - return 0; - } - xi->xi_data = KBD_CODE_SCANCODE(res); - return KBD_CODE_STATUS(res); -} - -static int -xenkbc_wait_output(struct xenkbc_internal *xi) -{ - u_int i; - - for (i = KBC_TIMEOUT; i; i--) { - if ((xenkbc_getstatus(xi) & KBS_IBF) == 0) - return (1); - KBC_DELAY; - } - return (0); -} - -static int -xenkbc_match(struct device *parent, struct cfdata *cf, void *aux) -{ - struct xenkbc_attach_args *xa = aux; - - if 
((xen_start_info.flags & SIF_PRIVILEGED) == 0) - return 0; - - if (strcmp(xa->xa_device, "xenkbc")) - return 0; - - return 1; -} - -static int -xenkbc_attach_slot(struct xenkbc_softc *xs, pckbport_slot_t slot) -{ - struct xenkbc_internal *xi = xs->sc_xi; - struct device *child; - int alloced = 0; - - if (xi->xi_slotdata[slot] == NULL) { - xi->xi_slotdata[slot] = malloc(sizeof(struct xenkbc_slotdata), - M_DEVBUF, M_NOWAIT); - if (xi->xi_slotdata[slot] == NULL) { - printf("%s: no memory\n", xs->sc_dev.dv_xname); - return 0; - } - xenkbc_init_slotdata(xi->xi_slotdata[slot]); - alloced++; - } - - child = pckbport_attach_slot(&xs->sc_dev, xi->xi_pt, slot); - - if (child == NULL && alloced) { - free(xi->xi_slotdata[slot], M_DEVBUF); - xi->xi_slotdata[slot] = NULL; - } - -#if NRND > 0 - if (child != NULL && xi->xi_slotdata[slot] != NULL) - rnd_attach_source(&xi->xi_slotdata[slot]->xsd_rnd_source, - child->dv_xname, RND_TYPE_TTY, 0); -#endif - - return child != NULL; -} - -static void -xenkbc_attach(struct device *parent, struct device *self, void *aux) -{ - /* struct xenkbc_attach_args *xa = aux; */ - struct xenkbc_softc *xs = (struct xenkbc_softc *)self; - struct xenkbc_internal *xi; - int res; - u_char cmdbits = 0; - - if (XI_CONSOLE(&xenkbc_consdata)) - xi = &xenkbc_consdata; - else { - xi = malloc(sizeof(struct xenkbc_internal), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (xi == NULL) { - aprint_error(": no memory\n"); - return; - } - xi->xi_8042cmdbyte = KC8_CPU; - } - - aprint_normal(": Xen Keyboard/Mouse Device\n"); - - xs->sc_xi = xi; - xi->xi_sc = xs; - - event_set_handler(_EVENT_PS2, &xenkbc_intr, xi, IPL_TTY); - hypervisor_enable_event(_EVENT_PS2); - - xi->xi_pt = pckbport_attach(xi, &xenkbc_ops); - - /* flush */ - xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT); - - /* set initial cmd byte */ - if (!xenkbc_put8042cmd(xi)) { - printf("kbc: cmd word write error\n"); - return; - } - - if (xenkbc_attach_slot(xs, PCKBPORT_KBD_SLOT)) - cmdbits |= KC8_KENABLE; - - /* - * Check aux 
port ok. - */ - if (!xenkbc_send_cmd(xi, KBC_AUXECHO)) { - printf("kbc: aux echo error 1\n"); - goto nomouse; - } - if (!xenkbc_wait_output(xi)) { - printf("kbc: aux echo error 2\n"); - goto nomouse; - } - XI_SETHASAUX(xi, 1); - xenkbc_send_data(xi, 0x5a); /* a random value */ - res = xenkbc_poll_data1(xi, PCKBPORT_AUX_SLOT); - if (res != -1) { - /* - * In most cases, the 0x5a gets echoed. - * Some older controllers (Gateway 2000 circa 1993) - * return 0xfe here. - * We are satisfied if there is anything in the - * aux output buffer. - */ - if (xenkbc_attach_slot(xs, PCKBPORT_AUX_SLOT)) - cmdbits |= KC8_MENABLE; - } else { -#ifdef XENKBCDEBUG - printf("kbc: aux echo test failed\n"); -#endif - XI_SETHASAUX(xi, 0); - } - - nomouse: - /* enable needed interrupts */ - xi->xi_8042cmdbyte |= cmdbits; - if (!xenkbc_put8042cmd(xi)) - printf("kbc: cmd word write error\n"); -} - -static void -xenkbc_init_slotdata(struct xenkbc_slotdata *xsd) -{ - - xsd->xsd_polling = 0; -} - -/* - * Get the current command byte. - */ -static int -xenkbc_get8042cmd(struct xenkbc_internal *xi) -{ - int data; - - if (!xenkbc_send_cmd(xi, K_RDCMDBYTE)) - return 0; - data = xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT); - if (data == -1) - return 0; - xi->xi_8042cmdbyte = data; - return 1; -} - -/* - * Pass command byte to keyboard controller (8042). 
- */ -static int -xenkbc_put8042cmd(struct xenkbc_internal *xi) -{ - - if (!xenkbc_send_cmd(xi, K_LDCMDBYTE)) - return 0; - if (!xenkbc_wait_output(xi)) - return 0; - return xenkbc_send_data(xi, xi->xi_8042cmdbyte); -} - -static int -xenkbc_send_devcmd(void *cookie, pckbport_slot_t slot, u_char devcmd) -{ - - DPRINTF(("send_devcmd %x\n", devcmd)); - - if (slot == PCKBPORT_AUX_SLOT) { - if (!xenkbc_send_cmd(cookie, KBC_AUXWRITE)) { - DPRINTF(("xenkbc_send_devcmd: KBC_AUXWRITE failed\n")); - return 0; - } - } - if (!xenkbc_wait_output(cookie)) { - DPRINTF(("xenkbc_send_devcmd: wait_output failed\n")); - return 0; - } - return xenkbc_send_data(cookie, devcmd); -} - -static int -xenkbc_send_cmd(void *cookie, u_char cmd) -{ - struct xenkbc_internal *xi = cookie; - - DPRINTF(("send_cmd %x\n", cmd)); - xenkbc_wait_output(xi); - return !HYPERVISOR_kbd_op(KBD_OP_WRITECOMMAND, cmd); -} - -static int -xenkbc_send_data(void *cookie, u_char output) -{ - struct xenkbc_internal *xi = cookie; - - DPRINTF(("send_data %x\n", output)); - xenkbc_wait_output(xi); - return !HYPERVISOR_kbd_op(KBD_OP_WRITEOUTPUT, output); -} - -static int -xenkbc_poll_data1(void *cookie, pckbport_slot_t slot) -{ - struct xenkbc_internal *xi = cookie; - struct xenkbc_slotdata *xsd = xi->xi_slotdata[slot]; - int s; - u_char stat, c; - int i = 1000; - - s = splhigh(); - - if (xsd && xsd->xsd_polling && xsd->xsd_poll_data != -1 && - xsd->xsd_poll_stat != -1) { - stat = xsd->xsd_poll_stat; - c = xsd->xsd_poll_data; - xsd->xsd_poll_data = -1; - xsd->xsd_poll_stat = -1; - goto process; - } - - DELAY(10); - for (; i; i--) { - stat = xenkbc_getstatus(xi); - if (stat & KBS_DIB) { - c = xi->xi_data; - DELAY(10); - process: - if (XI_HASAUX(xi) && (stat & 0x20)) { /* aux data */ - if (slot != PCKBPORT_AUX_SLOT) { -#ifdef XENKBCDEBUG - printf("lost aux 0x%x\n", c); -#endif - continue; - } - } else { - if (slot == PCKBPORT_AUX_SLOT) { -#ifdef XENKBCDEBUG - printf("lost kbd 0x%x\n", c); -#endif - continue; - } - } - 
splx(s); - DPRINTF(("poll -> %x stat %x\n", c, stat)); - return c; - } - } - - DPRINTF(("poll failed -> -1\n")); - splx(s); - return -1; -} - -/* - * switch scancode translation on / off - * return nonzero on success - */ -static int -xenkbc_xt_translation(void *cookie, pckbport_slot_t slot, int on) -{ - struct xenkbc_internal *xi = cookie; - int ison; - - if (slot != PCKBPORT_KBD_SLOT) { - /* translation only for kbd slot */ - if (on) - return 0; - else - return 1; - } - - ison = xi->xi_8042cmdbyte & KC8_TRANS; - if ((on && ison) || (!on && !ison)) - return 1; - - xi->xi_8042cmdbyte ^= KC8_TRANS; - if (!xenkbc_put8042cmd(xi)) - return 0; - - /* read back to be sure */ - if (!xenkbc_get8042cmd(xi)) - return 0; - - ison = xi->xi_8042cmdbyte & KC8_TRANS; - if ((on && ison) || (!on && !ison)) - return 1; - return 0; -} - -static const struct xenkbc_portcmd { - u_char cmd_en, cmd_dis; -} xenkbc_portcmd[2] = { - { - KBC_KBDENABLE, KBC_KBDDISABLE, - }, { - KBC_AUXENABLE, KBC_AUXDISABLE, - } -}; - -static void -xenkbc_slot_enable(void *cookie, pckbport_slot_t slot, int on) -{ - struct xenkbc_internal *xi = cookie; - const struct xenkbc_portcmd *cmd; - - cmd = &xenkbc_portcmd[slot]; - - DPRINTF(("slot enable %d -> %d\n", slot, on)); - xenkbc_send_cmd(xi, on ? cmd->cmd_en : cmd->cmd_dis); -} - - -static void -xenkbc_intr_establish(void *cookie, pckbport_slot_t slot) -{ - -} - -static void -xenkbc_set_poll(void *cookie, pckbport_slot_t slot, int on) -{ - struct xenkbc_internal *xi = cookie; - - DPRINTF(("xenkbc_set_poll %d -> %d\n", slot, on)); - - xi->xi_slotdata[slot]->xsd_polling = on; - - if (on) { - xi->xi_slotdata[slot]->xsd_poll_data = -1; - xi->xi_slotdata[slot]->xsd_poll_stat = -1; - } else { - int s; - - /* - * If disabling polling on a device that's been configured, - * make sure there are no bytes left in the FIFO, holding up - * the interrupt line. Otherwise we won't get any further - * interrupts. 
- */ - s = spltty(); - xenkbc_intr(xi); - splx(s); - } -} - -static int -xenkbc_intr(void *self) -{ - struct xenkbc_internal *xi = self; - u_char stat; - pckbport_slot_t slot; - struct xenkbc_slotdata *xsd; - int served = 0; - - for (;;) { - stat = xenkbc_getstatus(xi); - if (!(stat & KBS_DIB)) - break; - - served = 1; - - slot = (XI_HASAUX(xi) && (stat & 0x20)) ? - PCKBPORT_AUX_SLOT : PCKBPORT_KBD_SLOT; - xsd = xi->xi_slotdata[slot]; - - if (xsd == NULL) - continue; - -#if NRND > 0 - rnd_add_uint32(&xsd->xsd_rnd_source, - (stat << 8) | xi->xi_data); -#endif - - if (xsd->xsd_polling) { - xsd->xsd_poll_data = xi->xi_data; - xsd->xsd_poll_stat = stat; - break; /* xenkbc_poll_data() will get it */ - } - - pckbportintr(xi->xi_pt, slot, xi->xi_data); - } - - return served; -} - -int -xenkbc_cnattach(pckbport_slot_t slot) -{ - struct xenkbc_internal *xi = &xenkbc_consdata; - int ret; - - /* flush */ - (void) xenkbc_poll_data1(xi, PCKBPORT_KBD_SLOT); - - /* init cmd byte, enable ports */ - xenkbc_consdata.xi_8042cmdbyte = KC8_CPU; - if (!xenkbc_put8042cmd(xi)) { - printf("kbc: cmd word write error\n"); - return EIO; - } - - ret = pckbport_cnattach(xi, &xenkbc_ops, slot); - - xi->xi_slotdata[slot] = &xenkbc_cons_slotdata; - xenkbc_init_slotdata(xi->xi_slotdata[slot]); - XI_SETCONSOLE(xi, 1); - - return ret; -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h --- a/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,205 +0,0 @@ -/* $NetBSD: kernfs.h,v 1.20.2.3 2004/05/23 10:46:05 tron Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software donated to Berkeley by - * Jan-Simon Pendry. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)kernfs.h 8.6 (Berkeley) 3/29/95 - */ - -#define _PATH_KERNFS "/kern" /* Default mountpoint */ - -#ifdef _KERNEL -#include <sys/queue.h> - -/* - * The different types of node in a kernfs filesystem - */ -typedef enum { - KFSkern, /* the filesystem itself (.) */ - KFSroot, /* the filesystem root (..) 
*/ - KFSnull, /* none aplicable */ - KFStime, /* boottime */ - KFSint, /* integer */ - KFSstring, /* string */ - KFShostname, /* hostname */ - KFSavenrun, /* loadavg */ - KFSdevice, /* device file (rootdev/rrootdev) */ - KFSmsgbuf, /* msgbuf */ - KFSipsecsadir, /* ipsec security association (top dir) */ - KFSipsecspdir, /* ipsec security policy (top dir) */ - KFSipsecsa, /* ipsec security association entry */ - KFSipsecsp, /* ipsec security policy entry */ - KFSsubdir, /* directory */ - KFSlasttype, /* last used type */ - KFSmaxtype = (1<<6) - 1 /* last possible type */ -} kfstype; - -/* - * Control data for the kern file system. - */ -struct kern_target { - u_char kt_type; - u_char kt_namlen; - const char *kt_name; - void *kt_data; - kfstype kt_tag; - u_char kt_vtype; - mode_t kt_mode; -}; - -struct dyn_kern_target { - struct kern_target dkt_kt; - SIMPLEQ_ENTRY(dyn_kern_target) dkt_queue; -}; - -struct kernfs_subdir { - SIMPLEQ_HEAD(,dyn_kern_target) ks_entries; - unsigned int ks_nentries; - unsigned int ks_dirs; - const struct kern_target *ks_parent; -}; - -struct kernfs_node { - LIST_ENTRY(kernfs_node) kfs_hash; /* hash chain */ - TAILQ_ENTRY(kernfs_node) kfs_list; /* flat list */ - struct vnode *kfs_vnode; /* vnode associated with this pfsnode */ - kfstype kfs_type; /* type of procfs node */ - mode_t kfs_mode; /* mode bits for stat() */ - long kfs_fileno; /* unique file id */ - u_int32_t kfs_value; /* SA id or SP id (KFSint) */ - const struct kern_target *kfs_kt; - void *kfs_v; /* pointer to secasvar/secpolicy/mbuf */ - long kfs_cookie; /* fileno cookie */ -}; - -struct kernfs_mount { - TAILQ_HEAD(, kernfs_node) nodelist; - long fileno_cookie; -}; - -#define UIO_MX 32 - -#define KERNFS_FILENO(kt, typ, cookie) \ - ((kt >= &kern_targets[0] && kt < &kern_targets[static_nkern_targets]) \ - ? 
2 + ((kt) - &kern_targets[0]) \ - : (((cookie + 1) << 6) | (typ))) -#define KERNFS_TYPE_FILENO(typ, cookie) \ - (((cookie + 1) << 6) | (typ)) - -#define VFSTOKERNFS(mp) ((struct kernfs_mount *)((mp)->mnt_data)) -#define VTOKERN(vp) ((struct kernfs_node *)(vp)->v_data) -#define KERNFSTOV(kfs) ((kfs)->kfs_vnode) - -extern const struct kern_target kern_targets[]; -extern int nkern_targets; -extern const int static_nkern_targets; -extern int (**kernfs_vnodeop_p) __P((void *)); -extern struct vfsops kernfs_vfsops; -extern dev_t rrootdev; - -struct secasvar; -struct secpolicy; - -int kernfs_root __P((struct mount *, struct vnode **)); - -void kernfs_hashinit __P((void)); -void kernfs_hashreinit __P((void)); -void kernfs_hashdone __P((void)); -int kernfs_freevp __P((struct vnode *)); -int kernfs_allocvp __P((struct mount *, struct vnode **, kfstype, - const struct kern_target *, u_int32_t)); - -void kernfs_revoke_sa __P((struct secasvar *)); -void kernfs_revoke_sp __P((struct secpolicy *)); - -/* - * Data types for the kernfs file operations. - */ -typedef enum { - KERNFS_XREAD, - KERNFS_XWRITE, - KERNFS_FILEOP_CLOSE, - KERNFS_FILEOP_GETATTR, - KERNFS_FILEOP_IOCTL, - KERNFS_FILEOP_MMAP, - KERNFS_FILEOP_OPEN, - KERNFS_FILEOP_READ, - KERNFS_FILEOP_WRITE, -} kfsfileop; - -struct kernfs_fileop { - kfstype kf_type; - kfsfileop kf_fileop; - union { - void *_kf_genop; - int (*_kf_vop)(void *); - int (*_kf_xread) - (const struct kernfs_node *, int, char **, size_t, - size_t *); - int (*_kf_xwrite) - (const struct kernfs_node *, char *, size_t); - } _kf_opfn; - SPLAY_ENTRY(kernfs_fileop) kf_node; -}; -#define kf_genop _kf_opfn -#define kf_vop _kf_opfn._kf_vop -#define kf_xwrite _kf_opfn._kf_xwrite -#define kf_xread _kf_opfn._kf_xread - -typedef struct kern_target kernfs_parentdir_t; -typedef struct dyn_kern_target kernfs_entry_t; - -/* - * Functions for adding kernfs datatypes and nodes. 
- */ -kfstype kernfs_alloctype(int, const struct kernfs_fileop *); -#define KERNFS_ALLOCTYPE(kf) kernfs_alloctype(sizeof((kf)) / \ - sizeof((kf)[0]), (kf)) -#define KERNFS_ALLOCENTRY(dkt, m_type, m_flags) \ - dkt = (struct dyn_kern_target *)malloc( \ - sizeof(struct dyn_kern_target), (m_type), (m_flags)) -#define KERNFS_INITENTRY(dkt, type, name, data, tag, vtype, mode) do { \ - (dkt)->dkt_kt.kt_type = (type); \ - (dkt)->dkt_kt.kt_namlen = strlen((name)); \ - (dkt)->dkt_kt.kt_name = (name); \ - (dkt)->dkt_kt.kt_data = (data); \ - (dkt)->dkt_kt.kt_tag = (tag); \ - (dkt)->dkt_kt.kt_vtype = (vtype); \ - (dkt)->dkt_kt.kt_mode = (mode); \ -} while (/*CONSTCOND*/0) -#define KERNFS_ENTOPARENTDIR(dkt) &(dkt)->dkt_kt -int kernfs_addentry __P((kernfs_parentdir_t *, kernfs_entry_t *)); - -#ifdef SYSCTL_SETUP_PROTO -SYSCTL_SETUP_PROTO(sysctl_vfs_kernfs_setup); -#endif /* SYSCTL_SETUP_PROTO */ - -#endif /* _KERNEL */ diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c --- a/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,1583 +0,0 @@ -/* $NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software donated to Berkeley by - * Jan-Simon Pendry. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 - */ - -/* - * Kernel parameter filesystem (/kern) - */ - -#include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron Exp $"); - -#ifdef _KERNEL_OPT -#include "opt_ipsec.h" -#endif - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/vmmeter.h> -#include <sys/time.h> -#include <sys/proc.h> -#include <sys/vnode.h> -#include <sys/malloc.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/mount.h> -#include <sys/namei.h> -#include <sys/buf.h> -#include <sys/dirent.h> -#include <sys/msgbuf.h> - -#include <miscfs/genfs/genfs.h> -#include <miscfs/kernfs/kernfs.h> - -#ifdef IPSEC -#include <sys/mbuf.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet6/ipsec.h> -#include <netkey/key.h> -#endif - -#include <uvm/uvm_extern.h> - -#define KSTRING 256 /* Largest I/O available via this filesystem */ -#define UIO_MX 32 - -#define READ_MODE 
(S_IRUSR|S_IRGRP|S_IROTH) -#define WRITE_MODE (S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH) -#define UREAD_MODE (S_IRUSR) -#define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) -#define UDIR_MODE (S_IRUSR|S_IXUSR) - -#define N(s) sizeof(s)-1, s -const struct kern_target kern_targets[] = { -/* NOTE: The name must be less than UIO_MX-16 chars in length */ - /* name data tag type ro/rw */ - { DT_DIR, N("."), 0, KFSkern, VDIR, DIR_MODE }, - { DT_DIR, N(".."), 0, KFSroot, VDIR, DIR_MODE }, - { DT_REG, N("boottime"), &boottime.tv_sec, KFSint, VREG, READ_MODE }, - /* XXX cast away const */ - { DT_REG, N("copyright"), (void *)copyright, - KFSstring, VREG, READ_MODE }, - { DT_REG, N("hostname"), 0, KFShostname, VREG, WRITE_MODE }, - { DT_REG, N("hz"), &hz, KFSint, VREG, READ_MODE }, -#ifdef IPSEC - { DT_DIR, N("ipsecsa"), 0, KFSipsecsadir, VDIR, UDIR_MODE }, - { DT_DIR, N("ipsecsp"), 0, KFSipsecspdir, VDIR, UDIR_MODE }, -#endif - { DT_REG, N("loadavg"), 0, KFSavenrun, VREG, READ_MODE }, - { DT_REG, N("msgbuf"), 0, KFSmsgbuf, VREG, READ_MODE }, - { DT_REG, N("pagesize"), &uvmexp.pagesize, KFSint, VREG, READ_MODE }, - { DT_REG, N("physmem"), &physmem, KFSint, VREG, READ_MODE }, -#if 0 - { DT_DIR, N("root"), 0, KFSnull, VDIR, DIR_MODE }, -#endif - { DT_BLK, N("rootdev"), &rootdev, KFSdevice, VBLK, READ_MODE }, - { DT_CHR, N("rrootdev"), &rrootdev, KFSdevice, VCHR, READ_MODE }, - { DT_REG, N("time"), 0, KFStime, VREG, READ_MODE }, - /* XXX cast away const */ - { DT_REG, N("version"), (void *)version, - KFSstring, VREG, READ_MODE }, -}; -const struct kern_target subdir_targets[] = { -/* NOTE: The name must be less than UIO_MX-16 chars in length */ - /* name data tag type ro/rw */ - { DT_DIR, N("."), 0, KFSsubdir, VDIR, DIR_MODE }, - { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE }, -}; -#ifdef IPSEC -const struct kern_target ipsecsa_targets[] = { -/* NOTE: The name must be less than UIO_MX-16 chars in length */ - /* name data tag type ro/rw */ - { DT_DIR, N("."), 0, 
KFSipsecsadir, VDIR, DIR_MODE }, - { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE }, -}; -const struct kern_target ipsecsp_targets[] = { -/* NOTE: The name must be less than UIO_MX-16 chars in length */ - /* name data tag type ro/rw */ - { DT_DIR, N("."), 0, KFSipsecspdir, VDIR, DIR_MODE }, - { DT_DIR, N(".."), 0, KFSkern, VDIR, DIR_MODE }, -}; -const struct kern_target ipsecsa_kt = - { DT_DIR, N(""), 0, KFSipsecsa, VREG, UREAD_MODE }; -const struct kern_target ipsecsp_kt = - { DT_DIR, N(""), 0, KFSipsecsp, VREG, UREAD_MODE }; -#endif -#undef N -SIMPLEQ_HEAD(,dyn_kern_target) dyn_kern_targets = - SIMPLEQ_HEAD_INITIALIZER(dyn_kern_targets); -int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]); -const int static_nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]); -#ifdef IPSEC -int nipsecsa_targets = sizeof(ipsecsa_targets) / sizeof(ipsecsa_targets[0]); -int nipsecsp_targets = sizeof(ipsecsp_targets) / sizeof(ipsecsp_targets[0]); -int nkern_dirs = 4; /* 2 extra subdirs */ -#else -int nkern_dirs = 2; -#endif - -int kernfs_try_fileop(kfstype, kfsfileop, void *, int); -int kernfs_try_xread(kfstype, const struct kernfs_node *, int, char **, - size_t, size_t *, int); -int kernfs_try_xwrite(kfstype, const struct kernfs_node *, char *, - size_t, int); - -static int kernfs_default_xread(void *v); -static int kernfs_default_xwrite(void *v); -static int kernfs_default_fileop_getattr(void *); - -/* must include all fileop's */ -const struct kernfs_fileop kernfs_default_fileops[] = { - { .kf_fileop = KERNFS_XREAD }, - { .kf_fileop = KERNFS_XWRITE }, - { .kf_fileop = KERNFS_FILEOP_OPEN }, - { .kf_fileop = KERNFS_FILEOP_GETATTR, - .kf_genop = {kernfs_default_fileop_getattr} }, - { .kf_fileop = KERNFS_FILEOP_IOCTL }, - { .kf_fileop = KERNFS_FILEOP_MMAP }, - { .kf_fileop = KERNFS_FILEOP_CLOSE }, - { .kf_fileop = KERNFS_FILEOP_READ, .kf_genop = {kernfs_default_xread} }, - { .kf_fileop = KERNFS_FILEOP_WRITE, .kf_genop = {kernfs_default_xwrite} }, -}; - -int 
kernfs_lookup __P((void *)); -#define kernfs_create genfs_eopnotsupp -#define kernfs_mknod genfs_eopnotsupp -int kernfs_open __P((void *)); -int kernfs_close __P((void *)); -int kernfs_access __P((void *)); -int kernfs_getattr __P((void *)); -int kernfs_setattr __P((void *)); -int kernfs_read __P((void *)); -int kernfs_write __P((void *)); -#define kernfs_fcntl genfs_fcntl -int kernfs_ioctl __P((void *)); -#define kernfs_poll genfs_poll -#define kernfs_revoke genfs_revoke -int kernfs_mmap __P((void *)); -#define kernfs_fsync genfs_nullop -#define kernfs_seek genfs_nullop -#define kernfs_remove genfs_eopnotsupp -int kernfs_link __P((void *)); -#define kernfs_rename genfs_eopnotsupp -#define kernfs_mkdir genfs_eopnotsupp -#define kernfs_rmdir genfs_eopnotsupp -int kernfs_symlink __P((void *)); -int kernfs_readdir __P((void *)); -#define kernfs_readlink genfs_eopnotsupp -#define kernfs_abortop genfs_abortop -int kernfs_inactive __P((void *)); -int kernfs_reclaim __P((void *)); -#define kernfs_lock genfs_lock -#define kernfs_unlock genfs_unlock -#define kernfs_bmap genfs_badop -#define kernfs_strategy genfs_badop -int kernfs_print __P((void *)); -#define kernfs_islocked genfs_islocked -int kernfs_pathconf __P((void *)); -#define kernfs_advlock genfs_einval -#define kernfs_blkatoff genfs_eopnotsupp -#define kernfs_valloc genfs_eopnotsupp -#define kernfs_vfree genfs_nullop -#define kernfs_truncate genfs_eopnotsupp -#define kernfs_update genfs_nullop -#define kernfs_bwrite genfs_eopnotsupp -#define kernfs_putpages genfs_putpages - -static int kernfs_xread __P((struct kernfs_node *, int, char **, size_t, size_t *)); -static int kernfs_xwrite __P((const struct kernfs_node *, char *, size_t)); - -int (**kernfs_vnodeop_p) __P((void *)); -const struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = { - { &vop_default_desc, vn_default_error }, - { &vop_lookup_desc, kernfs_lookup }, /* lookup */ - { &vop_create_desc, kernfs_create }, /* create */ - { &vop_mknod_desc, kernfs_mknod 
}, /* mknod */ - { &vop_open_desc, kernfs_open }, /* open */ - { &vop_close_desc, kernfs_close }, /* close */ - { &vop_access_desc, kernfs_access }, /* access */ - { &vop_getattr_desc, kernfs_getattr }, /* getattr */ - { &vop_setattr_desc, kernfs_setattr }, /* setattr */ - { &vop_read_desc, kernfs_read }, /* read */ - { &vop_write_desc, kernfs_write }, /* write */ - { &vop_fcntl_desc, kernfs_fcntl }, /* fcntl */ - { &vop_ioctl_desc, kernfs_ioctl }, /* ioctl */ - { &vop_poll_desc, kernfs_poll }, /* poll */ - { &vop_revoke_desc, kernfs_revoke }, /* revoke */ - { &vop_mmap_desc, kernfs_mmap }, /* mmap */ - { &vop_fsync_desc, kernfs_fsync }, /* fsync */ - { &vop_seek_desc, kernfs_seek }, /* seek */ - { &vop_remove_desc, kernfs_remove }, /* remove */ - { &vop_link_desc, kernfs_link }, /* link */ - { &vop_rename_desc, kernfs_rename }, /* rename */ - { &vop_mkdir_desc, kernfs_mkdir }, /* mkdir */ - { &vop_rmdir_desc, kernfs_rmdir }, /* rmdir */ - { &vop_symlink_desc, kernfs_symlink }, /* symlink */ - { &vop_readdir_desc, kernfs_readdir }, /* readdir */ - { &vop_readlink_desc, kernfs_readlink }, /* readlink */ - { &vop_abortop_desc, kernfs_abortop }, /* abortop */ - { &vop_inactive_desc, kernfs_inactive }, /* inactive */ - { &vop_reclaim_desc, kernfs_reclaim }, /* reclaim */ - { &vop_lock_desc, kernfs_lock }, /* lock */ - { &vop_unlock_desc, kernfs_unlock }, /* unlock */ - { &vop_bmap_desc, kernfs_bmap }, /* bmap */ - { &vop_strategy_desc, kernfs_strategy }, /* strategy */ - { &vop_print_desc, kernfs_print }, /* print */ - { &vop_islocked_desc, kernfs_islocked }, /* islocked */ - { &vop_pathconf_desc, kernfs_pathconf }, /* pathconf */ - { &vop_advlock_desc, kernfs_advlock }, /* advlock */ - { &vop_blkatoff_desc, kernfs_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, kernfs_valloc }, /* valloc */ - { &vop_vfree_desc, kernfs_vfree }, /* vfree */ - { &vop_truncate_desc, kernfs_truncate }, /* truncate */ - { &vop_update_desc, kernfs_update }, /* update */ - { 
&vop_bwrite_desc, kernfs_bwrite }, /* bwrite */ - { &vop_putpages_desc, kernfs_putpages }, /* putpages */ - { NULL, NULL } -}; -const struct vnodeopv_desc kernfs_vnodeop_opv_desc = - { &kernfs_vnodeop_p, kernfs_vnodeop_entries }; - -static __inline int -kernfs_fileop_compare(struct kernfs_fileop *a, struct kernfs_fileop *b) -{ - if (a->kf_type < b->kf_type) - return -1; - if (a->kf_type > b->kf_type) - return 1; - if (a->kf_fileop < b->kf_fileop) - return -1; - if (a->kf_fileop > b->kf_fileop) - return 1; - return (0); -} - -SPLAY_HEAD(kfsfileoptree, kernfs_fileop) kfsfileoptree = - SPLAY_INITIALIZER(kfsfileoptree); -SPLAY_PROTOTYPE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare); -SPLAY_GENERATE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare); - -kfstype -kernfs_alloctype(int nkf, const struct kernfs_fileop *kf) -{ - static u_char nextfreetype = KFSlasttype; - struct kernfs_fileop *dkf, *fkf, skf; - int i; - - /* XXX need to keep track of dkf's memory if we support - deallocating types */ - dkf = malloc(sizeof(kernfs_default_fileops), M_TEMP, M_WAITOK); - memcpy(dkf, kernfs_default_fileops, sizeof(kernfs_default_fileops)); - - for (i = 0; i < sizeof(kernfs_default_fileops) / - sizeof(kernfs_default_fileops[0]); i++) { - dkf[i].kf_type = nextfreetype; - SPLAY_INSERT(kfsfileoptree, &kfsfileoptree, &dkf[i]); - } - - for (i = 0; i < nkf; i++) { - skf.kf_type = nextfreetype; - skf.kf_fileop = kf[i].kf_fileop; - if ((fkf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf))) - fkf->kf_genop = kf[i].kf_genop; - } - - return nextfreetype++; -} - -int -kernfs_try_fileop(kfstype type, kfsfileop fileop, void *v, int error) -{ - const struct kernfs_fileop *kf; - struct kernfs_fileop skf; - - skf.kf_type = type; - skf.kf_fileop = fileop; - kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf); - if (kf == NULL) - kf = &kernfs_default_fileops[fileop]; - if (kf->kf_vop) - return kf->kf_vop(v); - return error; -} - -int -kernfs_try_xread(kfstype type, 
const struct kernfs_node *kfs, int off, - char **bufp, size_t len, size_t *wrlen, int error) -{ - const struct kernfs_fileop *kf; - struct kernfs_fileop skf; - - skf.kf_type = type; - skf.kf_fileop = KERNFS_XREAD; - kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf); - if (kf == NULL) - kf = &kernfs_default_fileops[KERNFS_XREAD]; - if (kf->kf_xread) - return kf->kf_xread(kfs, off, bufp, len, wrlen); - *wrlen = 0; - return error; -} - -int -kernfs_try_xwrite(kfstype type, const struct kernfs_node *kfs, char *buf, - size_t len, int error) -{ - const struct kernfs_fileop *kf; - struct kernfs_fileop skf; - - skf.kf_type = type; - skf.kf_fileop = KERNFS_XWRITE; - kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf); - if (kf == NULL) - kf = &kernfs_default_fileops[KERNFS_XWRITE]; - if (kf->kf_xwrite) - return kf->kf_xwrite(kfs, buf, len); - return error; -} - -int -kernfs_addentry(kernfs_parentdir_t *pkt, kernfs_entry_t *dkt) -{ - struct kernfs_subdir *ks, *parent; - - if (pkt == NULL) { - SIMPLEQ_INSERT_TAIL(&dyn_kern_targets, dkt, dkt_queue); - nkern_targets++; - if (dkt->dkt_kt.kt_vtype == VDIR) - nkern_dirs++; - } else { - parent = (struct kernfs_subdir *)pkt->kt_data; - SIMPLEQ_INSERT_TAIL(&parent->ks_entries, dkt, dkt_queue); - parent->ks_nentries++; - if (dkt->dkt_kt.kt_vtype == VDIR) - parent->ks_dirs++; - } - if (dkt->dkt_kt.kt_vtype == VDIR && dkt->dkt_kt.kt_data == NULL) { - ks = malloc(sizeof(struct kernfs_subdir), - M_TEMP, M_WAITOK); - SIMPLEQ_INIT(&ks->ks_entries); - ks->ks_nentries = 2; /* . and .. */ - ks->ks_dirs = 2; - ks->ks_parent = pkt ? 
pkt : &kern_targets[0]; - dkt->dkt_kt.kt_data = ks; - } - return 0; -} - -static int -kernfs_xread(kfs, off, bufp, len, wrlen) - struct kernfs_node *kfs; - int off; - char **bufp; - size_t len; - size_t *wrlen; -{ - const struct kern_target *kt; -#ifdef IPSEC - struct mbuf *m; -#endif - - kt = kfs->kfs_kt; - - switch (kfs->kfs_type) { - case KFStime: { - struct timeval tv; - - microtime(&tv); - snprintf(*bufp, len, "%ld %ld\n", tv.tv_sec, tv.tv_usec); - break; - } - - case KFSint: { - int *ip = kt->kt_data; - - snprintf(*bufp, len, "%d\n", *ip); - break; - } - - case KFSstring: { - char *cp = kt->kt_data; - - *bufp = cp; - break; - } - - case KFSmsgbuf: { - long n; - - /* - * deal with cases where the message buffer has - * become corrupted. - */ - if (!msgbufenabled || msgbufp->msg_magic != MSG_MAGIC) { - msgbufenabled = 0; - return (ENXIO); - } - - /* - * Note that reads of /kern/msgbuf won't necessarily yield - * consistent results, if the message buffer is modified - * while the read is in progress. The worst that can happen - * is that incorrect data will be read. There's no way - * that this can crash the system unless the values in the - * message buffer header are corrupted, but that'll cause - * the system to die anyway. 
- */ - if (off >= msgbufp->msg_bufs) { - *wrlen = 0; - return (0); - } - n = msgbufp->msg_bufx + off; - if (n >= msgbufp->msg_bufs) - n -= msgbufp->msg_bufs; - len = min(msgbufp->msg_bufs - n, msgbufp->msg_bufs - off); - *bufp = msgbufp->msg_bufc + n; - *wrlen = len; - return (0); - } - - case KFShostname: { - char *cp = hostname; - int xlen = hostnamelen; - - if (xlen >= (len - 2)) - return (EINVAL); - - memcpy(*bufp, cp, xlen); - (*bufp)[xlen] = '\n'; - (*bufp)[xlen+1] = '\0'; - len = strlen(*bufp); - break; - } - - case KFSavenrun: - averunnable.fscale = FSCALE; - snprintf(*bufp, len, "%d %d %d %ld\n", - averunnable.ldavg[0], averunnable.ldavg[1], - averunnable.ldavg[2], averunnable.fscale); - break; - -#ifdef IPSEC - case KFSipsecsa: - /* - * Note that SA configuration could be changed during the - * read operation, resulting in garbled output. - */ - m = key_setdumpsa_spi(htonl(kfs->kfs_value)); - if (!m) - return (ENOBUFS); - if (off >= m->m_pkthdr.len) { - *wrlen = 0; - m_freem(m); - return (0); - } - if (len > m->m_pkthdr.len - off) - len = m->m_pkthdr.len - off; - m_copydata(m, off, len, *bufp); - *wrlen = len; - m_freem(m); - return (0); - - case KFSipsecsp: - /* - * Note that SP configuration could be changed during the - * read operation, resulting in garbled output. 
- */ - if (!kfs->kfs_v) { - struct secpolicy *sp; - - sp = key_getspbyid(kfs->kfs_value); - if (sp) - kfs->kfs_v = sp; - else - return (ENOENT); - } - m = key_setdumpsp((struct secpolicy *)kfs->kfs_v, - SADB_X_SPDGET, 0, 0); - if (!m) - return (ENOBUFS); - if (off >= m->m_pkthdr.len) { - *wrlen = 0; - m_freem(m); - return (0); - } - if (len > m->m_pkthdr.len - off) - len = m->m_pkthdr.len - off; - m_copydata(m, off, len, *bufp); - *wrlen = len; - m_freem(m); - return (0); -#endif - - default: - return kernfs_try_xread(kfs->kfs_type, kfs, off, bufp, len, - wrlen, 0); - } - - len = strlen(*bufp); - if (len <= off) - *wrlen = 0; - else { - *bufp += off; - *wrlen = len - off; - } - return (0); -} - -static int -kernfs_xwrite(kfs, buf, len) - const struct kernfs_node *kfs; - char *buf; - size_t len; -{ - - switch (kfs->kfs_type) { - case KFShostname: - if (buf[len-1] == '\n') - --len; - memcpy(hostname, buf, len); - hostname[len] = '\0'; - hostnamelen = (size_t) len; - return (0); - - default: - return kernfs_try_xwrite(kfs->kfs_type, kfs, buf, len, EIO); - } -} - - -/* - * vp is the current namei directory - * ndp is the name to locate in that directory... 
- */ -int -kernfs_lookup(v) - void *v; -{ - struct vop_lookup_args /* { - struct vnode * a_dvp; - struct vnode ** a_vpp; - struct componentname * a_cnp; - } */ *ap = v; - struct componentname *cnp = ap->a_cnp; - struct vnode **vpp = ap->a_vpp; - struct vnode *dvp = ap->a_dvp; - const char *pname = cnp->cn_nameptr; - const struct kernfs_node *kfs; - const struct kern_target *kt; - const struct dyn_kern_target *dkt; - const struct kernfs_subdir *ks; - int error, i, wantpunlock; -#ifdef IPSEC - char *ep; - u_int32_t id; -#endif - - *vpp = NULLVP; - cnp->cn_flags &= ~PDIRUNLOCK; - - if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) - return (EROFS); - - if (cnp->cn_namelen == 1 && *pname == '.') { - *vpp = dvp; - VREF(dvp); - return (0); - } - - wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN)); - kfs = VTOKERN(dvp); - switch (kfs->kfs_type) { - case KFSkern: - /* - * Shouldn't get here with .. in the root node. - */ - if (cnp->cn_flags & ISDOTDOT) - return (EIO); - - for (i = 0; i < static_nkern_targets; i++) { - kt = &kern_targets[i]; - if (cnp->cn_namelen == kt->kt_namlen && - memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) - goto found; - } - SIMPLEQ_FOREACH(dkt, &dyn_kern_targets, dkt_queue) { - if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen && - memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen) == 0) { - kt = &dkt->dkt_kt; - goto found; - } - } - break; - - found: - error = kernfs_allocvp(dvp->v_mount, vpp, kt->kt_tag, kt, 0); - if ((error == 0) && wantpunlock) { - VOP_UNLOCK(dvp, 0); - cnp->cn_flags |= PDIRUNLOCK; - } - return (error); - - case KFSsubdir: - ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data; - if (cnp->cn_flags & ISDOTDOT) { - kt = ks->ks_parent; - goto found; - } - - SIMPLEQ_FOREACH(dkt, &ks->ks_entries, dkt_queue) { - if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen && - memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen) == 0) { - kt = &dkt->dkt_kt; - goto found; - } - } - break; - -#ifdef IPSEC - case KFSipsecsadir: - if 
(cnp->cn_flags & ISDOTDOT) { - kt = &kern_targets[0]; - goto found; - } - - for (i = 2; i < nipsecsa_targets; i++) { - kt = &ipsecsa_targets[i]; - if (cnp->cn_namelen == kt->kt_namlen && - memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) - goto found; - } - - ep = NULL; - id = strtoul(pname, &ep, 10); - if (!ep || *ep || ep == pname) - break; - - error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsa, &ipsecsa_kt, id); - if ((error == 0) && wantpunlock) { - VOP_UNLOCK(dvp, 0); - cnp->cn_flags |= PDIRUNLOCK; - } - return (error); - - case KFSipsecspdir: - if (cnp->cn_flags & ISDOTDOT) { - kt = &kern_targets[0]; - goto found; - } - - for (i = 2; i < nipsecsp_targets; i++) { - kt = &ipsecsp_targets[i]; - if (cnp->cn_namelen == kt->kt_namlen && - memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) - goto found; - } - - ep = NULL; - id = strtoul(pname, &ep, 10); - if (!ep || *ep || ep == pname) - break; - - error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsp, &ipsecsp_kt, id); - if ((error == 0) && wantpunlock) { - VOP_UNLOCK(dvp, 0); - cnp->cn_flags |= PDIRUNLOCK; - } - return (error); -#endif - - default: - return (ENOTDIR); - } - - return (cnp->cn_nameiop == LOOKUP ? 
ENOENT : EROFS); -} - -int -kernfs_open(v) - void *v; -{ - struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); -#ifdef IPSEC - struct mbuf *m; - struct secpolicy *sp; -#endif - - switch (kfs->kfs_type) { -#ifdef IPSEC - case KFSipsecsa: - m = key_setdumpsa_spi(htonl(kfs->kfs_value)); - if (m) { - m_freem(m); - return (0); - } else - return (ENOENT); - - case KFSipsecsp: - sp = key_getspbyid(kfs->kfs_value); - if (sp) { - kfs->kfs_v = sp; - return (0); - } else - return (ENOENT); -#endif - - default: - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_OPEN, - v, 0); - } -} - -int -kernfs_close(v) - void *v; -{ - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - - switch (kfs->kfs_type) { -#ifdef IPSEC - case KFSipsecsp: - key_freesp((struct secpolicy *)kfs->kfs_v); - break; -#endif - - default: - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_CLOSE, - v, 0); - } - - return (0); -} - -int -kernfs_access(v) - void *v; -{ - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct vattr va; - int error; - - if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0) - return (error); - - return (vaccess(va.va_type, va.va_mode, va.va_uid, va.va_gid, - ap->a_mode, ap->a_cred)); -} - -static int -kernfs_default_fileop_getattr(v) - void *v; -{ - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct vattr *vap = ap->a_vap; - - vap->va_nlink = 1; - vap->va_bytes = vap->va_size = 0; - - return 0; -} - -int -kernfs_getattr(v) - void *v; -{ - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } 
*/ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - struct kernfs_subdir *ks; - struct vattr *vap = ap->a_vap; - int error = 0; - char strbuf[KSTRING], *buf; - size_t nread, total; - - VATTR_NULL(vap); - vap->va_type = ap->a_vp->v_type; - vap->va_uid = 0; - vap->va_gid = 0; - vap->va_mode = kfs->kfs_mode; - vap->va_fileid = kfs->kfs_fileno; - vap->va_flags = 0; - vap->va_size = 0; - vap->va_blocksize = DEV_BSIZE; - /* - * Make all times be current TOD, except for the "boottime" node. - * Avoid microtime(9), it's slow. - * We don't guard the read from time(9) with splclock(9) since we - * don't actually need to be THAT sure the access is atomic. - */ - if (kfs->kfs_kt && kfs->kfs_kt->kt_namlen == 8 && - !memcmp(kfs->kfs_kt->kt_name, "boottime", 8)) { - TIMEVAL_TO_TIMESPEC(&boottime, &vap->va_ctime); - } else { - TIMEVAL_TO_TIMESPEC(&time, &vap->va_ctime); - } - vap->va_atime = vap->va_mtime = vap->va_ctime; - vap->va_gen = 0; - vap->va_flags = 0; - vap->va_rdev = 0; - vap->va_bytes = 0; - - switch (kfs->kfs_type) { - case KFSkern: - vap->va_nlink = nkern_dirs; - vap->va_bytes = vap->va_size = DEV_BSIZE; - break; - - case KFSroot: - vap->va_nlink = 1; - vap->va_bytes = vap->va_size = DEV_BSIZE; - break; - - case KFSsubdir: - ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data; - vap->va_nlink = ks->ks_dirs; - vap->va_bytes = vap->va_size = DEV_BSIZE; - break; - - case KFSnull: - case KFStime: - case KFSint: - case KFSstring: - case KFShostname: - case KFSavenrun: - case KFSdevice: - case KFSmsgbuf: -#ifdef IPSEC - case KFSipsecsa: - case KFSipsecsp: -#endif - vap->va_nlink = 1; - total = 0; - do { - buf = strbuf; - error = kernfs_xread(kfs, total, &buf, - sizeof(strbuf), &nread); - total += nread; - } while (error == 0 && nread != 0); - vap->va_bytes = vap->va_size = total; - break; - -#ifdef IPSEC - case KFSipsecsadir: - case KFSipsecspdir: - vap->va_nlink = 2; - vap->va_bytes = vap->va_size = DEV_BSIZE; - break; -#endif - - default: - error = 
kernfs_try_fileop(kfs->kfs_type, - KERNFS_FILEOP_GETATTR, v, EINVAL); - break; - } - - return (error); -} - -/*ARGSUSED*/ -int -kernfs_setattr(v) - void *v; -{ - - /* - * Silently ignore attribute changes. - * This allows for open with truncate to have no - * effect until some data is written. I want to - * do it this way because all writes are atomic. - */ - return (0); -} - -static int -kernfs_default_xread(v) - void *v; -{ - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap = v; - struct uio *uio = ap->a_uio; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - char strbuf[KSTRING], *buf; - off_t off; - size_t len; - int error; - - if (ap->a_vp->v_type == VDIR) - return (EOPNOTSUPP); - - off = uio->uio_offset; - buf = strbuf; - if ((error = kernfs_xread(kfs, off, &buf, sizeof(strbuf), &len)) == 0) - error = uiomove(buf, len, uio); - return (error); -} - -int -kernfs_read(v) - void *v; -{ - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_READ, v, 0); -} - -static int -kernfs_default_xwrite(v) - void *v; -{ - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - struct uio *uio = ap->a_uio; - int error, xlen; - char strbuf[KSTRING]; - - if (uio->uio_offset != 0) - return (EINVAL); - - xlen = min(uio->uio_resid, KSTRING-1); - if ((error = uiomove(strbuf, xlen, uio)) != 0) - return (error); - - if (uio->uio_resid != 0) - return (EIO); - - strbuf[xlen] = '\0'; - xlen = strlen(strbuf); - return (kernfs_xwrite(kfs, strbuf, xlen)); -} - -int -kernfs_write(v) - void *v; -{ - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap = v; - struct 
kernfs_node *kfs = VTOKERN(ap->a_vp); - - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_WRITE, v, 0); -} - -int -kernfs_ioctl(v) - void *v; -{ - struct vop_ioctl_args /* { - const struct vnodeop_desc *a_desc; - struct vnode *a_vp; - u_long a_command; - void *a_data; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_IOCTL, v, - EPASSTHROUGH); -} - -int -kernfs_mmap(v) - void *v; -{ - struct vop_mmap_args /* { - const struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - - return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_MMAP, v, 0); -} - -static int -kernfs_setdirentfileno_kt(struct dirent *d, const struct kern_target *kt, - u_int32_t value, struct vop_readdir_args *ap) -{ - struct kernfs_node *kfs; - struct vnode *vp; - int error; - - if ((error = kernfs_allocvp(ap->a_vp->v_mount, &vp, kt->kt_tag, kt, - value)) != 0) - return error; - if (kt->kt_tag == KFSdevice) { - struct vattr va; - if ((error = VOP_GETATTR(vp, &va, ap->a_cred, - ap->a_uio->uio_segflg == UIO_USERSPACE ? 
- ap->a_uio->uio_procp : &proc0)) != 0) - return (error); - d->d_fileno = va.va_fileid; - } else { - kfs = VTOKERN(vp); - d->d_fileno = kfs->kfs_fileno; - } - vput(vp); - return 0; -} - -static int -kernfs_setdirentfileno(struct dirent *d, off_t entry, - struct kernfs_node *thisdir_kfs, const struct kern_target *parent_kt, - const struct kern_target *kt, struct vop_readdir_args *ap) -{ - const struct kern_target *ikt; - int error; - - switch (entry) { - case 0: - d->d_fileno = thisdir_kfs->kfs_fileno; - return 0; - case 1: - ikt = parent_kt; - break; - default: - ikt = kt; - break; - } - if (ikt != thisdir_kfs->kfs_kt) { - if ((error = kernfs_setdirentfileno_kt(d, ikt, 0, ap)) != 0) - return error; - } else - d->d_fileno = thisdir_kfs->kfs_fileno; - return 0; -} - -int -kernfs_readdir(v) - void *v; -{ - struct vop_readdir_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - int *a_eofflag; - off_t **a_cookies; - int a_*ncookies; - } */ *ap = v; - struct uio *uio = ap->a_uio; - struct dirent d; - struct kernfs_node *kfs = VTOKERN(ap->a_vp); - const struct kern_target *kt; - const struct dyn_kern_target *dkt = NULL; - const struct kernfs_subdir *ks; - off_t i, j; - int error; - off_t *cookies = NULL; - int ncookies = 0, n; -#ifdef IPSEC - struct secasvar *sav, *sav2; - struct secpolicy *sp; -#endif - - if (uio->uio_resid < UIO_MX) - return (EINVAL); - if (uio->uio_offset < 0) - return (EINVAL); - - error = 0; - i = uio->uio_offset; - memset(&d, 0, sizeof(d)); - d.d_reclen = UIO_MX; - ncookies = uio->uio_resid / UIO_MX; - - switch (kfs->kfs_type) { - case KFSkern: - if (i >= nkern_targets) - return (0); - - if (ap->a_ncookies) { - ncookies = min(ncookies, (nkern_targets - i)); - cookies = malloc(ncookies * sizeof(off_t), M_TEMP, - M_WAITOK); - *ap->a_cookies = cookies; - } - - n = 0; - for (; i < nkern_targets && uio->uio_resid >= UIO_MX; i++) { - if (i < static_nkern_targets) - kt = &kern_targets[i]; - else { - if (dkt == NULL) { - dkt = 
SIMPLEQ_FIRST(&dyn_kern_targets); - for (j = static_nkern_targets; j < i && - dkt != NULL; j++) - dkt = SIMPLEQ_NEXT(dkt, dkt_queue); - if (j != i) - break; - } else { - dkt = SIMPLEQ_NEXT(dkt, dkt_queue); - if (dkt == NULL) - break; - } - kt = &dkt->dkt_kt; - } - if (kt->kt_tag == KFSdevice) { - dev_t *dp = kt->kt_data; - struct vnode *fvp; - - if (*dp == NODEV || - !vfinddev(*dp, kt->kt_vtype, &fvp)) - continue; - } - d.d_namlen = kt->kt_namlen; - if ((error = kernfs_setdirentfileno(&d, i, kfs, - &kern_targets[0], kt, ap)) != 0) - break; - memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1); - d.d_type = kt->kt_type; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - } - ncookies = n; - break; - - case KFSroot: - if (i >= 2) - return 0; - - if (ap->a_ncookies) { - ncookies = min(ncookies, (2 - i)); - cookies = malloc(ncookies * sizeof(off_t), M_TEMP, - M_WAITOK); - *ap->a_cookies = cookies; - } - - n = 0; - for (; i < 2 && uio->uio_resid >= UIO_MX; i++) { - kt = &kern_targets[i]; - d.d_namlen = kt->kt_namlen; - d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0); - memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1); - d.d_type = kt->kt_type; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - } - ncookies = n; - break; - - case KFSsubdir: - ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data; - if (i >= ks->ks_nentries) - return (0); - - if (ap->a_ncookies) { - ncookies = min(ncookies, (ks->ks_nentries - i)); - cookies = malloc(ncookies * sizeof(off_t), M_TEMP, - M_WAITOK); - *ap->a_cookies = cookies; - } - - dkt = SIMPLEQ_FIRST(&ks->ks_entries); - for (j = 0; j < i && dkt != NULL; j++) - dkt = SIMPLEQ_NEXT(dkt, dkt_queue); - n = 0; - for (; i < ks->ks_nentries && uio->uio_resid >= UIO_MX; i++) { - if (i < 2) - kt = &subdir_targets[i]; - else { - /* check if ks_nentries lied to us */ - if (dkt == NULL) - break; - kt = &dkt->dkt_kt; - dkt = SIMPLEQ_NEXT(dkt, 
dkt_queue); - } - if (kt->kt_tag == KFSdevice) { - dev_t *dp = kt->kt_data; - struct vnode *fvp; - - if (*dp == NODEV || - !vfinddev(*dp, kt->kt_vtype, &fvp)) - continue; - } - d.d_namlen = kt->kt_namlen; - if ((error = kernfs_setdirentfileno(&d, i, kfs, - ks->ks_parent, kt, ap)) != 0) - break; - memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1); - d.d_type = kt->kt_type; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - } - ncookies = n; - break; - -#ifdef IPSEC - case KFSipsecsadir: - /* count SA in the system */ - n = 0; - TAILQ_FOREACH(sav, &satailq, tailq) { - for (sav2 = TAILQ_FIRST(&satailq); - sav2 != sav; - sav2 = TAILQ_NEXT(sav2, tailq)) { - if (sav->spi == sav2->spi) { - /* multiple SA with same SPI */ - break; - } - } - if (sav == sav2 || sav->spi != sav2->spi) - n++; - } - - if (i >= nipsecsa_targets + n) - return (0); - - if (ap->a_ncookies) { - ncookies = min(ncookies, (n - i)); - cookies = malloc(ncookies * sizeof(off_t), M_TEMP, - M_WAITOK); - *ap->a_cookies = cookies; - } - - n = 0; - for (; i < nipsecsa_targets && uio->uio_resid >= UIO_MX; i++) { - kt = &ipsecsa_targets[i]; - d.d_namlen = kt->kt_namlen; - if ((error = kernfs_setdirentfileno(&d, i, kfs, - &kern_targets[0], kt, ap)) != 0) - break; - memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1); - d.d_type = kt->kt_type; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - } - if (error) { - ncookies = n; - break; - } - - TAILQ_FOREACH(sav, &satailq, tailq) { - for (sav2 = TAILQ_FIRST(&satailq); - sav2 != sav; - sav2 = TAILQ_NEXT(sav2, tailq)) { - if (sav->spi == sav2->spi) { - /* multiple SA with same SPI */ - break; - } - } - if (sav != sav2 && sav->spi == sav2->spi) - continue; - if (uio->uio_resid < UIO_MX) - break; - if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsa_kt, - sav->spi, ap)) != 0) - break; - d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), - "%u", 
ntohl(sav->spi)); - d.d_type = DT_REG; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - i++; - } - ncookies = n; - break; - - case KFSipsecspdir: - /* count SP in the system */ - n = 0; - TAILQ_FOREACH(sp, &sptailq, tailq) - n++; - - if (i >= nipsecsp_targets + n) - return (0); - - if (ap->a_ncookies) { - ncookies = min(ncookies, (n - i)); - cookies = malloc(ncookies * sizeof(off_t), M_TEMP, - M_WAITOK); - *ap->a_cookies = cookies; - } - - n = 0; - for (; i < nipsecsp_targets && uio->uio_resid >= UIO_MX; i++) { - kt = &ipsecsp_targets[i]; - d.d_namlen = kt->kt_namlen; - if ((error = kernfs_setdirentfileno(&d, i, kfs, - &kern_targets[0], kt, ap)) != 0) - break; - memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1); - d.d_type = kt->kt_type; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - } - if (error) { - ncookies = n; - break; - } - - TAILQ_FOREACH(sp, &sptailq, tailq) { - if (uio->uio_resid < UIO_MX) - break; - if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsp_kt, - sp->id, ap)) != 0) - break; - d.d_namlen = snprintf(d.d_name, sizeof(d.d_name), - "%u", sp->id); - d.d_type = DT_REG; - if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) - break; - if (cookies) - *cookies++ = i + 1; - n++; - i++; - } - ncookies = n; - break; -#endif - - default: - error = ENOTDIR; - break; - } - - if (ap->a_ncookies) { - if (error) { - if (cookies) - free(*ap->a_cookies, M_TEMP); - *ap->a_ncookies = 0; - *ap->a_cookies = NULL; - } else - *ap->a_ncookies = ncookies; - } - - uio->uio_offset = i; - return (error); -} - -int -kernfs_inactive(v) - void *v; -{ - struct vop_inactive_args /* { - struct vnode *a_vp; - struct proc *a_p; - } */ *ap = v; - struct vnode *vp = ap->a_vp; - const struct kernfs_node *kfs = VTOKERN(ap->a_vp); -#ifdef IPSEC - struct mbuf *m; - struct secpolicy *sp; -#endif - - VOP_UNLOCK(vp, 0); - switch (kfs->kfs_type) { -#ifdef IPSEC - case 
KFSipsecsa: - m = key_setdumpsa_spi(htonl(kfs->kfs_value)); - if (m) - m_freem(m); - else - vgone(vp); - break; - case KFSipsecsp: - sp = key_getspbyid(kfs->kfs_value); - if (sp) - key_freesp(sp); - else { - /* should never happen as we hold a refcnt */ - vgone(vp); - } - break; -#endif - default: - break; - } - return (0); -} - -int -kernfs_reclaim(v) - void *v; -{ - struct vop_reclaim_args /* { - struct vnode *a_vp; - } */ *ap = v; - - return (kernfs_freevp(ap->a_vp)); -} - -/* - * Return POSIX pathconf information applicable to special devices. - */ -int -kernfs_pathconf(v) - void *v; -{ - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - register_t *a_retval; - } */ *ap = v; - - switch (ap->a_name) { - case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; - return (0); - case _PC_MAX_CANON: - *ap->a_retval = MAX_CANON; - return (0); - case _PC_MAX_INPUT: - *ap->a_retval = MAX_INPUT; - return (0); - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - return (0); - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 1; - return (0); - case _PC_VDISABLE: - *ap->a_retval = _POSIX_VDISABLE; - return (0); - case _PC_SYNC_IO: - *ap->a_retval = 1; - return (0); - default: - return (EINVAL); - } - /* NOTREACHED */ -} - -/* - * Print out the contents of a /dev/fd vnode. 
- */ -/* ARGSUSED */ -int -kernfs_print(v) - void *v; -{ - - printf("tag VT_KERNFS, kernfs vnode\n"); - return (0); -} - -int -kernfs_link(v) - void *v; -{ - struct vop_link_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap = v; - - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); - return (EROFS); -} - -int -kernfs_symlink(v) - void *v; -{ - struct vop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - char *a_target; - } */ *ap = v; - - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); - return (EROFS); -} diff -r 64cd054aa143 -r 0255f48b757f netbsd-2.0-xen-sparse/sys/nfs/files.nfs --- a/netbsd-2.0-xen-sparse/sys/nfs/files.nfs Sun Dec 4 17:24:24 2005 +++ /dev/null Sun Dec 4 19:12:00 2005 @@ -1,34 +0,0 @@ -# $NetBSD: files.nfs,v 1.3 2004/03/11 21:48:43 cl Exp $ - -deffs fs_nfs.h NFS - -defflag opt_nfs_boot.h NFS_BOOT_BOOTP NFS_BOOT_BOOTPARAM NFS_BOOT_DHCP - NFS_BOOT_GATEWAY NFS_BOOT_TCP - NFS_BOOT_BOOTSTATIC - -defparam opt_nfs_boot.h NFS_BOOT_BOOTP_REQFILE NFS_BOOT_OPTIONS - NFS_BOOT_RWSIZE - NFS_BOOTSTATIC_MYIP NFS_BOOTSTATIC_GWIP - NFS_BOOTSTATIC_MASK NFS_BOOTSTATIC_SERVADDR - NFS_BOOTSTATIC_SERVER - -defflag opt_nfs.h NFS_V2_ONLY - -defflag NFSSERVER - -file nfs/krpc_subr.c nfs -file nfs/nfs_bio.c nfs -file nfs/nfs_boot.c nfs -file nfs/nfs_bootdhcp.c nfs & (nfs_boot_bootp | nfs_boot_dhcp) -file nfs/nfs_bootparam.c nfs & nfs_boot_bootparam -file nfs/nfs_bootstatic.c nfs & nfs_boot_bootstatic -file nfs/nfs_kq.c nfs -file nfs/nfs_node.c nfs -file nfs/nfs_nqlease.c nfsserver | nfs -file nfs/nfs_serv.c nfsserver -file nfs/nfs_socket.c nfsserver | nfs -file nfs/nfs_srvcache.c nfsserver -file nfs/nfs_subs.c nfsserver | nfs -file nfs/nfs_syscalls.c nfsserver | nfs -file nfs/nfs_vfsops.c nfs -file nfs/nfs_vnops.c nfs _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx 
http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |