[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Merge with xen-unstable.hg.
# HG changeset patch # User Hollis Blanchard <hollisb@xxxxxxxxxx> # Node ID 2234703167568d71dc67c7ea77c93111b061cfd3 # Parent ee4397571e446dae11899b4cf16668470ef96f84 # Parent 7826e5482d421cf3bfd2a3e54800f7a21ca2a9f9 Merge with xen-unstable.hg. Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx> --- extras/mini-os/include/x86/spinlock.h | 121 patches/linux-2.6.16.13/blktap-aio-16_03_06.patch | 274 patches/linux-2.6.16.13/device_bind.patch | 14 patches/linux-2.6.16.13/fix-hz-suspend.patch | 25 patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch | 13 patches/linux-2.6.16.13/i386-mach-io-check-nmi.patch | 35 patches/linux-2.6.16.13/ipv6-no-autoconf.patch | 22 patches/linux-2.6.16.13/net-csum.patch | 57 patches/linux-2.6.16.13/net-gso-0-base.patch | 2510 -- patches/linux-2.6.16.13/net-gso-1-check-dodgy.patch | 22 patches/linux-2.6.16.13/net-gso-2-checksum-fix.patch | 400 patches/linux-2.6.16.13/net-gso-3-fix-errorcheck.patch | 13 patches/linux-2.6.16.13/net-gso-4-kill-warnon.patch | 18 patches/linux-2.6.16.13/pmd-shared.patch | 100 patches/linux-2.6.16.13/rcu_needs_cpu.patch | 31 patches/linux-2.6.16.13/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26 patches/linux-2.6.16.13/series | 23 patches/linux-2.6.16.13/smp-alts.patch | 540 patches/linux-2.6.16.13/tpm_plugin_2.6.17.patch | 1381 - patches/linux-2.6.16.13/x86-elfnote-as-preprocessor-macro.patch | 28 patches/linux-2.6.16.13/x86-increase-interrupt-vector-range.patch | 73 patches/linux-2.6.16.13/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 168 patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 71 patches/linux-2.6.16.13/xen-hotplug.patch | 9 patches/linux-2.6.16.13/xenoprof-generic.patch | 568 tools/check/check_hotplug | 16 tools/debugger/pdb/Domain.ml | 61 tools/debugger/pdb/Domain.mli | 39 tools/debugger/pdb/Intel.ml | 66 tools/debugger/pdb/Makefile | 57 tools/debugger/pdb/OCamlMakefile | 1149 - tools/debugger/pdb/PDB.ml | 342 
tools/debugger/pdb/Process.ml | 79 tools/debugger/pdb/Process.mli | 41 tools/debugger/pdb/Util.ml | 165 tools/debugger/pdb/Xen_domain.ml | 43 tools/debugger/pdb/Xen_domain.mli | 25 tools/debugger/pdb/debugger.ml | 372 tools/debugger/pdb/evtchn.ml | 40 tools/debugger/pdb/evtchn.mli | 19 tools/debugger/pdb/linux-2.6-module/Makefile | 21 tools/debugger/pdb/linux-2.6-module/debug.c | 851 tools/debugger/pdb/linux-2.6-module/module.c | 337 tools/debugger/pdb/linux-2.6-module/pdb_debug.h | 47 tools/debugger/pdb/linux-2.6-module/pdb_module.h | 142 tools/debugger/pdb/linux-2.6-patches/Makefile | 11 tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch | 18 tools/debugger/pdb/linux-2.6-patches/kdebug.patch | 10 tools/debugger/pdb/linux-2.6-patches/makefile.patch | 10 tools/debugger/pdb/linux-2.6-patches/ptrace.patch | 10 tools/debugger/pdb/linux-2.6-patches/traps.patch | 19 tools/debugger/pdb/pdb_caml_domain.c | 527 tools/debugger/pdb/pdb_caml_evtchn.c | 186 tools/debugger/pdb/pdb_caml_process.c | 587 tools/debugger/pdb/pdb_caml_xc.c | 170 tools/debugger/pdb/pdb_caml_xcs.c | 307 tools/debugger/pdb/pdb_caml_xen.h | 39 tools/debugger/pdb/pdb_xen.c | 75 tools/debugger/pdb/readme | 96 tools/debugger/pdb/server.ml | 241 tools/debugger/pdb/xcs.ml | 85 tools/debugger/pdb/xcs.mli | 13 tools/firmware/acpi/Makefile | 68 tools/firmware/acpi/README | 22 tools/firmware/acpi/acpi2_0.h | 331 tools/firmware/acpi/acpi_build.c | 232 tools/firmware/acpi/acpi_dsdt.asl | 521 tools/firmware/acpi/acpi_dsdt.c | 296 tools/firmware/acpi/acpi_facs.c | 72 tools/firmware/acpi/acpi_facs.h | 32 tools/firmware/acpi/acpi_fadt.c | 193 tools/firmware/acpi/acpi_fadt.h | 165 tools/firmware/acpi/acpi_gen.c | 53 tools/firmware/acpi/acpi_madt.c | 68 tools/firmware/acpi/acpi_madt.h | 44 tools/firmware/acpi/acpi_rsdt.c | 68 tools/firmware/hvmloader/acpi_madt.c | 188 tools/misc/mbootpack/GPL | 340 tools/misc/mbootpack/Makefile | 74 tools/misc/mbootpack/README | 75 tools/misc/mbootpack/bin2c.c | 356 
tools/misc/mbootpack/bootsect.S | 136 tools/misc/mbootpack/buildimage.c | 175 tools/misc/mbootpack/mb_header.h | 90 tools/misc/mbootpack/mb_info.h | 217 tools/misc/mbootpack/mbootpack.c | 706 tools/misc/mbootpack/mbootpack.h | 109 tools/misc/mbootpack/setup.S | 1064 - tools/pygrub/src/fsys/__init__.py | 64 tools/pygrub/src/fsys/ext2/__init__.py | 38 tools/pygrub/src/fsys/ext2/ext2module.c | 387 tools/pygrub/src/fsys/ext2/test.py | 15 tools/pygrub/src/fsys/reiser/__init__.py | 39 tools/pygrub/src/fsys/reiser/reisermodule.c | 345 tools/python/xen/xm/sysrq.py | 32 tools/security/example.txt | 376 tools/security/install.txt | 87 tools/xm-test/ramdisk/configs/buildroot | 330 xen/arch/ia64/vmx/mm.c | 153 xen/arch/x86/hvm/i8259.c | 620 xen/arch/x86/hvm/svm/instrlen.c | 479 xen/include/asm-ia64/linux/asm/acpi.h | 121 xen/include/asm-ia64/linux/asm/atomic.h | 183 xen/include/asm-ia64/linux/asm/numa.h | 74 xen/include/asm-ia64/vmx_uaccess.h | 156 xen/include/asm-x86/hvm/vpit.h | 103 .hgignore | 19 .hgtags | 1 Config.mk | 56 Makefile | 5 buildconfigs/Rules.mk | 3 buildconfigs/linux-defconfig_xen0_ia64 | 16 buildconfigs/linux-defconfig_xenU_ia64 | 9 buildconfigs/linux-defconfig_xen_ia64 | 16 buildconfigs/linux-defconfig_xen_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_64 | 1 buildconfigs/mk.linux-2.6-xen | 2 config/Linux.mk | 8 config/OpenBSD.mk | 1 config/StdGNU.mk | 30 config/SunOS.mk | 35 config/ia64.mk | 2 config/powerpc64.mk | 1 config/x86_32.mk | 10 config/x86_64.mk | 12 docs/Makefile | 5 docs/man/xm.pod.1 | 8 docs/src/interface.tex | 42 docs/src/user.tex | 28 docs/xen-api/Makefile | 23 docs/xen-api/coversheet.tex | 50 docs/xen-api/fdl.tex | 488 docs/xen-api/presentation.tex | 149 docs/xen-api/todo.tex | 140 docs/xen-api/vm-lifecycle.tex | 24 docs/xen-api/vm_lifecycle.dot | 15 docs/xen-api/wire-protocol.tex | 287 docs/xen-api/xen.eps | 49 docs/xen-api/xenapi-coversheet.tex | 40 docs/xen-api/xenapi-datamodel-graph.dot | 17 docs/xen-api/xenapi-datamodel.tex | 9648 
++++++++++ docs/xen-api/xenapi.tex | 56 extras/mini-os/Makefile | 13 extras/mini-os/README | 4 extras/mini-os/arch/x86/mm.c | 428 extras/mini-os/arch/x86/sched.c | 150 extras/mini-os/arch/x86/setup.c | 108 extras/mini-os/events.c | 48 extras/mini-os/include/events.h | 9 extras/mini-os/include/mm.h | 175 extras/mini-os/include/sched.h | 29 extras/mini-os/include/spinlock.h | 55 extras/mini-os/include/time.h | 6 extras/mini-os/include/x86/arch_mm.h | 209 extras/mini-os/include/x86/arch_sched.h | 58 extras/mini-os/include/x86/arch_spinlock.h | 93 extras/mini-os/include/x86/os.h | 7 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 8 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 8 extras/mini-os/kernel.c | 99 extras/mini-os/mm.c | 383 extras/mini-os/sched.c | 226 extras/mini-os/time.c | 26 linux-2.6-xen-sparse/arch/i386/Kconfig | 3 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c | 3 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 3 linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c | 38 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 46 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 2 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c | 1 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 8 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 16 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 10 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 10 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 27 linux-2.6-xen-sparse/arch/i386/oprofile/Makefile | 5 linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 567 linux-2.6-xen-sparse/arch/ia64/Kconfig | 17 linux-2.6-xen-sparse/arch/ia64/dig/setup.c | 23 linux-2.6-xen-sparse/arch/ia64/kernel/Makefile | 62 linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S | 1 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 4 linux-2.6-xen-sparse/arch/ia64/xen/Makefile | 3 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 375 linux-2.6-xen-sparse/arch/ia64/xen/util.c | 2 linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c | 303 
linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c | 319 linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c | 656 linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c | 263 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 21 linux-2.6-xen-sparse/arch/x86_64/Kconfig | 2 linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile | 1 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S | 7 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c | 1 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 49 linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c | 5 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 41 linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile | 5 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 12 linux-2.6-xen-sparse/drivers/char/tty_io.c | 14 linux-2.6-xen-sparse/drivers/serial/Kconfig | 1 linux-2.6-xen-sparse/drivers/xen/balloon/Makefile | 2 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 233 linux-2.6-xen-sparse/drivers/xen/balloon/common.h | 58 linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c | 165 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 119 linux-2.6-xen-sparse/drivers/xen/blkback/common.h | 7 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c | 5 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 23 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 74 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h | 2 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 85 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 875 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 7 linux-2.6-xen-sparse/drivers/xen/char/mem.c | 57 linux-2.6-xen-sparse/drivers/xen/console/console.c | 36 linux-2.6-xen-sparse/drivers/xen/core/Makefile | 2 linux-2.6-xen-sparse/drivers/xen/core/features.c | 4 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 4 linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c | 185 linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 212 linux-2.6-xen-sparse/drivers/xen/core/skbuff.c | 7 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c | 3 
linux-2.6-xen-sparse/drivers/xen/netback/common.h | 7 linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 96 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c | 64 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 218 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 127 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 259 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 205 linux-2.6-xen-sparse/drivers/xen/tpmback/common.h | 8 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c | 19 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c | 14 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 8 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 9 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c | 11 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c | 5 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 299 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h | 74 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c | 271 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c | 11 linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c | 500 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h | 3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 10 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 13 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h | 3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h | 60 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 21 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h | 1 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h | 1 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h | 4 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/xenoprof.h | 48 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 21 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 277 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 28 linux-2.6-xen-sparse/include/asm-ia64/maddr.h | 20 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 3 linux-2.6-xen-sparse/include/asm-ia64/xen/xcom_hcall.h | 76 linux-2.6-xen-sparse/include/asm-ia64/xen/xencomm.h | 60 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h | 1 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h | 10 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h | 3 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h | 39 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 21 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 19 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h | 17 linux-2.6-xen-sparse/include/linux/skbuff.h | 24 linux-2.6-xen-sparse/include/xen/balloon.h | 20 linux-2.6-xen-sparse/include/xen/gnttab.h | 5 linux-2.6-xen-sparse/include/xen/public/evtchn.h | 3 linux-2.6-xen-sparse/include/xen/xenbus.h | 1 linux-2.6-xen-sparse/include/xen/xencons.h | 3 linux-2.6-xen-sparse/include/xen/xenoprof.h | 42 linux-2.6-xen-sparse/lib/Makefile | 2 linux-2.6-xen-sparse/mm/Kconfig | 2 linux-2.6-xen-sparse/mm/memory.c | 6 linux-2.6-xen-sparse/mm/mmap.c | 17 linux-2.6-xen-sparse/mm/page_alloc.c | 3 linux-2.6-xen-sparse/net/core/skbuff.c | 125 patches/linux-2.6.16.32/blktap-aio-16_03_06.patch | 161 patches/linux-2.6.16.32/device_bind.patch | 9 patches/linux-2.6.16.32/fix-hz-suspend.patch | 9 patches/linux-2.6.16.32/fix-ide-cd-pio-mode.patch | 13 patches/linux-2.6.16.32/i386-mach-io-check-nmi.patch | 30 patches/linux-2.6.16.32/ipv6-no-autoconf.patch | 16 patches/linux-2.6.16.32/kasprintf.patch | 32 patches/linux-2.6.16.32/net-csum.patch | 40 patches/linux-2.6.16.32/net-gso-0-base.patch | 1970 ++ patches/linux-2.6.16.32/net-gso-1-check-dodgy.patch | 16 patches/linux-2.6.16.32/net-gso-2-checksum-fix.patch | 311 patches/linux-2.6.16.32/net-gso-3-fix-errorcheck.patch | 13 
patches/linux-2.6.16.32/net-gso-4-kill-warnon.patch | 26 patches/linux-2.6.16.32/net-gso-5-rcv-mss.patch | 11 patches/linux-2.6.16.32/pci-mmconfig-fix-from-2.6.17.patch | 143 patches/linux-2.6.16.32/pmd-shared.patch | 57 patches/linux-2.6.16.32/rcu_needs_cpu.patch | 18 patches/linux-2.6.16.32/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26 patches/linux-2.6.16.32/series | 27 patches/linux-2.6.16.32/smp-alts.patch | 330 patches/linux-2.6.16.32/tpm_plugin_2.6.17.patch | 703 patches/linux-2.6.16.32/vsnprintf.patch | 177 patches/linux-2.6.16.32/x86-elfnote-as-preprocessor-macro.patch | 25 patches/linux-2.6.16.32/x86-increase-interrupt-vector-range.patch | 73 patches/linux-2.6.16.32/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 39 patches/linux-2.6.16.32/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 63 patches/linux-2.6.16.32/xen-hotplug.patch | 10 patches/linux-2.6.16.32/xenoprof-generic.patch | 294 tools/Makefile | 3 tools/Rules.mk | 13 tools/blktap/drivers/Makefile | 12 tools/blktap/drivers/blktapctrl.c | 215 tools/blktap/drivers/blktapctrl.h | 6 tools/blktap/drivers/tapdisk.c | 72 tools/blktap/drivers/tapdisk.h | 15 tools/blktap/lib/Makefile | 9 tools/blktap/lib/blktaplib.h | 26 tools/blktap/lib/xenbus.c | 193 tools/blktap/lib/xs_api.c | 103 tools/blktap/lib/xs_api.h | 2 tools/check/check_brctl | 31 tools/check/check_crypto_lib | 11 tools/check/check_iproute | 29 tools/check/check_openssl_devel | 11 tools/check/check_python | 17 tools/check/check_python_devel | 16 tools/check/check_udev | 16 tools/check/check_x11_devel | 11 tools/check/check_zlib_devel | 17 tools/check/check_zlib_lib | 17 tools/check/chk | 19 tools/console/Makefile | 10 tools/console/daemon/io.c | 2 tools/console/daemon/utils.c | 2 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c | 2 tools/examples/Makefile | 5 tools/examples/blktap | 18 tools/examples/block | 31 tools/examples/external-device-migrate | 56 
tools/examples/init.d/xendomains | 8 tools/examples/locking.sh | 2 tools/examples/vif-bridge | 2 tools/examples/vif-common.sh | 2 tools/examples/vif-nat | 4 tools/examples/vif-route | 2 tools/examples/vtpm-common.sh | 6 tools/examples/xen-backend.rules | 1 tools/examples/xend-config.sxp | 47 tools/examples/xmexample.hvm | 33 tools/examples/xmexample.vti | 9 tools/firmware/Makefile | 7 tools/firmware/hvmloader/Makefile | 23 tools/firmware/hvmloader/acpi/Makefile | 61 tools/firmware/hvmloader/acpi/README | 24 tools/firmware/hvmloader/acpi/acpi2_0.h | 346 tools/firmware/hvmloader/acpi/build.c | 206 tools/firmware/hvmloader/acpi/dsdt.asl | 657 tools/firmware/hvmloader/acpi/dsdt.c | 452 tools/firmware/hvmloader/acpi/static_tables.c | 145 tools/firmware/hvmloader/acpi_ssdt_tpm.asl | 29 tools/firmware/hvmloader/acpi_ssdt_tpm.h | 25 tools/firmware/hvmloader/acpi_utils.c | 318 tools/firmware/hvmloader/acpi_utils.h | 36 tools/firmware/hvmloader/apic_regs.h | 108 tools/firmware/hvmloader/config.h | 13 tools/firmware/hvmloader/hvmloader.c | 468 tools/firmware/hvmloader/mp_tables.c | 493 tools/firmware/hvmloader/pci_regs.h | 108 tools/firmware/hvmloader/smbios.c | 914 tools/firmware/hvmloader/util.c | 617 tools/firmware/hvmloader/util.h | 52 tools/firmware/rombios/rombios.c | 96 tools/firmware/vmxassist/Makefile | 9 tools/firmware/vmxassist/head.S | 2 tools/firmware/vmxassist/machine.h | 1 tools/firmware/vmxassist/setup.c | 3 tools/firmware/vmxassist/trap.S | 2 tools/firmware/vmxassist/util.c | 38 tools/firmware/vmxassist/util.h | 1 tools/firmware/vmxassist/vm86.c | 282 tools/firmware/vmxassist/vm86.h | 10 tools/guest-headers/Makefile | 12 tools/ioemu/Makefile.target | 30 tools/ioemu/d3des.c | 434 tools/ioemu/d3des.h | 51 tools/ioemu/hw/fdc.c | 2 tools/ioemu/hw/ide.c | 4 tools/ioemu/hw/ne2000.c | 35 tools/ioemu/hw/pc.c | 3 tools/ioemu/hw/pci.c | 24 tools/ioemu/hw/piix4acpi.c | 14 tools/ioemu/hw/piix_pci.c | 18 tools/ioemu/hw/rtl8139.c | 12 tools/ioemu/hw/serial.c | 126 
tools/ioemu/hw/tpm_tis.c | 1114 + tools/ioemu/hw/vga.c | 13 tools/ioemu/hw/xen_platform.c | 14 tools/ioemu/keymaps/ja | 3 tools/ioemu/patches/domain-timeoffset | 8 tools/ioemu/patches/fix-vga-scanning-code-overflow | 37 tools/ioemu/patches/hypervisor-rtc | 26 tools/ioemu/patches/ide-cd-dma | 18 tools/ioemu/patches/qemu-bootorder | 22 tools/ioemu/patches/qemu-cleanup | 33 tools/ioemu/patches/qemu-daemonize | 4 tools/ioemu/patches/qemu-logging | 3 tools/ioemu/patches/qemu-pci | 23 tools/ioemu/patches/qemu-target-i386-dm | 16 tools/ioemu/patches/serial-port-rate-limit | 47 tools/ioemu/patches/series | 10 tools/ioemu/patches/vnc-access-monitor-vt | 6 tools/ioemu/patches/vnc-backoff-screen-scan | 226 tools/ioemu/patches/vnc-cleanup | 42 tools/ioemu/patches/vnc-display-find-unused | 15 tools/ioemu/patches/vnc-fixes | 64 tools/ioemu/patches/vnc-listen-specific-interface | 142 tools/ioemu/patches/vnc-password | 119 tools/ioemu/patches/vnc-protocol-fixes | 46 tools/ioemu/patches/vnc-start-vncviewer | 9 tools/ioemu/patches/vnc-title-domain-name | 8 tools/ioemu/patches/xen-build | 13 tools/ioemu/patches/xen-platform-device | 15 tools/ioemu/patches/xen-support-buffered-ioreqs | 8 tools/ioemu/patches/xenstore-block-device-config | 47 tools/ioemu/patches/xenstore-write-vnc-port | 12 tools/ioemu/target-i386-dm/cpu.h | 2 tools/ioemu/target-i386-dm/exec-dm.c | 50 tools/ioemu/target-i386-dm/helper2.c | 131 tools/ioemu/target-i386-dm/i8259-dm.c | 42 tools/ioemu/target-i386-dm/piix_pci-dm.c | 152 tools/ioemu/target-i386-dm/qemu-dm.debug | 7 tools/ioemu/target-i386-dm/rtc-dm.c | 107 tools/ioemu/usb-linux.c | 4 tools/ioemu/vl.c | 118 tools/ioemu/vl.h | 32 tools/ioemu/vnc.c | 413 tools/ioemu/vnc_keysym.h | 10 tools/ioemu/xenstore.c | 190 tools/libfsimage/Makefile | 13 tools/libfsimage/Rules.mk | 32 tools/libfsimage/check-libext2fs | 21 tools/libfsimage/common/Makefile | 46 tools/libfsimage/common/fsimage.c | 142 tools/libfsimage/common/fsimage.h | 52 
tools/libfsimage/common/fsimage_grub.c | 276 tools/libfsimage/common/fsimage_grub.h | 92 tools/libfsimage/common/fsimage_plugin.c | 214 tools/libfsimage/common/fsimage_plugin.h | 65 tools/libfsimage/common/fsimage_priv.h | 62 tools/libfsimage/common/mapfile-GNU | 37 tools/libfsimage/common/mapfile-SunOS | 35 tools/libfsimage/ext2fs-lib/Makefile | 15 tools/libfsimage/ext2fs-lib/ext2fs-lib.c | 172 tools/libfsimage/ext2fs/Makefile | 13 tools/libfsimage/ext2fs/fsys_ext2fs.c | 872 tools/libfsimage/reiserfs/Makefile | 13 tools/libfsimage/reiserfs/fsys_reiserfs.c | 1318 + tools/libfsimage/ufs/Makefile | 13 tools/libfsimage/ufs/fsys_ufs.c | 276 tools/libfsimage/ufs/ufs.h | 228 tools/libxc/Makefile | 20 tools/libxc/ia64/xc_ia64_hvm_build.c | 28 tools/libxc/ia64/xc_ia64_linux_restore.c | 51 tools/libxc/ia64/xc_ia64_linux_save.c | 71 tools/libxc/xc_acm.c | 9 tools/libxc/xc_core.c | 2 tools/libxc/xc_domain.c | 79 tools/libxc/xc_evtchn.c | 6 tools/libxc/xc_hvm_build.c | 477 tools/libxc/xc_linux.c | 82 tools/libxc/xc_linux_build.c | 310 tools/libxc/xc_linux_restore.c | 149 tools/libxc/xc_linux_save.c | 120 tools/libxc/xc_load_elf.c | 2 tools/libxc/xc_misc.c | 96 tools/libxc/xc_private.c | 156 tools/libxc/xc_private.h | 24 tools/libxc/xc_ptrace.c | 51 tools/libxc/xc_ptrace.h | 24 tools/libxc/xc_ptrace_core.c | 26 tools/libxc/xc_solaris.c | 235 tools/libxc/xc_tbuf.c | 8 tools/libxc/xenctrl.h | 86 tools/libxc/xenguest.h | 53 tools/libxc/xg_private.c | 88 tools/libxc/xg_private.h | 6 tools/libxen/COPYING | 510 tools/libxen/Makefile | 37 tools/libxen/README | 54 tools/libxen/include/xen_boot_type.h | 87 tools/libxen/include/xen_boot_type_internal.h | 37 tools/libxen/include/xen_common.h | 145 tools/libxen/include/xen_cpu_feature.h | 387 tools/libxen/include/xen_cpu_feature_internal.h | 37 tools/libxen/include/xen_driver_type.h | 77 tools/libxen/include/xen_driver_type_internal.h | 37 tools/libxen/include/xen_host.h | 292 tools/libxen/include/xen_host_cpu.h | 239 
tools/libxen/include/xen_host_cpu_decl.h | 30 tools/libxen/include/xen_host_decl.h | 30 tools/libxen/include/xen_int_float_map.h | 53 tools/libxen/include/xen_internal.h | 193 tools/libxen/include/xen_network.h | 273 tools/libxen/include/xen_network_decl.h | 30 tools/libxen/include/xen_on_crash_behaviour.h | 97 tools/libxen/include/xen_on_crash_behaviour_internal.h | 38 tools/libxen/include/xen_on_normal_exit.h | 77 tools/libxen/include/xen_on_normal_exit_internal.h | 37 tools/libxen/include/xen_pif.h | 290 tools/libxen/include/xen_pif_decl.h | 30 tools/libxen/include/xen_sr.h | 282 tools/libxen/include/xen_sr_decl.h | 30 tools/libxen/include/xen_string_string_map.h | 53 tools/libxen/include/xen_user.h | 204 tools/libxen/include/xen_user_decl.h | 30 tools/libxen/include/xen_vbd.h | 285 tools/libxen/include/xen_vbd_decl.h | 30 tools/libxen/include/xen_vbd_mode.h | 77 tools/libxen/include/xen_vbd_mode_internal.h | 37 tools/libxen/include/xen_vdi.h | 344 tools/libxen/include/xen_vdi_decl.h | 30 tools/libxen/include/xen_vdi_type.h | 82 tools/libxen/include/xen_vdi_type_internal.h | 37 tools/libxen/include/xen_vif.h | 305 tools/libxen/include/xen_vif_decl.h | 30 tools/libxen/include/xen_vm.h | 819 tools/libxen/include/xen_vm_decl.h | 30 tools/libxen/include/xen_vm_power_state.h | 97 tools/libxen/include/xen_vm_power_state_internal.h | 37 tools/libxen/include/xen_vtpm.h | 216 tools/libxen/include/xen_vtpm_decl.h | 31 tools/libxen/src/xen_boot_type.c | 83 tools/libxen/src/xen_common.c | 1363 + tools/libxen/src/xen_cpu_feature.c | 143 tools/libxen/src/xen_driver_type.c | 81 tools/libxen/src/xen_host.c | 390 tools/libxen/src/xen_host_cpu.c | 287 tools/libxen/src/xen_int_float_map.c | 37 tools/libxen/src/xen_network.c | 364 tools/libxen/src/xen_on_crash_behaviour.c | 85 tools/libxen/src/xen_on_normal_exit.c | 81 tools/libxen/src/xen_pif.c | 403 tools/libxen/src/xen_sr.c | 388 tools/libxen/src/xen_string_string_map.c | 49 tools/libxen/src/xen_user.c | 201 
tools/libxen/src/xen_vbd.c | 387 tools/libxen/src/xen_vbd_mode.c | 81 tools/libxen/src/xen_vdi.c | 533 tools/libxen/src/xen_vdi_type.c | 82 tools/libxen/src/xen_vif.c | 440 tools/libxen/src/xen_vm.c | 1596 + tools/libxen/src/xen_vm_power_state.c | 85 tools/libxen/src/xen_vtpm.c | 227 tools/libxen/test/test_bindings.c | 424 tools/misc/Makefile | 10 tools/misc/lomount/Makefile | 5 tools/misc/lomount/lomount.c | 2 tools/misc/miniterm/Makefile | 4 tools/misc/miniterm/miniterm.c | 25 tools/misc/xend | 10 tools/misc/xenperf.c | 154 tools/pygrub/Makefile | 8 tools/pygrub/setup.py | 43 tools/pygrub/src/fsimage/fsimage.c | 299 tools/pygrub/src/pygrub | 57 tools/python/Makefile | 6 tools/python/README.XendConfig | 160 tools/python/README.sxpcfg | 117 tools/python/scripts/README | 49 tools/python/scripts/README.lifecycle | 136 tools/python/scripts/xapi.domcfg.py | 37 tools/python/scripts/xapi.py | 537 tools/python/scripts/xapi.vbdcfg.py | 12 tools/python/scripts/xapi.vdicfg.py | 7 tools/python/scripts/xapi.vifcfg.py | 10 tools/python/scripts/xapi.vtpmcfg.py | 3 tools/python/setup.py | 3 tools/python/xen/lowlevel/acm/acm.c | 19 tools/python/xen/lowlevel/xc/xc.c | 129 tools/python/xen/util/auxbin.py | 2 tools/python/xen/util/blkif.py | 22 tools/python/xen/util/security.py | 38 tools/python/xen/util/xmlrpclib2.py | 61 tools/python/xen/web/connection.py | 16 tools/python/xen/web/tcp.py | 14 tools/python/xen/xend/Args.py | 2 tools/python/xen/xend/PrettyPrint.py | 2 tools/python/xen/xend/XendAPI.py | 1548 + tools/python/xen/xend/XendAPIConstants.py | 75 tools/python/xen/xend/XendAuthSessions.py | 145 tools/python/xen/xend/XendBootloader.py | 9 tools/python/xen/xend/XendCheckpoint.py | 55 tools/python/xen/xend/XendClient.py | 1 tools/python/xen/xend/XendConfig.py | 947 tools/python/xen/xend/XendConstants.py | 102 tools/python/xen/xend/XendDevices.py | 83 tools/python/xen/xend/XendDomain.py | 1402 + tools/python/xen/xend/XendDomainInfo.py | 2754 +- tools/python/xen/xend/XendError.py 
| 16 tools/python/xen/xend/XendNode.py | 135 tools/python/xen/xend/XendProtocol.py | 2 tools/python/xen/xend/XendRoot.py | 52 tools/python/xen/xend/XendStorageRepository.py | 358 tools/python/xen/xend/XendVDI.py | 155 tools/python/xen/xend/arch.py | 1 tools/python/xen/xend/image.py | 138 tools/python/xen/xend/osdep.py | 36 tools/python/xen/xend/server/DevController.py | 81 tools/python/xen/xend/server/SrvDaemon.py | 26 tools/python/xen/xend/server/SrvDomain.py | 14 tools/python/xen/xend/server/SrvDomainDir.py | 2 tools/python/xen/xend/server/SrvServer.py | 81 tools/python/xen/xend/server/XMLRPCServer.py | 131 tools/python/xen/xend/server/blkif.py | 57 tools/python/xen/xend/server/iopif.py | 4 tools/python/xen/xend/server/irqif.py | 2 tools/python/xen/xend/server/netif.py | 38 tools/python/xen/xend/server/pciif.py | 83 tools/python/xen/xend/server/tpmif.py | 36 tools/python/xen/xend/sxp.py | 24 tools/python/xen/xend/uuid.py | 10 tools/python/xen/xm/addlabel.py | 110 tools/python/xen/xm/cfgbootpolicy.py | 206 tools/python/xen/xm/console.py | 2 tools/python/xen/xm/create.py | 201 tools/python/xen/xm/dry-run.py | 56 tools/python/xen/xm/dumppolicy.py | 31 tools/python/xen/xm/getlabel.py | 64 tools/python/xen/xm/labels.py | 77 tools/python/xen/xm/loadpolicy.py | 34 tools/python/xen/xm/main.py | 908 tools/python/xen/xm/makepolicy.py | 25 tools/python/xen/xm/migrate.py | 16 tools/python/xen/xm/new.py | 68 tools/python/xen/xm/opts.py | 98 tools/python/xen/xm/resources.py | 44 tools/python/xen/xm/rmlabel.py | 65 tools/python/xen/xm/shutdown.py | 1 tools/security/policy.txt | 163 tools/security/policytools.txt | 148 tools/security/readme.txt | 31 tools/security/secpol_tool.c | 14 tools/security/secpol_xml2bin.c | 10 tools/vnet/doc/Makefile | 5 tools/vnet/doc/man/vn.pod.1 | 4 tools/vnet/examples/Makefile | 6 tools/vnet/libxutil/Makefile | 9 tools/vnet/libxutil/hash_table.c | 13 tools/vnet/libxutil/hash_table.h | 1 tools/vnet/scripts/Makefile | 6 
tools/vnet/vnet-module/Makefile.ver | 27 tools/vnet/vnet-module/esp.c | 16 tools/vnet/vnet-module/etherip.c | 43 tools/vnet/vnet-module/tunnel.c | 7 tools/vnet/vnet-module/tunnel.h | 8 tools/vnet/vnet-module/varp.c | 9 tools/vnet/vnet-module/varp_socket.c | 76 tools/vnet/vnet-module/vif.c | 1 tools/vnet/vnet-module/vnet.c | 13 tools/vnet/vnet-module/vnet_dev.c | 12 tools/vnet/vnet-module/vnet_eval.c | 2 tools/vnet/vnet-module/vnet_forward.c | 1 tools/vnet/vnetd/Makefile | 6 tools/vnet/vnetd/vnetd.c | 34 tools/vtpm/Rules.mk | 5 tools/vtpm_manager/Rules.mk | 5 tools/xcutils/Makefile | 6 tools/xcutils/readnotes.c | 6 tools/xenmon/Makefile | 11 tools/xenmon/xenmon.py | 9 tools/xenstat/libxenstat/Makefile | 12 tools/xenstat/libxenstat/src/xenstat.c | 20 tools/xenstat/xentop/Makefile | 7 tools/xenstat/xentop/xentop.1 | 3 tools/xenstat/xentop/xentop.c | 21 tools/xenstore/Makefile | 28 tools/xenstore/xenstore_client.c | 21 tools/xenstore/xenstored_core.c | 36 tools/xenstore/xenstored_core.h | 3 tools/xenstore/xenstored_domain.c | 41 tools/xenstore/xenstored_domain.h | 4 tools/xenstore/xenstored_linux.c | 4 tools/xenstore/xenstored_solaris.c | 66 tools/xenstore/xenstored_transaction.c | 2 tools/xenstore/xs_lib.c | 9 tools/xenstore/xsls.c | 20 tools/xentrace/Makefile | 7 tools/xentrace/formats | 31 tools/xentrace/xenctx.c | 108 tools/xm-test/README | 36 tools/xm-test/configure.ac | 26 tools/xm-test/grouptest/default | 1 tools/xm-test/grouptest/security | 1 tools/xm-test/lib/XmTestLib/Console.py | 5 tools/xm-test/lib/XmTestLib/XenDomain.py | 83 tools/xm-test/lib/XmTestLib/acm.py | 91 tools/xm-test/lib/XmTestLib/arch.py | 148 tools/xm-test/lib/XmTestLib/block_utils.py | 2 tools/xm-test/lib/XmTestReport/OSReport.py | 10 tools/xm-test/lib/XmTestReport/arch.py | 52 tools/xm-test/ramdisk/Makefile.am | 42 tools/xm-test/ramdisk/README-XenSource-initrd-1.0-img | 46 tools/xm-test/ramdisk/README-XenSource-initrd-1.1-img | 45 tools/xm-test/ramdisk/configs/buildroot-i386 | 346 
tools/xm-test/ramdisk/configs/buildroot-powerpc | 338 tools/xm-test/ramdisk/make-release.sh | 47 tools/xm-test/ramdisk/patches/buildroot/add_xvd_devices.patch | 5 tools/xm-test/runtest.sh | 38 tools/xm-test/tests/Makefile.am | 1 tools/xm-test/tests/block-create/01_block_attach_device_pos.py | 10 tools/xm-test/tests/block-create/02_block_attach_file_device_pos.py | 8 tools/xm-test/tests/block-create/04_block_attach_device_repeatedly_pos.py | 16 tools/xm-test/tests/block-create/05_block_attach_and_dettach_device_repeatedly_pos.py | 18 tools/xm-test/tests/block-create/06_block_attach_baddomain_neg.py | 8 tools/xm-test/tests/block-create/07_block_attach_baddevice_neg.py | 14 tools/xm-test/tests/block-create/08_block_attach_bad_filedevice_neg.py | 16 tools/xm-test/tests/block-create/09_block_attach_and_dettach_device_check_data_pos.py | 44 tools/xm-test/tests/block-create/10_block_attach_dettach_multiple_devices.py | 30 tools/xm-test/tests/block-create/11_block_attach_shared_dom0.py | 2 tools/xm-test/tests/block-create/12_block_attach_shared_domU.py | 2 tools/xm-test/tests/block-destroy/01_block-destroy_btblock_pos.py | 8 tools/xm-test/tests/block-destroy/02_block-destroy_rtblock_pos.py | 8 tools/xm-test/tests/block-destroy/04_block-destroy_nonattached_neg.py | 2 tools/xm-test/tests/block-destroy/05_block-destroy_byname_pos.py | 8 tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py | 10 tools/xm-test/tests/block-integrity/01_block_device_read_verify.py | 4 tools/xm-test/tests/block-integrity/02_block_device_write_verify.py | 4 tools/xm-test/tests/block-list/01_block-list_pos.py | 6 tools/xm-test/tests/block-list/02_block-list_attachbd_pos.py | 6 tools/xm-test/tests/block-list/03_block-list_anotherbd_pos.py | 10 tools/xm-test/tests/block-list/06_block-list_checkremove_pos.py | 24 tools/xm-test/tests/create/07_create_mem64_pos.py | 2 tools/xm-test/tests/create/08_create_mem128_pos.py | 2 tools/xm-test/tests/create/09_create_mem256_pos.py | 2 
tools/xm-test/tests/create/11_create_concurrent_pos.py | 2 tools/xm-test/tests/create/12_create_concurrent_stress_pos.py | 9 tools/xm-test/tests/create/14_create_blockroot_pos.py | 11 tools/xm-test/tests/create/15_create_smallmem_pos.py | 4 tools/xm-test/tests/create/16_create_smallmem_neg.py | 8 tools/xm-test/tests/network-attach/04_network_attach_baddomain_neg.py | 6 tools/xm-test/tests/security-acm/01_security-acm_basic.py | 121 tools/xm-test/tests/security-acm/02_security-acm_dom_start.py | 64 tools/xm-test/tests/security-acm/03_security-acm_dom_conflict.py | 60 tools/xm-test/tests/security-acm/04_security-acm_dom_res.py | 69 tools/xm-test/tests/security-acm/05_security-acm_dom_res_conf.py | 38 tools/xm-test/tests/security-acm/06_security-acm_dom_block_attach.py | 82 tools/xm-test/tests/security-acm/Makefile.am | 28 tools/xm-test/tests/security-acm/acm_utils.py | 15 tools/xm-test/tests/security-acm/xm-test-security_policy.xml | 110 tools/xm-test/tests/vtpm/06_vtpm-susp_res_pcrs.py | 2 tools/xm-test/tests/vtpm/07_vtpm-mig_pcrs.py | 2 tools/xm-test/tests/vtpm/08_vtpm-mig_pcrs.py | 2 tools/xm-test/tests/vtpm/vtpm_utils.py | 6 unmodified_drivers/linux-2.6/Makefile | 1 unmodified_drivers/linux-2.6/README | 4 unmodified_drivers/linux-2.6/blkfront/Makefile | 3 unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopmd.h | 14 unmodified_drivers/linux-2.6/compat-include/asm-generic/pgtable-nopud.h | 15 unmodified_drivers/linux-2.6/compat-include/linux/io.h | 10 unmodified_drivers/linux-2.6/compat-include/linux/mutex.h | 31 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h | 72 unmodified_drivers/linux-2.6/mkbuildtree | 55 unmodified_drivers/linux-2.6/netfront/Makefile | 3 unmodified_drivers/linux-2.6/overrides.mk | 2 unmodified_drivers/linux-2.6/platform-pci/Kbuild | 7 unmodified_drivers/linux-2.6/platform-pci/Makefile | 3 unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 32 unmodified_drivers/linux-2.6/platform-pci/platform-compat.c | 139 
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 40 unmodified_drivers/linux-2.6/platform-pci/xen_support.c | 4 unmodified_drivers/linux-2.6/util/Kbuild | 3 unmodified_drivers/linux-2.6/util/Makefile | 3 unmodified_drivers/linux-2.6/xenbus/Makefile | 3 xen/COPYING | 20 xen/Makefile | 32 xen/Rules.mk | 23 xen/acm/acm_chinesewall_hooks.c | 2 xen/acm/acm_core.c | 107 xen/acm/acm_simple_type_enforcement_hooks.c | 2 xen/arch/ia64/Makefile | 22 xen/arch/ia64/Rules.mk | 22 xen/arch/ia64/asm-offsets.c | 4 xen/arch/ia64/linux-xen/Makefile | 2 xen/arch/ia64/linux-xen/README.origin | 2 xen/arch/ia64/linux-xen/efi.c | 2 xen/arch/ia64/linux-xen/entry.S | 8 xen/arch/ia64/linux-xen/mca.c | 13 xen/arch/ia64/linux-xen/minstate.h | 2 xen/arch/ia64/linux-xen/mm_contig.c | 2 xen/arch/ia64/linux-xen/mm_numa.c | 75 xen/arch/ia64/linux-xen/numa.c | 67 xen/arch/ia64/linux-xen/sal.c | 75 xen/arch/ia64/linux-xen/setup.c | 2 xen/arch/ia64/linux-xen/smpboot.c | 3 xen/arch/ia64/linux-xen/tlb.c | 9 xen/arch/ia64/linux-xen/unaligned.c | 20 xen/arch/ia64/tools/p2m_expose/Makefile | 28 xen/arch/ia64/tools/p2m_expose/README.p2m_expose | 12 xen/arch/ia64/tools/p2m_expose/expose_p2m.c | 185 xen/arch/ia64/vmx/Makefile | 1 xen/arch/ia64/vmx/mmio.c | 133 xen/arch/ia64/vmx/optvfault.S | 841 xen/arch/ia64/vmx/pal_emul.c | 515 xen/arch/ia64/vmx/vlsapic.c | 355 xen/arch/ia64/vmx/vmmu.c | 106 xen/arch/ia64/vmx/vmx_entry.S | 2 xen/arch/ia64/vmx/vmx_hypercall.c | 28 xen/arch/ia64/vmx/vmx_init.c | 88 xen/arch/ia64/vmx/vmx_interrupt.c | 19 xen/arch/ia64/vmx/vmx_ivt.S | 37 xen/arch/ia64/vmx/vmx_phy_mode.c | 90 xen/arch/ia64/vmx/vmx_process.c | 238 xen/arch/ia64/vmx/vmx_support.c | 28 xen/arch/ia64/vmx/vmx_vcpu.c | 72 xen/arch/ia64/vmx/vmx_virt.c | 36 xen/arch/ia64/vmx/vtlb.c | 44 xen/arch/ia64/xen/Makefile | 4 xen/arch/ia64/xen/acpi.c | 2 xen/arch/ia64/xen/dom0_ops.c | 94 xen/arch/ia64/xen/dom_fw.c | 39 xen/arch/ia64/xen/domain.c | 276 xen/arch/ia64/xen/faults.c | 508 xen/arch/ia64/xen/flushtlb.c | 117 
xen/arch/ia64/xen/fw_emul.c | 144 xen/arch/ia64/xen/hypercall.c | 105 xen/arch/ia64/xen/hyperprivop.S | 2 xen/arch/ia64/xen/irq.c | 16 xen/arch/ia64/xen/mm.c | 564 xen/arch/ia64/xen/privop.c | 1054 - xen/arch/ia64/xen/regionreg.c | 12 xen/arch/ia64/xen/tlb_track.c | 507 xen/arch/ia64/xen/vcpu.c | 2254 +- xen/arch/ia64/xen/vhpt.c | 343 xen/arch/ia64/xen/xen.lds.S | 3 xen/arch/ia64/xen/xenasm.S | 32 xen/arch/ia64/xen/xencomm.c | 387 xen/arch/ia64/xen/xenmem.c | 95 xen/arch/ia64/xen/xenmisc.c | 8 xen/arch/ia64/xen/xenpatch.c | 122 xen/arch/ia64/xen/xensetup.c | 43 xen/arch/ia64/xen/xentime.c | 47 xen/arch/powerpc/Makefile | 5 xen/arch/powerpc/backtrace.c | 20 xen/arch/powerpc/domain.c | 35 xen/arch/powerpc/domain_build.c | 2 xen/arch/powerpc/mm.c | 88 xen/arch/powerpc/of-devwalk.c | 18 xen/arch/powerpc/papr/xlate.c | 2 xen/arch/powerpc/powerpc64/domain.c | 6 xen/arch/powerpc/setup.c | 12 xen/arch/powerpc/shadow.c | 4 xen/arch/powerpc/usercopy.c | 7 xen/arch/x86/Makefile | 27 xen/arch/x86/Rules.mk | 38 xen/arch/x86/acpi/boot.c | 81 xen/arch/x86/apic.c | 95 xen/arch/x86/boot/mkelf32.c | 5 xen/arch/x86/boot/x86_32.S | 33 xen/arch/x86/boot/x86_64.S | 56 xen/arch/x86/cpu/mcheck/Makefile | 6 xen/arch/x86/cpu/mcheck/mce.c | 4 xen/arch/x86/cpu/mtrr/Makefile | 6 xen/arch/x86/cpu/mtrr/main.c | 8 xen/arch/x86/domain.c | 238 xen/arch/x86/domain_build.c | 152 xen/arch/x86/domctl.c | 11 xen/arch/x86/e820.c | 2 xen/arch/x86/extable.c | 2 xen/arch/x86/flushtlb.c | 101 xen/arch/x86/hvm/Makefile | 6 xen/arch/x86/hvm/hvm.c | 832 xen/arch/x86/hvm/i8254.c | 34 xen/arch/x86/hvm/instrlen.c | 440 xen/arch/x86/hvm/intercept.c | 178 xen/arch/x86/hvm/io.c | 201 xen/arch/x86/hvm/irq.c | 227 xen/arch/x86/hvm/platform.c | 1031 - xen/arch/x86/hvm/pmtimer.c | 63 xen/arch/x86/hvm/rtc.c | 436 xen/arch/x86/hvm/svm/Makefile | 1 xen/arch/x86/hvm/svm/emulate.c | 32 xen/arch/x86/hvm/svm/intr.c | 134 xen/arch/x86/hvm/svm/svm.c | 1061 - xen/arch/x86/hvm/svm/vmcb.c | 448 xen/arch/x86/hvm/svm/x86_32/exits.S | 
15 xen/arch/x86/hvm/svm/x86_64/exits.S | 16 xen/arch/x86/hvm/vioapic.c | 951 xen/arch/x86/hvm/vlapic.c | 978 - xen/arch/x86/hvm/vmx/io.c | 105 xen/arch/x86/hvm/vmx/vmcs.c | 596 xen/arch/x86/hvm/vmx/vmx.c | 2119 +- xen/arch/x86/hvm/vmx/x86_32/exits.S | 9 xen/arch/x86/hvm/vmx/x86_64/exits.S | 6 xen/arch/x86/hvm/vpic.c | 463 xen/arch/x86/i387.c | 9 xen/arch/x86/io_apic.c | 22 xen/arch/x86/irq.c | 27 xen/arch/x86/microcode.c | 11 xen/arch/x86/mm.c | 641 xen/arch/x86/mm/shadow/common.c | 1360 - xen/arch/x86/mm/shadow/multi.c | 2771 +- xen/arch/x86/mm/shadow/multi.h | 11 xen/arch/x86/mm/shadow/private.h | 497 xen/arch/x86/mm/shadow/types.h | 268 xen/arch/x86/mpparse.c | 61 xen/arch/x86/numa.c | 308 xen/arch/x86/oprofile/nmi_int.c | 46 xen/arch/x86/oprofile/op_model_athlon.c | 28 xen/arch/x86/oprofile/op_model_p4.c | 10 xen/arch/x86/oprofile/op_model_ppro.c | 10 xen/arch/x86/oprofile/xenoprof.c | 668 xen/arch/x86/physdev.c | 4 xen/arch/x86/platform_hypercall.c | 18 xen/arch/x86/setup.c | 98 xen/arch/x86/smp.c | 2 xen/arch/x86/smpboot.c | 5 xen/arch/x86/srat.c | 315 xen/arch/x86/time.c | 7 xen/arch/x86/traps.c | 468 xen/arch/x86/x86_32/Makefile | 1 xen/arch/x86/x86_32/asm-offsets.c | 2 xen/arch/x86/x86_32/domain_page.c | 2 xen/arch/x86/x86_32/entry.S | 49 xen/arch/x86/x86_32/gpr_switch.S | 43 xen/arch/x86/x86_32/seg_fixup.c | 50 xen/arch/x86/x86_32/supervisor_mode_kernel.S | 2 xen/arch/x86/x86_32/traps.c | 69 xen/arch/x86/x86_64/Makefile | 1 xen/arch/x86/x86_64/asm-offsets.c | 2 xen/arch/x86/x86_64/entry.S | 36 xen/arch/x86/x86_64/gpr_switch.S | 63 xen/arch/x86/x86_64/mm.c | 36 xen/arch/x86/x86_64/traps.c | 79 xen/arch/x86/x86_emulate.c | 247 xen/common/Makefile | 4 xen/common/domain.c | 120 xen/common/domctl.c | 87 xen/common/elf.c | 2 xen/common/event_channel.c | 3 xen/common/gdbstub.c | 83 xen/common/grant_table.c | 118 xen/common/keyhandler.c | 5 xen/common/lib.c | 17 xen/common/memory.c | 298 xen/common/multicall.c | 2 xen/common/page_alloc.c | 218 xen/common/perfc.c 
| 13 xen/common/sched_credit.c | 579 xen/common/sched_sedf.c | 65 xen/common/schedule.c | 115 xen/common/shutdown.c | 3 xen/common/symbols-dummy.c | 16 xen/common/symbols.c | 13 xen/common/time.c | 77 xen/common/trace.c | 4 xen/common/vsprintf.c | 2 xen/common/xenoprof.c | 743 xen/common/xmalloc.c | 90 xen/drivers/acpi/Makefile | 1 xen/drivers/acpi/numa.c | 216 xen/drivers/acpi/tables.c | 473 xen/drivers/char/console.c | 306 xen/drivers/char/serial.c | 4 xen/drivers/video/vga.c | 11 xen/include/acm/acm_hooks.h | 14 xen/include/acpi/platform/acenv.h | 2 xen/include/asm-ia64/config.h | 22 xen/include/asm-ia64/debugger.h | 1 xen/include/asm-ia64/dom_fw.h | 9 xen/include/asm-ia64/domain.h | 59 xen/include/asm-ia64/flushtlb.h | 89 xen/include/asm-ia64/guest_access.h | 152 xen/include/asm-ia64/hvm/vlapic.h | 6 xen/include/asm-ia64/ia64_int.h | 4 xen/include/asm-ia64/linux-null/asm/mmzone.h | 1 xen/include/asm-ia64/linux-xen/asm/README.origin | 3 xen/include/asm-ia64/linux-xen/asm/acpi.h | 123 xen/include/asm-ia64/linux-xen/asm/atomic.h | 188 xen/include/asm-ia64/linux-xen/asm/cache.h | 2 xen/include/asm-ia64/linux-xen/asm/numa.h | 80 xen/include/asm-ia64/linux-xen/asm/pgtable.h | 37 xen/include/asm-ia64/linux-xen/asm/processor.h | 18 xen/include/asm-ia64/linux-xen/asm/spinlock.h | 8 xen/include/asm-ia64/linux-xen/asm/system.h | 1 xen/include/asm-ia64/linux/README.origin | 1 xen/include/asm-ia64/linux/asm/README.origin | 4 xen/include/asm-ia64/linux/asm/nodedata.h | 52 xen/include/asm-ia64/linux/asm/sal.h | 10 xen/include/asm-ia64/linux/hash.h | 58 xen/include/asm-ia64/mm.h | 34 xen/include/asm-ia64/p2m_entry.h | 76 xen/include/asm-ia64/perfc_defn.h | 65 xen/include/asm-ia64/privop.h | 4 xen/include/asm-ia64/tlb_track.h | 155 xen/include/asm-ia64/tlbflush.h | 10 xen/include/asm-ia64/uaccess.h | 18 xen/include/asm-ia64/vcpu.h | 321 xen/include/asm-ia64/vcpumask.h | 60 xen/include/asm-ia64/vhpt.h | 43 xen/include/asm-ia64/vlsapic.h | 1 xen/include/asm-ia64/vmmu.h | 3 
xen/include/asm-ia64/vmx.h | 4 xen/include/asm-ia64/vmx_pal_vsa.h | 7 xen/include/asm-ia64/vmx_phy_mode.h | 9 xen/include/asm-ia64/vmx_platform.h | 12 xen/include/asm-ia64/vmx_vcpu.h | 803 xen/include/asm-ia64/vmx_vpd.h | 2 xen/include/asm-ia64/vtm.h | 3 xen/include/asm-ia64/xenkregs.h | 3 xen/include/asm-ia64/xenpage.h | 4 xen/include/asm-ia64/xensystem.h | 1 xen/include/asm-powerpc/mm.h | 8 xen/include/asm-powerpc/powerpc64/config.h | 6 xen/include/asm-powerpc/spinlock.h | 12 xen/include/asm-x86/acpi.h | 4 xen/include/asm-x86/apicdef.h | 1 xen/include/asm-x86/bitops.h | 60 xen/include/asm-x86/config.h | 18 xen/include/asm-x86/debugger.h | 44 xen/include/asm-x86/desc.h | 5 xen/include/asm-x86/domain.h | 30 xen/include/asm-x86/flushtlb.h | 7 xen/include/asm-x86/grant_table.h | 4 xen/include/asm-x86/guest_access.h | 20 xen/include/asm-x86/hvm/domain.h | 21 xen/include/asm-x86/hvm/hvm.h | 92 xen/include/asm-x86/hvm/io.h | 61 xen/include/asm-x86/hvm/irq.h | 107 xen/include/asm-x86/hvm/support.h | 36 xen/include/asm-x86/hvm/svm/emulate.h | 36 xen/include/asm-x86/hvm/svm/vmcb.h | 25 xen/include/asm-x86/hvm/vcpu.h | 10 xen/include/asm-x86/hvm/vioapic.h | 115 xen/include/asm-x86/hvm/vlapic.h | 144 xen/include/asm-x86/hvm/vmx/vmcs.h | 61 xen/include/asm-x86/hvm/vmx/vmx.h | 322 xen/include/asm-x86/hvm/vpic.h | 111 xen/include/asm-x86/hvm/vpt.h | 139 xen/include/asm-x86/io_apic.h | 1 xen/include/asm-x86/mach-generic/mach_apic.h | 6 xen/include/asm-x86/mm.h | 154 xen/include/asm-x86/msr.h | 8 xen/include/asm-x86/multicall.h | 2 xen/include/asm-x86/numa.h | 78 xen/include/asm-x86/page.h | 12 xen/include/asm-x86/perfc_defn.h | 9 xen/include/asm-x86/processor.h | 2 xen/include/asm-x86/regs.h | 2 xen/include/asm-x86/shadow.h | 309 xen/include/asm-x86/spinlock.h | 8 xen/include/asm-x86/x86_32/page-2level.h | 3 xen/include/asm-x86/x86_32/page-3level.h | 13 xen/include/asm-x86/x86_32/regs.h | 3 xen/include/asm-x86/x86_64/asm_defns.h | 6 xen/include/asm-x86/x86_64/page.h | 18 
xen/include/asm-x86/x86_64/regs.h | 3 xen/include/asm-x86/xenoprof.h | 68 xen/include/public/COPYING | 16 xen/include/public/acm.h | 18 xen/include/public/acm_ops.h | 18 xen/include/public/arch-ia64.h | 109 xen/include/public/arch-powerpc.h | 26 xen/include/public/arch-x86_32.h | 21 xen/include/public/arch-x86_64.h | 27 xen/include/public/callback.h | 18 xen/include/public/dom0_ops.h | 18 xen/include/public/domctl.h | 63 xen/include/public/elfnote.h | 27 xen/include/public/event_channel.h | 18 xen/include/public/features.h | 18 xen/include/public/grant_table.h | 18 xen/include/public/hvm/e820.h | 27 xen/include/public/hvm/hvm_info_table.h | 19 xen/include/public/hvm/hvm_op.h | 53 xen/include/public/hvm/ioreq.h | 52 xen/include/public/hvm/params.h | 42 xen/include/public/hvm/vmx_assist.h | 18 xen/include/public/io/blkif.h | 47 xen/include/public/io/console.h | 18 xen/include/public/io/netif.h | 18 xen/include/public/io/pciif.h | 18 xen/include/public/io/ring.h | 42 xen/include/public/io/tpmif.h | 18 xen/include/public/io/xenbus.h | 18 xen/include/public/io/xs_wire.h | 19 xen/include/public/memory.h | 18 xen/include/public/nmi.h | 18 xen/include/public/physdev.h | 21 xen/include/public/platform.h | 18 xen/include/public/sched.h | 18 xen/include/public/sysctl.h | 18 xen/include/public/trace.h | 31 xen/include/public/vcpu.h | 25 xen/include/public/version.h | 18 xen/include/public/xen-compat.h | 27 xen/include/public/xen.h | 70 xen/include/public/xencomm.h | 32 xen/include/public/xenoprof.h | 22 xen/include/xen/compiler.h | 2 xen/include/xen/config.h | 76 xen/include/xen/console.h | 12 xen/include/xen/cpumask.h | 8 xen/include/xen/domain.h | 16 xen/include/xen/event.h | 7 xen/include/xen/gdbstub.h | 3 xen/include/xen/iocap.h | 8 xen/include/xen/keyhandler.h | 3 xen/include/xen/lib.h | 9 xen/include/xen/mm.h | 7 xen/include/xen/nodemask.h | 338 xen/include/xen/numa.h | 13 xen/include/xen/sched-if.h | 8 xen/include/xen/sched.h | 85 xen/include/xen/softirq.h | 5 
xen/include/xen/spinlock.h | 8 xen/include/xen/stdarg.h | 5 xen/include/xen/time.h | 14 xen/include/xen/xenoprof.h | 7 xen/tools/figlet/figlet.c | 5 1119 files changed, 88034 insertions(+), 48311 deletions(-), 3 modifications(!) diff -r ee4397571e44 -r 223470316756 .hgignore --- a/.hgignore Wed Aug 09 15:38:37 2006 -0400 +++ b/.hgignore Wed Nov 29 14:16:36 2006 -0600 @@ -15,8 +15,11 @@ .*\.rej$ .*/a\.out$ .*/cscope\..*$ +^cscope.*$ ^[^/]*\.bz2$ +^\.config$ ^TAGS$ +^tags$ ^dist/.*$ ^docs/.*\.aux$ ^docs/.*\.dvi$ @@ -95,7 +98,6 @@ ^tools/firmware/.*\.bin$ ^tools/firmware/.*\.sym$ ^tools/firmware/.*bios/.*bios.*\.txt$ -^tools/firmware/acpi/acpigen$ ^tools/firmware/hvmloader/hvmloader$ ^tools/firmware/hvmloader/roms\.h$ ^tools/firmware/rombios/BIOS-bochs-[^/]*$ @@ -120,6 +122,7 @@ ^tools/ioemu/qemu\.1$ ^tools/ioemu/qemu\.pod$ ^tools/libxc/xen/.*$ +^tools/libxen/test/test_bindings$ ^tools/libaio/src/.*\.ol$ ^tools/libaio/src/.*\.os$ ^tools/misc/cpuperf/cpuperf-perfcntr$ @@ -139,12 +142,15 @@ ^tools/security/secpol_tool$ ^tools/security/xen/.*$ ^tools/tests/test_x86_emulator$ +^tools/vnet/Make.local$ +^tools/vnet/build/.*$ ^tools/vnet/gc$ ^tools/vnet/gc.*/.*$ ^tools/vnet/vnet-module/.*\.ko$ ^tools/vnet/vnet-module/\..*\.cmd$ ^tools/vnet/vnet-module/\.tmp_versions/.*$ ^tools/vnet/vnet-module/vnet_module\.mod\..*$ +^tools/vnet/vnetd/vnetd$ ^tools/vtpm/tpm_emulator-.*\.tar\.gz$ ^tools/vtpm/tpm_emulator/.*$ ^tools/vtpm/vtpm/.*$ @@ -178,6 +184,17 @@ ^tools/xentrace/xenctx$ ^tools/xentrace/xentrace$ ^tools/xm-test/ramdisk/buildroot +^tools/xm-test/aclocal.m4$ +^tools/xm-test/autom4te +^tools/xm-test/install-sh$ +^tools/xm-test/mkinstalldirs$ +^tools/xm-test/missing$ +^tools/xm-test/config(ure|.log|.status|.guess|.sub)$ +^tools/xm-test/Makefile(.in)*$ +^tools/xm-test/.*/Makefile(.in)*$ +^tools/xm-test/lib/XmTestLib/config.py$ +^tools/xm-test/lib/XmTestReport/xmtest.py$ +^tools/xm-test/tests/.*\.test$ ^xen/BLOG$ ^xen/TAGS$ ^xen/arch/x86/asm-offsets\.s$ diff -r ee4397571e44 -r 
223470316756 .hgtags --- a/.hgtags Wed Aug 09 15:38:37 2006 -0400 +++ b/.hgtags Wed Nov 29 14:16:36 2006 -0600 @@ -15,3 +15,4 @@ c8fdb0caa77b429cf47f9707926e83947778cb48 c8fdb0caa77b429cf47f9707926e83947778cb48 RELEASE-3.0.0 af0573e9e5258db0a9d28aa954dd302ddd2c2d23 3.0.2-rc d0d3fef37685be264a7f52201f8ef44c030daad3 3.0.2-branched +6ed4368b4a9e1924c983774c4b1a2b6baf8e98a6 3.0.3-branched diff -r ee4397571e44 -r 223470316756 Config.mk --- a/Config.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/Config.mk Wed Nov 29 14:16:36 2006 -0600 @@ -4,43 +4,21 @@ debug ?= n debug ?= n XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ - -e s/ppc/powerpc/) + -e s/ppc/powerpc/ -e s/i86pc/x86_32/) XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH) XEN_TARGET_X86_PAE ?= n +XEN_OS ?= $(shell uname -s) + +CONFIG_$(XEN_OS) := y # Tools to run on system hosting the build HOSTCC = gcc HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc -CPP = $(CROSS_COMPILE)gcc -E -AR = $(CROSS_COMPILE)ar -RANLIB = $(CROSS_COMPILE)ranlib -NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip -OBJCOPY = $(CROSS_COMPILE)objcopy -OBJDUMP = $(CROSS_COMPILE)objdump - DISTDIR ?= $(XEN_ROOT)/dist DESTDIR ?= / -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_PROG = $(INSTALL) -m0755 - -ifneq ($(debug),y) -# Optimisation flags are overridable -CFLAGS ?= -O2 -fomit-frame-pointer -CFLAGS += -DNDEBUG -else -# Less than -O1 produces bad code and large stack frames -CFLAGS ?= -O1 -fno-omit-frame-pointer -CFLAGS += -g -endif - +include $(XEN_ROOT)/config/$(XEN_OS).mk include $(XEN_ROOT)/config/$(XEN_TARGET_ARCH).mk ifneq ($(EXTRA_PREFIX),) @@ -48,18 +26,30 @@ EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBDIR) EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBDIR) endif -test-gcc-flag = $(shell $(1) -v --help 2>&1 | grep -q " $(2) " && echo $(2)) +# cc-option +# Usage: cflags-y += $(call 
cc-option,$(CC),-march=winchip-c6,-march=i586) +cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \ + /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;) + +ifneq ($(debug),y) +CFLAGS += -DNDEBUG +else +CFLAGS += -g +endif + +CFLAGS += -std=gnu99 CFLAGS += -Wall -Wstrict-prototypes -HOSTCFLAGS += $(call test-gcc-flag,$(HOSTCC),-Wdeclaration-after-statement) -CFLAGS += $(call test-gcc-flag,$(CC),-Wdeclaration-after-statement) +# -Wunused-value makes GCC 4.x too aggressive for my taste: ignoring the +# result of any casted expression causes a warning. +CFLAGS += -Wno-unused-value + +HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,) +CFLAGS += $(call cc-option,$(CC),-Wdeclaration-after-statement,) LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i)) - -# Choose the best mirror to download linux kernel -KERNEL_REPO = http://www.kernel.org # If ACM_SECURITY = y, then the access control module is compiled # into Xen and the policy type can be set by the boot policy file diff -r ee4397571e44 -r 223470316756 Makefile --- a/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -1,11 +1,6 @@ # # Grand Unified Makefile for Xen. # - -KERNELS ?= linux-2.6-xen -# You may use wildcards in the above e.g. KERNELS=*2.6* - -XKERNELS := $(foreach kernel, $(KERNELS), $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.$(kernel))) ) # Export target architecture overrides to Xen and Linux sub-trees. 
ifneq ($(XEN_TARGET_ARCH),) diff -r ee4397571e44 -r 223470316756 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/Rules.mk Wed Nov 29 14:16:36 2006 -0600 @@ -2,6 +2,9 @@ include Config.mk include Config.mk export DESTDIR + +# Choose the best mirror to download linux kernel +KERNEL_REPO = http://www.kernel.org ALLKERNELS = $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.*)) ALLSPARSETREES = $(patsubst %-xen-sparse,%,$(wildcard *-xen-sparse)) diff -r ee4397571e44 -r 223470316756 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Wed Nov 29 14:16:36 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.13-xen0 -# Fri Sep 1 11:03:26 2006 +# Linux kernel version: 2.6.16.29-xen0 +# Tue Nov 14 10:39:09 2006 # # @@ -92,6 +92,8 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_XEN_IA64_VDSO_PARAVIRT=y +CONFIG_XEN_IA64_EXPOSE_P2M=y +CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -119,6 +121,7 @@ CONFIG_SMP=y CONFIG_SMP=y CONFIG_NR_CPUS=16 CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_SELECT_MEMORY_MODEL=y @@ -336,13 +339,14 @@ CONFIG_FW_LOADER=y # Block devices # # CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set +CONFIG_BLK_CPQ_CISS_DA=y +# CONFIG_CISS_SCSI_TAPE is not set # CONFIG_BLK_DEV_DAC960 is not set # CONFIG_BLK_DEV_UMEM is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y @@ -1040,7 +1044,7 @@ CONFIG_SND_ATIIXP=y # CONFIG_SND_ES1938 is not set # CONFIG_SND_ES1968 is not set 
CONFIG_SND_FM801=y -CONFIG_SND_FM801_TEA575X=y +# CONFIG_SND_FM801_TEA575X_BOOL is not set # CONFIG_SND_HDA_INTEL is not set # CONFIG_SND_HDSP is not set # CONFIG_SND_HDSPM is not set @@ -1526,7 +1530,7 @@ CONFIG_XEN_XENBUS_DEV=y CONFIG_XEN_XENBUS_DEV=y CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y -# CONFIG_XEN_BLKDEV_TAP is not set +CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r ee4397571e44 -r 223470316756 buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Wed Nov 29 14:16:36 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16.13-xenU -# Fri Sep 1 10:50:54 2006 +# Linux kernel version: 2.6.16.29-xenU +# Wed Oct 4 12:54:26 2006 # # @@ -89,6 +89,8 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_XEN_IA64_VDSO_PARAVIRT=y +CONFIG_XEN_IA64_EXPOSE_P2M=y +CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -116,6 +118,7 @@ CONFIG_SMP=y CONFIG_SMP=y CONFIG_NR_CPUS=16 # CONFIG_HOTPLUG_CPU is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_SELECT_MEMORY_MODEL=y @@ -939,7 +942,7 @@ CONFIG_SND_AC97_BUS=y # CONFIG_SND_ES1938 is not set # CONFIG_SND_ES1968 is not set CONFIG_SND_FM801=y -CONFIG_SND_FM801_TEA575X=y +# CONFIG_SND_FM801_TEA575X_BOOL is not set # CONFIG_SND_HDA_INTEL is not set # CONFIG_SND_HDSP is not set # CONFIG_SND_HDSPM is not set diff -r ee4397571e44 -r 223470316756 buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_ia64 Wed Nov 29 14:16:36 2006 -0600 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel 
version: 2.6.16.13-xen -# Fri Sep 1 10:58:55 2006 +# Linux kernel version: 2.6.16.29-xen +# Tue Nov 14 10:38:50 2006 # # @@ -92,6 +92,8 @@ CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y CONFIG_XEN_IA64_VDSO_PARAVIRT=y +CONFIG_XEN_IA64_EXPOSE_P2M=y +CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y # CONFIG_IA64_GENERIC is not set @@ -119,6 +121,7 @@ CONFIG_SMP=y CONFIG_SMP=y CONFIG_NR_CPUS=16 CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_SELECT_MEMORY_MODEL=y @@ -336,13 +339,14 @@ CONFIG_FW_LOADER=y # Block devices # # CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set +CONFIG_BLK_CPQ_CISS_DA=y +# CONFIG_CISS_SCSI_TAPE is not set # CONFIG_BLK_DEV_DAC960 is not set # CONFIG_BLK_DEV_UMEM is not set # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y @@ -1046,7 +1050,7 @@ CONFIG_SND_ATIIXP=y # CONFIG_SND_ES1938 is not set # CONFIG_SND_ES1968 is not set CONFIG_SND_FM801=y -CONFIG_SND_FM801_TEA575X=y +# CONFIG_SND_FM801_TEA575X_BOOL is not set # CONFIG_SND_HDA_INTEL is not set # CONFIG_SND_HDSP is not set # CONFIG_SND_HDSPM is not set @@ -1532,7 +1536,7 @@ CONFIG_XEN_XENBUS_DEV=y CONFIG_XEN_XENBUS_DEV=y CONFIG_XEN_BACKEND=y CONFIG_XEN_BLKDEV_BACKEND=y -# CONFIG_XEN_BLKDEV_TAP is not set +CONFIG_XEN_BLKDEV_TAP=y CONFIG_XEN_NETDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y diff -r ee4397571e44 -r 223470316756 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Wed Nov 29 14:16:36 2006 -0600 @@ -2377,6 +2377,7 @@ CONFIG_SND_ES1938=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +# 
CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m diff -r ee4397571e44 -r 223470316756 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Wed Nov 29 14:16:36 2006 -0600 @@ -2237,6 +2237,7 @@ CONFIG_SND_ES1938=m CONFIG_SND_ES1938=m CONFIG_SND_ES1968=m CONFIG_SND_FM801=m +# CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_FM801_TEA575X=m CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m diff -r ee4397571e44 -r 223470316756 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Wed Aug 09 15:38:37 2006 -0400 +++ b/buildconfigs/mk.linux-2.6-xen Wed Nov 29 14:16:36 2006 -0600 @@ -1,5 +1,5 @@ LINUX_SERIES = 2.6 LINUX_SERIES = 2.6 -LINUX_VER = 2.6.16.13 +LINUX_VER = 2.6.16.32 EXTRAVERSION ?= xen diff -r ee4397571e44 -r 223470316756 config/ia64.mk --- a/config/ia64.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/config/ia64.mk Wed Nov 29 14:16:36 2006 -0600 @@ -1,4 +1,6 @@ CONFIG_IA64 := y CONFIG_IA64 := y +CONFIG_IA64_$(XEN_OS) := y + CONFIG_IOEMU := y CONFIG_XCUTILS := y diff -r ee4397571e44 -r 223470316756 config/powerpc64.mk --- a/config/powerpc64.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/config/powerpc64.mk Wed Nov 29 14:16:36 2006 -0600 @@ -1,4 +1,5 @@ CONFIG_POWERPC := y CONFIG_POWERPC := y +CONFIG_POWERPC_$(XEN_OS) := y CFLAGS += -DELFSIZE=64 LIBDIR := lib diff -r ee4397571e44 -r 223470316756 config/x86_32.mk --- a/config/x86_32.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/config/x86_32.mk Wed Nov 29 14:16:36 2006 -0600 @@ -1,9 +1,17 @@ CONFIG_X86 := y CONFIG_X86 := y +CONFIG_X86_$(XEN_OS) := y + CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y CONFIG_IOEMU := y -CONFIG_MBOOTPACK := y CFLAGS += -m32 -march=i686 LIBDIR := lib + +# Use only if calling $(LD) directly. 
+ifeq ($(XEN_OS),OpenBSD) +LDFLAGS_DIRECT += -melf_i386_obsd +else +LDFLAGS_DIRECT += -melf_i386 +endif diff -r ee4397571e44 -r 223470316756 config/x86_64.mk --- a/config/x86_64.mk Wed Aug 09 15:38:37 2006 -0400 +++ b/config/x86_64.mk Wed Nov 29 14:16:36 2006 -0600 @@ -1,9 +1,17 @@ CONFIG_X86 := y CONFIG_X86 := y +CONFIG_X86_$(XEN_OS) := y + CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y CONFIG_IOEMU := y -CONFIG_MBOOTPACK := y CFLAGS += -m64 -LIBDIR = lib64 +LIBDIR = $(LIB64DIR) + +# Use only if calling $(LD) directly. +ifeq ($(XEN_OS),OpenBSD) +LDFLAGS_DIRECT += -melf_x86_64_obsd +else +LDFLAGS_DIRECT += -melf_x86_64 +endif diff -r ee4397571e44 -r 223470316756 docs/Makefile --- a/docs/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/docs/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -1,8 +1,9 @@ #!/usr/bin/make -f +XEN_ROOT=.. +include $(XEN_ROOT)/Config.mk + VERSION = xen-unstable -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 PS2PDF := ps2pdf DVIPS := dvips diff -r ee4397571e44 -r 223470316756 docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Wed Aug 09 15:38:37 2006 -0400 +++ b/docs/man/xm.pod.1 Wed Nov 29 14:16:36 2006 -0600 @@ -393,7 +393,9 @@ specified, VCPU information for all doma =item B<vcpu-pin> I<domain-id> I<vcpu> I<cpus> -Pins the the VCPU to only run on the specific CPUs. +Pins the VCPU to only run on the specific CPUs. The keyword +I<all> can be used to apply the I<cpus> list to all VCPUs in the +domain. Normally VCPUs can float between available CPUs whenever Xen deems a different run state is appropriate. Pinning can be used to restrict @@ -808,13 +810,13 @@ Loads the binary representation of the I Loads the binary representation of the I<policy> into Xen. The binary representation can be created with the B<makepolicy> subcommand. -=item B<cfgbootpolicy> I<policy> [I<kernelversion>] +=item B<cfgbootpolicy> I<policy> [I<boot title>] Configures I<policy> as the boot policy for Xen. 
It copies the binary policy representation into the /boot directory and adds a module line specifying the binary policy to the /boot/grub/menu.lst file. If your boot configuration includes multiple Xen boot titles, then use the -I<kernelversion> parameter to select the proper title. +I<boot title> parameter to specify a unique part of the proper title. =item B<dumppolicy> diff -r ee4397571e44 -r 223470316756 docs/src/interface.tex --- a/docs/src/interface.tex Wed Aug 09 15:38:37 2006 -0400 +++ b/docs/src/interface.tex Wed Nov 29 14:16:36 2006 -0600 @@ -955,7 +955,6 @@ A {\bf /vm} entry contains the following A {\bf /vm} entry contains the following information: \begin{description} -\item[ssidref] ssid reference for domain \item[uuid] uuid of the domain (somewhat redundant) \item[on\_reboot] the action to take on a domain reboot request (destroy or restart) \item[on\_poweroff] the action to take on a domain halt request (destroy or restart) @@ -1125,6 +1124,16 @@ This path contains: \end{description} \end{description} + \item[security/] access control information for the domain + \begin{description} + \item[ssidref] security reference identifier used inside the hypervisor + \item[access\_control/] security label used by management tools + \begin{description} + \item[label] security label name + \item[policy] security policy name + \end{description} + \end{description} + \item[store/] per-domain information for the store \begin{description} \item[port] the event channel used for the store ring queue @@ -2168,18 +2177,45 @@ implementing them (in {\tt xen/common/do implementing them (in {\tt xen/common/dom0\_ops.c}) and in the user-space tools that use them (mostly in {\tt tools/libxc}). +\section{Access Control Module Hypercalls} +\label{s:acmops} + Hypercalls relating to the management of the Access Control Module are -also restricted to domain 0 access for now: +also restricted to domain 0 access for now. 
For more details on any or +all of these, please see {\tt xen/include/public/acm\_ops.h}. A +complete list is given below: \begin{quote} -\hypercall{acm\_op(struct acm\_op * u\_acm\_op)} +\hypercall{acm\_op(int cmd, void *args)} This hypercall can be used to configure the state of the ACM, query that state, request access control decisions and dump additional information. +\begin{description} + +\item [ACMOP\_SETPOLICY:] set the access control policy + +\item [ACMOP\_GETPOLICY:] get the current access control policy and + status + +\item [ACMOP\_DUMPSTATS:] get current access control hook invocation + statistics + +\item [ACMOP\_GETSSID:] get security access control information for a + domain + +\item [ACMOP\_GETDECISION:] get access decision based on the currently + enforced access control policy + +\end{description} \end{quote} + +Most of the above are best understood by looking at the code +implementing them (in {\tt xen/common/acm\_ops.c}) and in the +user-space tools that use them (mostly in {\tt tools/security} and +{\tt tools/python/xen/lowlevel/acm}). \section{Debugging Hypercalls} diff -r ee4397571e44 -r 223470316756 docs/src/user.tex --- a/docs/src/user.tex Wed Aug 09 15:38:37 2006 -0400 +++ b/docs/src/user.tex Wed Nov 29 14:16:36 2006 -0600 @@ -3192,6 +3192,15 @@ editing \path{grub.conf}. input to DOM0 when it boots --- if it is `x' then auto-switching is disabled. Any other value, or omitting the character, enables auto-switching. [NB. Default switch-char is `a'.] +\item [ loglvl=$<$level$>/<$level$>$ ] + Specify logging level. Messages of the specified severity level (and + higher) will be printed to the Xen console. Valid levels are `none', + `error', `warning', `info', `debug', and `all'. The second level + specifier is optional: it is used to specify message severities + which are to be rate limited. Default is `loglvl=warning'. +\item [ guest\_loglvl=$<$level$>/<$level$>$ ] As for loglvl, but + applies to messages relating to guests. 
Default is + `guest\_loglvl=none/warning'. \item [ nmi=xxx ] Specify what to do with an NMI parity or I/O error. \\ `nmi=fatal': Xen prints a diagnostic and then hangs. \\ @@ -3202,12 +3211,23 @@ editing \path{grub.conf}. ignored. This parameter may be specified with a B, K, M or G suffix, representing bytes, kilobytes, megabytes and gigabytes respectively. The default unit, if no suffix is specified, is kilobytes. -\item [ dom0\_mem=xxx ] Set the amount of memory to be allocated to - domain0. In Xen 3.x the parameter may be specified with a B, K, M or +\item [ dom0\_mem=$<$specifier list$>$ ] Set the amount of memory to + be allocated to domain 0. This is a comma-separated list containing + the following optional components: + \begin{description} + \item[ min:$<$min\_amt$>$ ] Minimum amount to allocate to domain 0 + \item[ max:$<$min\_amt$>$ ] Maximum amount to allocate to domain 0 + \item[ $<$amt$>$ ] Precise amount to allocate to domain 0 + \end{description} + Each numeric parameter may be specified with a B, K, M or G suffix, representing bytes, kilobytes, megabytes and gigabytes respectively; if no suffix is specified, the parameter defaults to - kilobytes. In previous versions of Xen, suffixes were not supported - and the value is always interpreted as kilobytes. + kilobytes. Negative values are subtracted from total available + memory. If $<$amt$>$ is not specified, it defaults to all available + memory less a small amount (clamped to 128MB) for uses such as DMA + buffers. +\item [ dom0\_vcpus\_pin ] Pins domain 0 VCPUs on their respective + physical CPUS (default=false). \item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in pages (default 0). \item [ sched=xxx ] Select the CPU scheduler Xen should use. 
The diff -r ee4397571e44 -r 223470316756 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -55,9 +55,10 @@ endif endif ifeq ($(TARGET_ARCH),ia64) -CFLAGS += -mfixed-range=f12-f15,f32-f127 -ASFLAGS += -x assembler-with-cpp -ansi -Wall -ASFLAGS += -mfixed-range=f12-f15,f32-f127 +CFLAGS += -mfixed-range=f2-f5,f12-f15,f32-f127 -mconstant-gp +ASFLAGS += -x assembler-with-cpp -Wall +ASFLAGS += -mfixed-range=f2-f5,f12-f15,f32-f127 -fomit-frame-pointer +ASFLAGS += -fno-builtin -fno-common -fno-strict-aliasing -mconstant-gp ARCH_LINKS = IA64_LINKS # Special link on ia64 needed define arch_links [ -e include/ia64/asm-xsi-offsets.h ] || ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/ia64/asm-xsi-offsets.h @@ -122,6 +123,7 @@ clean: rm -f *.o *~ core $(TARGET).elf $(TARGET).raw $(TARGET) $(TARGET).gz rm -f libminios.a find . -type l | xargs rm -f + rm -f tags TAGS %.o: %.c $(HDRS) Makefile $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ @@ -137,4 +139,7 @@ cscope: cscope: $(all_sources) > cscope.files cscope -k -b -q - + +.PHONY: tags +tags: + $(all_sources) | xargs ctags diff -r ee4397571e44 -r 223470316756 extras/mini-os/README --- a/extras/mini-os/README Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/README Wed Nov 29 14:16:36 2006 -0600 @@ -26,5 +26,5 @@ Stuff it doesn't show: - to start it do the following in domain0 (assuming xend is running) # xm create domain_config -this starts the kernel and prints out a bunch of stuff and then every -1000 timer interrupts the system time. +this starts the kernel and prints out a bunch of stuff and then once +every second the system time. 
diff -r ee4397571e44 -r 223470316756 extras/mini-os/events.c --- a/extras/mini-os/events.c Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/events.c Wed Nov 29 14:16:36 2006 -0600 @@ -35,6 +35,21 @@ static ev_action_t ev_actions[NR_EVS]; static ev_action_t ev_actions[NR_EVS]; void default_handler(evtchn_port_t port, struct pt_regs *regs, void *data); +void unbind_all_ports(void) +{ + int i; + + for(i=0;i<NR_EVS;i++) + { + if(ev_actions[i].handler != default_handler) + { + struct evtchn_close close; + mask_evtchn(i); + close.port = i; + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + } + } +} /* * Demux events to different handlers. @@ -88,19 +103,18 @@ void unbind_evtchn(evtchn_port_t port ) int bind_virq(uint32_t virq, evtchn_handler_t handler, void *data) { - evtchn_op_t op; + evtchn_bind_virq_t op; /* Try to bind the virq to a port */ - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - op.u.bind_virq.vcpu = smp_processor_id(); + op.virq = virq; + op.vcpu = smp_processor_id(); - if ( HYPERVISOR_event_channel_op(&op) != 0 ) + if ( HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op) != 0 ) { printk("Failed to bind virtual IRQ %d\n", virq); return 1; } - bind_evtchn(op.u.bind_virq.port, handler, data); + bind_evtchn(op.port, handler, data); return 0; } @@ -151,14 +165,13 @@ int evtchn_alloc_unbound(domid_t pal, ev int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler, void *data, evtchn_port_t *port) { - evtchn_op_t op; - op.cmd = EVTCHNOP_alloc_unbound; - op.u.alloc_unbound.dom = DOMID_SELF; - op.u.alloc_unbound.remote_dom = pal; - int err = HYPERVISOR_event_channel_op(&op); + evtchn_alloc_unbound_t op; + op.dom = DOMID_SELF; + op.remote_dom = pal; + int err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); if (err) return err; - *port = bind_evtchn(op.u.alloc_unbound.port, handler, data); + *port = bind_evtchn(op.port, handler, data); return err; } @@ -169,14 +182,13 @@ int evtchn_bind_interdomain(domid_t pal, 
evtchn_handler_t handler, void *data, evtchn_port_t *local_port) { - evtchn_op_t op; - op.cmd = EVTCHNOP_bind_interdomain; - op.u.bind_interdomain.remote_dom = pal; - op.u.bind_interdomain.remote_port = remote_port; - int err = HYPERVISOR_event_channel_op(&op); + evtchn_bind_interdomain_t op; + op.remote_dom = pal; + op.remote_port = remote_port; + int err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op); if (err) return err; - evtchn_port_t port = op.u.bind_interdomain.local_port; + evtchn_port_t port = op.local_port; clear_evtchn(port); /* Without, handler gets invoked now! */ *local_port = bind_evtchn(port, handler, data); return err; diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/events.h Wed Nov 29 14:16:36 2006 -0600 @@ -20,7 +20,7 @@ #define _EVENTS_H_ #include<traps.h> -#include <xen/event_channel.h> +#include<xen/event_channel.h> typedef void (*evtchn_handler_t)(evtchn_port_t, struct pt_regs *, void *); @@ -39,10 +39,9 @@ int evtchn_bind_interdomain(domid_t pal, static inline int notify_remote_via_evtchn(evtchn_port_t port) { - evtchn_op_t op; - op.cmd = EVTCHNOP_send; - op.u.send.port = port; - return HYPERVISOR_event_channel_op(&op); + evtchn_send_t op; + op.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op); } diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/mm.h Wed Nov 29 14:16:36 2006 -0600 @@ -29,182 +29,15 @@ #include <xen/arch-x86_32.h> #elif defined(__x86_64__) #include <xen/arch-x86_64.h> +#elif defined(__ia64__) +#include <xen/arch-ia64.h> #else #error "Unsupported architecture" #endif #include <lib.h> +#include <arch_mm.h> -#define L1_FRAME 1 -#define L2_FRAME 2 -#define L3_FRAME 3 - -#define L1_PAGETABLE_SHIFT 12 - -#if defined(__i386__) - -#if !defined(CONFIG_X86_PAE) - -#define 
L2_PAGETABLE_SHIFT 22 - -#define L1_PAGETABLE_ENTRIES 1024 -#define L2_PAGETABLE_ENTRIES 1024 - -#define PADDR_BITS 32 -#define PADDR_MASK (~0UL) - -#define NOT_L1_FRAMES 1 -#define PRIpte "08lx" -typedef unsigned long pgentry_t; - -#else /* defined(CONFIG_X86_PAE) */ - -#define L2_PAGETABLE_SHIFT 21 -#define L3_PAGETABLE_SHIFT 30 - -#define L1_PAGETABLE_ENTRIES 512 -#define L2_PAGETABLE_ENTRIES 512 -#define L3_PAGETABLE_ENTRIES 4 - -#define PADDR_BITS 44 -#define PADDR_MASK ((1ULL << PADDR_BITS)-1) - -#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) - -/* - * If starting from virtual address greater than 0xc0000000, - * this value will be 2 to account for final mid-level page - * directory which is always mapped in at this location. - */ -#define NOT_L1_FRAMES 3 -#define PRIpte "016llx" -typedef uint64_t pgentry_t; - -#endif /* !defined(CONFIG_X86_PAE) */ - -#elif defined(__x86_64__) - -#define L2_PAGETABLE_SHIFT 21 -#define L3_PAGETABLE_SHIFT 30 -#define L4_PAGETABLE_SHIFT 39 - -#define L1_PAGETABLE_ENTRIES 512 -#define L2_PAGETABLE_ENTRIES 512 -#define L3_PAGETABLE_ENTRIES 512 -#define L4_PAGETABLE_ENTRIES 512 - -/* These are page-table limitations. Current CPUs support only 40-bit phys. */ -#define PADDR_BITS 52 -#define VADDR_BITS 48 -#define PADDR_MASK ((1UL << PADDR_BITS)-1) -#define VADDR_MASK ((1UL << VADDR_BITS)-1) - -#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) -#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) - -#define NOT_L1_FRAMES 3 -#define PRIpte "016lx" -typedef unsigned long pgentry_t; - -#endif - -#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) - -/* Given a virtual address, get an entry offset into a page table. 
*/ -#define l1_table_offset(_a) \ - (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) -#define l2_table_offset(_a) \ - (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) -#if defined(__x86_64__) || defined(CONFIG_X86_PAE) -#define l3_table_offset(_a) \ - (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) -#endif -#if defined(__x86_64__) -#define l4_table_offset(_a) \ - (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) -#endif - -#define _PAGE_PRESENT 0x001UL -#define _PAGE_RW 0x002UL -#define _PAGE_USER 0x004UL -#define _PAGE_PWT 0x008UL -#define _PAGE_PCD 0x010UL -#define _PAGE_ACCESSED 0x020UL -#define _PAGE_DIRTY 0x040UL -#define _PAGE_PAT 0x080UL -#define _PAGE_PSE 0x080UL -#define _PAGE_GLOBAL 0x100UL - -#if defined(__i386__) -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) -#if defined(CONFIG_X86_PAE) -#define L3_PROT (_PAGE_PRESENT) -#endif /* CONFIG_X86_PAE */ -#elif defined(__x86_64__) -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#endif /* __i386__ || __x86_64__ */ - -#ifndef CONFIG_X86_PAE -#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) -#else -#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT) -#endif -#define PAGE_SHIFT L1_PAGETABLE_SHIFT -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) -#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) -#define PFN_PHYS(x) ((x) << L1_PAGETABLE_SHIFT) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* Definitions for machine and pseudophysical addresses. 
*/ -#ifdef CONFIG_X86_PAE -typedef unsigned long long paddr_t; -typedef unsigned long long maddr_t; -#else -typedef unsigned long paddr_t; -typedef unsigned long maddr_t; -#endif - -extern unsigned long *phys_to_machine_mapping; -extern char _text, _etext, _edata, _end; -#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) -static __inline__ maddr_t phys_to_machine(paddr_t phys) -{ - maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); - machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} - -#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) -static __inline__ paddr_t machine_to_phys(maddr_t machine) -{ - paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); - phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); - return phys; -} - -#define VIRT_START ((unsigned long)&_text) - -#define to_phys(x) ((unsigned long)(x)-VIRT_START) -#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) - -#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) -#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) -#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) -#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) -#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) -#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) - -/* Pagetable walking. 
*/ -#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) -#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) void init_mm(void); unsigned long alloc_pages(int order); @@ -220,6 +53,8 @@ static __inline__ int get_order(unsigned return order; } +void arch_init_demand_mapping_area(unsigned long max_pfn); +void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p); void *map_frames(unsigned long *f, unsigned long n); diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/sched.h --- a/extras/mini-os/include/sched.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/sched.h Wed Nov 29 14:16:36 2006 -0600 @@ -2,39 +2,46 @@ #define __SCHED_H__ #include <list.h> +#include <time.h> +#include <arch_sched.h> struct thread { char *name; char *stack; +#if !defined(__ia64__) unsigned long sp; /* Stack pointer */ unsigned long ip; /* Instruction pointer */ +#else /* !defined(__ia64__) */ + thread_regs_t regs; +#endif /* !defined(__ia64__) */ struct list_head thread_list; u32 flags; + s_time_t wakeup_time; }; +extern struct thread *idle_thread; +void idle_thread_fn(void *unused); +#define RUNNABLE_FLAG 0x00000001 + +#define is_runnable(_thread) (_thread->flags & RUNNABLE_FLAG) +#define set_runnable(_thread) (_thread->flags |= RUNNABLE_FLAG) +#define clear_runnable(_thread) (_thread->flags &= ~RUNNABLE_FLAG) + +#define switch_threads(prev, next) arch_switch_threads(prev, next) + void init_sched(void); void run_idle_thread(void); struct thread* create_thread(char *name, void (*function)(void *), void *data); void schedule(void); -static inline struct thread* get_current(void) -{ - struct thread **current; -#ifdef __i386__ - __asm__("andl %%esp,%0; ":"=r" (current) : "r" (~8191UL)); -#else - __asm__("andq %%rsp,%0; ":"=r" (current) : "r" (~8191UL)); -#endif - return *current; -} - #define current get_current() void wake(struct thread *thread); void block(struct thread *thread); +void sleep(u32 
millisecs); #endif /* __SCHED_H__ */ diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/time.h --- a/extras/mini-os/include/time.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/time.h Wed Nov 29 14:16:36 2006 -0600 @@ -7,8 +7,9 @@ * File: time.h * Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx) * Changes: Grzegorz Milos (gm281@xxxxxxxxx) + * Robert Kaiser (kaiser@xxxxxxxxxxxxxxxxxxxxxxxxxx) * - * Date: Jul 2003, changesJun 2005 + * Date: Jul 2003, changes: Jun 2005, Sep 2006 * * Environment: Xen Minimal OS * Description: Time and timer functions @@ -57,7 +58,8 @@ void init_time(void); void init_time(void); s_time_t get_s_time(void); s_time_t get_v_time(void); +u64 monotonic_clock(void); void gettimeofday(struct timeval *tv); -void block_domain(u32 millisecs); +void block_domain(s_time_t until); #endif /* _TIME_H_ */ diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/x86/os.h --- a/extras/mini-os/include/x86/os.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/x86/os.h Wed Nov 29 14:16:36 2006 -0600 @@ -18,6 +18,8 @@ #ifndef __ASSEMBLY__ #include <types.h> #include <hypervisor.h> + +#define USED __attribute__ ((used)) extern void do_exit(void); #define BUG do_exit @@ -60,6 +62,11 @@ extern shared_info_t *HYPERVISOR_shared_ extern shared_info_t *HYPERVISOR_shared_info; void trap_init(void); + +void arch_init(start_info_t *si); +void arch_print_info(void); + + diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Wed Nov 29 14:16:36 2006 -0600 @@ -167,7 +167,7 @@ HYPERVISOR_fpu_taskswitch( static inline int HYPERVISOR_sched_op( - int cmd, unsigned long arg) + int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } @@ -238,9 +238,9 @@ HYPERVISOR_update_va_mapping( static inline int HYPERVISOR_event_channel_op( - void *op) -{ - 
return _hypercall1(int, event_channel_op, op); + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); } static inline int diff -r ee4397571e44 -r 223470316756 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Wed Nov 29 14:16:36 2006 -0600 @@ -171,7 +171,7 @@ HYPERVISOR_fpu_taskswitch( static inline int HYPERVISOR_sched_op( - int cmd, unsigned long arg) + int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } @@ -235,9 +235,9 @@ HYPERVISOR_update_va_mapping( static inline int HYPERVISOR_event_channel_op( - void *op) -{ - return _hypercall1(int, event_channel_op, op); + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); } static inline int diff -r ee4397571e44 -r 223470316756 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/kernel.c Wed Nov 29 14:16:36 2006 -0600 @@ -6,6 +6,7 @@ * * Copyright (c) 2002-2003, K A Fraser & R Neugebauer * Copyright (c) 2005, Grzegorz Milos, Intel Research Cambridge + * Copyright (c) 2006, Robert Kaiser, FH Wiesbaden * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -39,49 +40,6 @@ #include <xen/features.h> #include <xen/version.h> -/* - * Shared page for communicating with the hypervisor. - * Events flags go here, for example. - */ -shared_info_t *HYPERVISOR_shared_info; - -/* - * This structure contains start-of-day info, such as pagetable base pointer, - * address of the shared_info structure, and things like that. - */ -union start_info_union start_info_union; - -/* - * Just allocate the kernel stack here. SS:ESP is set up to point here - * in head.S. - */ -char stack[8192]; - - -/* Assembler interface fns in entry.S. 
*/ -void hypervisor_callback(void); -void failsafe_callback(void); - -extern char shared_info[PAGE_SIZE]; - -#if !defined(CONFIG_X86_PAE) -#define __pte(x) ((pte_t) { (x) } ) -#else -#define __pte(x) ({ unsigned long long _x = (x); \ - ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) -#endif - -static shared_info_t *map_shared_info(unsigned long pa) -{ - if ( HYPERVISOR_update_va_mapping( - (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) ) - { - printk("Failed to map shared_info!!\n"); - do_exit(); - } - return (shared_info_t *)shared_info; -} - u8 xen_features[XENFEAT_NR_SUBMAPS * 32]; @@ -109,11 +67,24 @@ void xenbus_tester(void *p) /* test_xenbus(); */ } +void periodic_thread(void *p) +{ + struct timeval tv; + printk("Periodic thread started.\n"); + for(;;) + { + gettimeofday(&tv); + printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); + sleep(1000); + } +} + /* This should be overridden by the application we are linked against. */ __attribute__((weak)) int app_main(start_info_t *si) { printk("Dummy main: start_info=%p\n", si); create_thread("xenbus_tester", xenbus_tester, si); + create_thread("periodic_thread", periodic_thread, si); return 0; } @@ -126,32 +97,10 @@ void start_kernel(start_info_t *si) (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello); - /* Copy the start_info struct to a globally-accessible area. */ - /* WARN: don't do printk before here, it uses information from - shared_info. Use xprintk instead. */ - memcpy(&start_info, si, sizeof(*si)); - - /* set up minimal memory infos */ - phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; + arch_init(si); - /* Grab the shared_info pointer and put it in a safe place. */ - HYPERVISOR_shared_info = map_shared_info(start_info.shared_info); - - /* Set up event and failsafe callback addresses. 
*/ -#ifdef __i386__ - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); -#else - HYPERVISOR_set_callbacks( - (unsigned long)hypervisor_callback, - (unsigned long)failsafe_callback, 0); -#endif trap_init(); - /* ENABLE EVENT DELIVERY. This is disabled at start of day. */ - __sti(); - /* print out some useful information */ printk("Xen Minimal OS!\n"); printk("start_info: %p\n", si); @@ -163,16 +112,20 @@ void start_kernel(start_info_t *si) printk(" flags: 0x%x\n", (unsigned int)si->flags); printk(" cmd_line: %s\n", si->cmd_line ? (const char *)si->cmd_line : "NULL"); - printk(" stack: %p-%p\n", stack, stack + 8192); + + /* Set up events. */ + init_events(); + + /* ENABLE EVENT DELIVERY. This is disabled at start of day. */ + __sti(); + + arch_print_info(); setup_xen_features(); /* Init memory management. */ init_mm(); - /* Set up events. */ - init_events(); - /* Init time and timers. */ init_time(); @@ -206,5 +159,9 @@ void do_exit(void) void do_exit(void) { printk("Do_exit called!\n"); - for ( ;; ) HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); + for( ;; ) + { + struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_crash }; + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + } } diff -r ee4397571e44 -r 223470316756 extras/mini-os/mm.c --- a/extras/mini-os/mm.c Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/mm.c Wed Nov 29 14:16:36 2006 -0600 @@ -48,10 +48,6 @@ #define DEBUG(_f, _a...) ((void)0) #endif -unsigned long *phys_to_machine_mapping; -extern char *stack; -extern void page_walk(unsigned long virt_addr); - /********************* * ALLOCATION BITMAP * One bit per page of memory. Bit set => page is allocated. @@ -148,7 +144,7 @@ static chunk_head_t free_tail[FREELIST_ * Prints allocation[0/1] for @nr_pages, starting at @start * address (virtual). 
*/ -static void print_allocation(void *start, int nr_pages) +USED static void print_allocation(void *start, int nr_pages) { unsigned long pfn_start = virt_to_pfn(start); int count; @@ -163,7 +159,7 @@ static void print_allocation(void *start * Prints chunks (making them with letters) for @nr_pages starting * at @start (virtual). */ -static void print_chunks(void *start, int nr_pages) +USED static void print_chunks(void *start, int nr_pages) { char chunks[1001], current='A'; int order, count; @@ -226,11 +222,11 @@ static void init_page_allocator(unsigned /* All allocated by default. */ memset(alloc_bitmap, ~0, bitmap_size); /* Free up the memory we've been given to play with. */ - map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT); + map_free(PHYS_PFN(min), range>>PAGE_SHIFT); /* The buddy lists are addressed in high memory. */ - min += VIRT_START; - max += VIRT_START; + min = (unsigned long) to_virt(min); + max = (unsigned long) to_virt(max); while ( range != 0 ) { @@ -297,7 +293,7 @@ unsigned long alloc_pages(int order) free_head[i] = spare_ch; } - map_alloc(to_phys(alloc_ch)>>PAGE_SHIFT, 1<<order); + map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1<<order); return((unsigned long)alloc_ch); @@ -365,353 +361,6 @@ void free_pages(void *pointer, int order } -void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, - unsigned long offset, unsigned long level) -{ - pgentry_t *tab = (pgentry_t *)start_info.pt_base; - unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); - unsigned long prot_e, prot_t, pincmd; - mmu_update_t mmu_updates[1]; - struct mmuext_op pin_request; - - DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, " - "prev_l_mfn=%lx, offset=%lx", - level, *pt_pfn, prev_l_mfn, offset); - - /* We need to clear the page, otherwise we might fail to map it - as a page table page */ - memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE); - - switch ( level ) - { - case L1_FRAME: - prot_e = L1_PROT; - prot_t = L2_PROT; - pincmd = MMUEXT_PIN_L1_TABLE; - 
break; -#if defined(__x86_64__) || defined(CONFIG_X86_PAE) - case L2_FRAME: - prot_e = L2_PROT; - prot_t = L3_PROT; - pincmd = MMUEXT_PIN_L2_TABLE; - break; -#endif -#if defined(__x86_64__) - case L3_FRAME: - prot_e = L3_PROT; - prot_t = L4_PROT; - pincmd = MMUEXT_PIN_L3_TABLE; - break; -#endif - default: - printk("new_pt_frame() called with invalid level number %d\n", level); - do_exit(); - break; - } - - /* Update the entry */ -#if defined(__x86_64__) - tab = pte_to_virt(tab[l4_table_offset(pt_page)]); - tab = pte_to_virt(tab[l3_table_offset(pt_page)]); -#endif -#if defined(CONFIG_X86_PAE) - tab = pte_to_virt(tab[l3_table_offset(pt_page)]); -#endif - - mmu_updates[0].ptr = ((pgentry_t)tab[l2_table_offset(pt_page)] & PAGE_MASK) + - sizeof(pgentry_t) * l1_table_offset(pt_page); - mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | - (prot_e & ~_PAGE_RW); - if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) - { - printk("PTE for new page table page could not be updated\n"); - do_exit(); - } - - /* Pin the page to provide correct protection */ - pin_request.cmd = pincmd; - pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn); - if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0) - { - printk("ERROR: pinning failed\n"); - do_exit(); - } - - /* Now fill the new page table page with entries. - Update the page directory as well. 
*/ - mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset; - mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t; - if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) - { - printk("ERROR: mmu_update failed\n"); - do_exit(); - } - - *pt_pfn += 1; -} - -/* Checks if a pagetable frame is needed (if weren't allocated by Xen) */ -static int need_pt_frame(unsigned long virt_address, int level) -{ - unsigned long hyp_virt_start = HYPERVISOR_VIRT_START; -#if defined(__x86_64__) - unsigned long hyp_virt_end = HYPERVISOR_VIRT_END; -#else - unsigned long hyp_virt_end = 0xffffffff; -#endif - - /* In general frames will _not_ be needed if they were already - allocated to map the hypervisor into our VA space */ -#if defined(__x86_64__) - if(level == L3_FRAME) - { - if(l4_table_offset(virt_address) >= - l4_table_offset(hyp_virt_start) && - l4_table_offset(virt_address) <= - l4_table_offset(hyp_virt_end)) - return 0; - return 1; - } else -#endif - -#if defined(__x86_64__) || defined(CONFIG_X86_PAE) - if(level == L2_FRAME) - { -#if defined(__x86_64__) - if(l4_table_offset(virt_address) >= - l4_table_offset(hyp_virt_start) && - l4_table_offset(virt_address) <= - l4_table_offset(hyp_virt_end)) -#endif - if(l3_table_offset(virt_address) >= - l3_table_offset(hyp_virt_start) && - l3_table_offset(virt_address) <= - l3_table_offset(hyp_virt_end)) - return 0; - - return 1; - } else -#endif /* defined(__x86_64__) || defined(CONFIG_X86_PAE) */ - - /* Always need l1 frames */ - if(level == L1_FRAME) - return 1; - - printk("ERROR: Unknown frame level %d, hypervisor %llx,%llx\n", - level, hyp_virt_start, hyp_virt_end); - return -1; -} - -void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn) -{ - unsigned long start_address, end_address; - unsigned long pfn_to_map, pt_pfn = *start_pfn; - static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; - pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; - 
unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); - unsigned long offset; - int count = 0; - - pfn_to_map = (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES; - - if (*max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START)) - { - printk("WARNING: Mini-OS trying to use Xen virtual space. " - "Truncating memory from %dMB to ", - ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20); - *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE); - printk("%dMB\n", - ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20); - } - - start_address = (unsigned long)pfn_to_virt(pfn_to_map); - end_address = (unsigned long)pfn_to_virt(*max_pfn); - - /* We worked out the virtual memory range to map, now mapping loop */ - printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address); - - while(start_address < end_address) - { - tab = (pgentry_t *)start_info.pt_base; - mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); - -#if defined(__x86_64__) - offset = l4_table_offset(start_address); - /* Need new L3 pt frame */ - if(!(start_address & L3_MASK)) - if(need_pt_frame(start_address, L3_FRAME)) - new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME); - - page = tab[offset]; - mfn = pte_to_mfn(page); - tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); -#endif -#if defined(__x86_64__) || defined(CONFIG_X86_PAE) - offset = l3_table_offset(start_address); - /* Need new L2 pt frame */ - if(!(start_address & L2_MASK)) - if(need_pt_frame(start_address, L2_FRAME)) - new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME); - - page = tab[offset]; - mfn = pte_to_mfn(page); - tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); -#endif - offset = l2_table_offset(start_address); - /* Need new L1 pt frame */ - if(!(start_address & L1_MASK)) - if(need_pt_frame(start_address, L1_FRAME)) - new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME); - - page = tab[offset]; - mfn = pte_to_mfn(page); - offset = l1_table_offset(start_address); - - mmu_updates[count].ptr = ((pgentry_t)mfn 
<< PAGE_SHIFT) + sizeof(pgentry_t) * offset; - mmu_updates[count].val = (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; - count++; - if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn) - { - if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0) - { - printk("PTE could not be updated\n"); - do_exit(); - } - count = 0; - } - start_address += PAGE_SIZE; - } - - *start_pfn = pt_pfn; -} - - -void mem_test(unsigned long *start_add, unsigned long *end_add) -{ - unsigned long mask = 0x10000; - unsigned long *pointer; - - for(pointer = start_add; pointer < end_add; pointer++) - { - if(!(((unsigned long)pointer) & 0xfffff)) - { - printk("Writing to %lx\n", pointer); - page_walk((unsigned long)pointer); - } - *pointer = (unsigned long)pointer & ~mask; - } - - for(pointer = start_add; pointer < end_add; pointer++) - { - if(((unsigned long)pointer & ~mask) != *pointer) - printk("Read error at 0x%lx. Read: 0x%lx, should read 0x%lx\n", - (unsigned long)pointer, - *pointer, - ((unsigned long)pointer & ~mask)); - } - -} - -static pgentry_t *demand_map_pgt; -static void *demand_map_area_start; - -static void init_demand_mapping_area(unsigned long max_pfn) -{ - unsigned long mfn; - pgentry_t *tab; - unsigned long start_addr; - unsigned long pt_pfn; - unsigned offset; - - /* Round up to four megs. + 1024 rather than + 1023 since we want - to be sure we don't end up in the same place we started. 
*/ - max_pfn = (max_pfn + L1_PAGETABLE_ENTRIES) & ~(L1_PAGETABLE_ENTRIES - 1); - if (max_pfn == 0 || - (unsigned long)pfn_to_virt(max_pfn + L1_PAGETABLE_ENTRIES) >= - HYPERVISOR_VIRT_START) { - printk("Too much memory; no room for demand map hole.\n"); - do_exit(); - } - - demand_map_area_start = pfn_to_virt(max_pfn); - printk("Demand map pfns start at %lx (%p).\n", max_pfn, - demand_map_area_start); - start_addr = (unsigned long)demand_map_area_start; - - tab = (pgentry_t *)start_info.pt_base; - mfn = virt_to_mfn(start_info.pt_base); - pt_pfn = virt_to_pfn(alloc_page()); - -#if defined(__x86_64__) - offset = l4_table_offset(start_addr); - if (!(tab[offset] & _PAGE_PRESENT)) { - new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME); - pt_pfn = virt_to_pfn(alloc_page()); - } - ASSERT(tab[offset] & _PAGE_PRESENT); - mfn = pte_to_mfn(tab[offset]); - tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); -#endif -#if defined(__x86_64__) || defined(CONFIG_X86_PAE) - offset = l3_table_offset(start_addr); - if (!(tab[offset] & _PAGE_PRESENT)) { - new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME); - pt_pfn = virt_to_pfn(alloc_page()); - } - ASSERT(tab[offset] & _PAGE_PRESENT); - mfn = pte_to_mfn(tab[offset]); - tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); -#endif - offset = l2_table_offset(start_addr); - if (tab[offset] & _PAGE_PRESENT) { - printk("Demand map area already has a page table covering it?\n"); - BUG(); - } - demand_map_pgt = pfn_to_virt(pt_pfn); - new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME); - ASSERT(tab[offset] & _PAGE_PRESENT); -} - -void *map_frames(unsigned long *f, unsigned long n) -{ - unsigned long x; - unsigned long y = 0; - mmu_update_t mmu_updates[16]; - int rc; - - if (n > 16) { - printk("Tried to map too many (%ld) frames at once.\n", n); - return NULL; - } - - /* Find a run of n contiguous frames */ - for (x = 0; x <= 1024 - n; x += y + 1) { - for (y = 0; y < n; y++) - if (demand_map_pgt[x+y] & _PAGE_PRESENT) - break; - if (y == n) - break; - } - if (y != n) { - 
printk("Failed to map %ld frames!\n", n); - return NULL; - } - - /* Found it at x. Map it in. */ - for (y = 0; y < n; y++) { - mmu_updates[y].ptr = virt_to_mach(&demand_map_pgt[x + y]); - mmu_updates[y].val = (f[y] << PAGE_SHIFT) | L1_PROT; - } - - rc = HYPERVISOR_mmu_update(mmu_updates, n, NULL, DOMID_SELF); - if (rc < 0) { - printk("Map %ld failed: %d.\n", n, rc); - return NULL; - } else { - return (void *)(unsigned long)((unsigned long)demand_map_area_start + - x * PAGE_SIZE); - } -} void init_mm(void) { @@ -720,22 +369,7 @@ void init_mm(void) printk("MM: Init\n"); - printk(" _text: %p\n", &_text); - printk(" _etext: %p\n", &_etext); - printk(" _edata: %p\n", &_edata); - printk(" stack start: %p\n", &stack); - printk(" _end: %p\n", &_end); - - /* First page follows page table pages and 3 more pages (store page etc) */ - start_pfn = PFN_UP(to_phys(start_info.pt_base)) + - start_info.nr_pt_frames + 3; - max_pfn = start_info.nr_pages; - - printk(" start_pfn: %lx\n", start_pfn); - printk(" max_pfn: %lx\n", max_pfn); - - build_pagetable(&start_pfn, &max_pfn); - + arch_init_mm(&start_pfn, &max_pfn); /* * now we can initialise the page allocator */ @@ -745,8 +379,7 @@ void init_mm(void) init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); printk("MM: done\n"); - init_demand_mapping_area(max_pfn); - printk("Initialised demand area.\n"); + arch_init_demand_mapping_area(max_pfn); } void sanity_check(void) diff -r ee4397571e44 -r 223470316756 extras/mini-os/sched.c --- a/extras/mini-os/sched.c Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/sched.c Wed Nov 29 14:16:36 2006 -0600 @@ -5,7 +5,7 @@ * * File: sched.c * Author: Grzegorz Milos - * Changes: + * Changes: Robert Kaiser * * Date: Aug 2005 * @@ -54,81 +54,8 @@ #define DEBUG(_f, _a...) 
((void)0) #endif - -#define RUNNABLE_FLAG 0x00000001 - -#define is_runnable(_thread) (_thread->flags & RUNNABLE_FLAG) -#define set_runnable(_thread) (_thread->flags |= RUNNABLE_FLAG) -#define clear_runnable(_thread) (_thread->flags &= ~RUNNABLE_FLAG) - - struct thread *idle_thread = NULL; LIST_HEAD(exited_threads); - -void idle_thread_fn(void *unused); - -void dump_stack(struct thread *thread) -{ - unsigned long *bottom = (unsigned long *)(thread->stack + 2*4*1024); - unsigned long *pointer = (unsigned long *)thread->sp; - int count; - if(thread == current) - { -#ifdef __i386__ - asm("movl %%esp,%0" - : "=r"(pointer)); -#else - asm("movq %%rsp,%0" - : "=r"(pointer)); -#endif - } - printk("The stack for \"%s\"\n", thread->name); - for(count = 0; count < 25 && pointer < bottom; count ++) - { - printk("[0x%lx] 0x%lx\n", pointer, *pointer); - pointer++; - } - - if(pointer < bottom) printk(" ... continues.\n"); -} - -#ifdef __i386__ -#define switch_threads(prev, next) do { \ - unsigned long esi,edi; \ - __asm__ __volatile__("pushfl\n\t" \ - "pushl %%ebp\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %4,%%esp\n\t" /* restore ESP */ \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %5\n\t" /* restore EIP */ \ - "ret\n\t" \ - "1:\t" \ - "popl %%ebp\n\t" \ - "popfl" \ - :"=m" (prev->sp),"=m" (prev->ip), \ - "=S" (esi),"=D" (edi) \ - :"m" (next->sp),"m" (next->ip), \ - "2" (prev), "d" (next)); \ -} while (0) -#elif __x86_64__ -#define switch_threads(prev, next) do { \ - unsigned long rsi,rdi; \ - __asm__ __volatile__("pushfq\n\t" \ - "pushq %%rbp\n\t" \ - "movq %%rsp,%0\n\t" /* save RSP */ \ - "movq %4,%%rsp\n\t" /* restore RSP */ \ - "movq $1f,%1\n\t" /* save RIP */ \ - "pushq %5\n\t" /* restore RIP */ \ - "ret\n\t" \ - "1:\t" \ - "popq %%rbp\n\t" \ - "popfq" \ - :"=m" (prev->sp),"=m" (prev->ip), \ - "=S" (rsi),"=D" (rdi) \ - :"m" (next->sp),"m" (next->ip), \ - "2" (prev), "d" (next)); \ -} while (0) -#endif void inline print_runqueue(void) { @@ -142,6 +69,54 @@ void 
inline print_runqueue(void) printk("\n"); } +/* Find the time when the next timeout expires. If this is more than + 10 seconds from now, return 10 seconds from now. */ +static s_time_t blocking_time(void) +{ + struct thread *thread; + struct list_head *iterator; + s_time_t min_wakeup_time; + unsigned long flags; + local_irq_save(flags); + /* default-block the domain for 10 seconds: */ + min_wakeup_time = NOW() + SECONDS(10); + + /* Thread list needs to be protected */ + list_for_each(iterator, &idle_thread->thread_list) + { + thread = list_entry(iterator, struct thread, thread_list); + if(!is_runnable(thread) && thread->wakeup_time != 0LL) + { + if(thread->wakeup_time < min_wakeup_time) + { + min_wakeup_time = thread->wakeup_time; + } + } + } + local_irq_restore(flags); + return(min_wakeup_time); +} + +/* Wake up all threads with expired timeouts. */ +static void wake_expired(void) +{ + struct thread *thread; + struct list_head *iterator; + s_time_t now = NOW(); + unsigned long flags; + local_irq_save(flags); + /* Thread list needs to be protected */ + list_for_each(iterator, &idle_thread->thread_list) + { + thread = list_entry(iterator, struct thread, thread_list); + if(!is_runnable(thread) && thread->wakeup_time != 0LL) + { + if(thread->wakeup_time <= now) + wake(thread); + } + } + local_irq_restore(flags); +} void schedule(void) { @@ -202,87 +177,38 @@ void exit_thread(void) schedule(); } -/* Pushes the specified value onto the stack of the specified thread */ -static void stack_push(struct thread *thread, unsigned long value) -{ - thread->sp -= sizeof(unsigned long); - *((unsigned long *)thread->sp) = value; -} - -struct thread* create_thread(char *name, void (*function)(void *), void *data) -{ - struct thread *thread; - unsigned long flags; - - thread = xmalloc(struct thread); - /* Allocate 2 pages for stack, stack will be 2pages aligned */ - thread->stack = (char *)alloc_pages(1); - thread->name = name; - printk("Thread \"%s\": pointer: 0x%lx, stack: 
0x%lx\n", name, thread, - thread->stack); - - thread->sp = (unsigned long)thread->stack + 4096 * 2; - /* Save pointer to the thread on the stack, used by current macro */ - *((unsigned long *)thread->stack) = (unsigned long)thread; - - stack_push(thread, (unsigned long) function); - stack_push(thread, (unsigned long) data); - thread->ip = (unsigned long) thread_starter; - - /* Not runable, not exited */ - thread->flags = 0; +void block(struct thread *thread) +{ + thread->wakeup_time = 0LL; + clear_runnable(thread); +} + +void sleep(u32 millisecs) +{ + struct thread *thread = get_current(); + thread->wakeup_time = NOW() + MILLISECS(millisecs); + clear_runnable(thread); + schedule(); +} + +void wake(struct thread *thread) +{ + thread->wakeup_time = 0LL; set_runnable(thread); - local_irq_save(flags); - if(idle_thread != NULL) { - list_add_tail(&thread->thread_list, &idle_thread->thread_list); - } else if(function != idle_thread_fn) - { - printk("BUG: Not allowed to create thread before initialising scheduler.\n"); - BUG(); - } - local_irq_restore(flags); - return thread; -} - - -void block(struct thread *thread) -{ - clear_runnable(thread); -} - -void wake(struct thread *thread) -{ - set_runnable(thread); } void idle_thread_fn(void *unused) { - for(;;) - { - schedule(); - block_domain(10000); - } -} - -void run_idle_thread(void) -{ - /* Switch stacks and run the thread */ -#if defined(__i386__) - __asm__ __volatile__("mov %0,%%esp\n\t" - "push %1\n\t" - "ret" - :"=m" (idle_thread->sp) - :"m" (idle_thread->ip)); -#elif defined(__x86_64__) - __asm__ __volatile__("mov %0,%%rsp\n\t" - "push %1\n\t" - "ret" - :"=m" (idle_thread->sp) - :"m" (idle_thread->ip)); -#endif -} - - + s_time_t until; + for(;;) + { + schedule(); + /* block until the next timeout expires, or for 10 secs, whichever comes first */ + until = blocking_time(); + block_domain(until); + wake_expired(); + } +} DECLARE_MUTEX(mutex); diff -r ee4397571e44 -r 223470316756 extras/mini-os/time.c --- 
a/extras/mini-os/time.c Wed Aug 09 15:38:37 2006 -0400 +++ b/extras/mini-os/time.c Wed Nov 29 14:16:36 2006 -0600 @@ -3,6 +3,7 @@ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge * (C) 2002-2003 - Keir Fraser - University of Cambridge * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + * (C) 2006 - Robert Kaiser - FH Wiesbaden **************************************************************************** * * File: time.c @@ -194,21 +195,15 @@ void gettimeofday(struct timeval *tv) } -static void print_current_time(void) -{ - struct timeval tv; - - gettimeofday(&tv); - printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); -} - - -void block_domain(u32 millisecs) +void block_domain(s_time_t until) { struct timeval tv; gettimeofday(&tv); - HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs); - HYPERVISOR_sched_op(SCHEDOP_block, 0); + if(monotonic_clock() < until) + { + HYPERVISOR_set_timer_op(until); + HYPERVISOR_sched_op(SCHEDOP_block, 0); + } } @@ -217,15 +212,8 @@ void block_domain(u32 millisecs) */ static void timer_handler(evtchn_port_t ev, struct pt_regs *regs, void *ign) { - static int i; - get_time_values_from_xen(); update_wallclock(); - i++; - if (i >= 1000) { - print_current_time(); - i = 0; - } } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Wed Nov 29 14:16:36 2006 -0600 @@ -789,6 +789,9 @@ config DOUBLEFAULT endmenu +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on HIGHMEM menu "Power management options (ACPI, APM)" depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Wed Nov 29 14:16:36 2006 -0600 @@ -46,6 +46,9 @@ fastcall void 
do_fixup_4gb_segment(struc if (test_and_set_bit(0, &printed)) return; + if (current->tgid == 1) /* Ignore statically linked init */ + return; + HYPERVISOR_vm_assist( VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Nov 29 14:16:36 2006 -0600 @@ -9,7 +9,7 @@ #include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> -#include <xen/interface/arch-x86_32.h> +#include <xen/interface/xen.h> #include <xen/interface/elfnote.h> /* @@ -192,6 +192,7 @@ ENTRY(cpu_gdt_table) #endif /* !CONFIG_XEN_COMPAT_030002 */ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") #ifdef CONFIG_X86_PAE ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -50,9 +50,6 @@ MODULE_LICENSE("GPL"); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); - -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ static int microcode_open (struct inode *unused1, struct file *unused2) { @@ -60,21 +57,26 @@ static int microcode_open (struct inode } -static int do_microcode_update (void) +static int do_microcode_update (const void __user *ubuf, size_t len) { int err; - dom0_op_t op; 
+ void *kbuf; - err = sys_mlock((unsigned long)user_buffer, user_buffer_size); - if (err != 0) - return err; + kbuf = vmalloc(len); + if (!kbuf) + return -ENOMEM; - op.cmd = DOM0_MICROCODE; - set_xen_guest_handle(op.u.microcode.data, user_buffer); - op.u.microcode.length = user_buffer_size; - err = HYPERVISOR_dom0_op(&op); + if (copy_from_user(kbuf, ubuf, len) == 0) { + dom0_op_t op; - (void)sys_munlock((unsigned long)user_buffer, user_buffer_size); + op.cmd = DOM0_MICROCODE; + set_xen_guest_handle(op.u.microcode.data, kbuf); + op.u.microcode.length = len; + err = HYPERVISOR_dom0_op(&op); + } else + err = -EFAULT; + + vfree(kbuf); return err; } @@ -88,17 +90,9 @@ static ssize_t microcode_write (struct f return -EINVAL; } - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - down(&microcode_sem); - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); + ret = do_microcode_update(buf, len); if (!ret) ret = (ssize_t)len; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -65,6 +65,7 @@ #include <xen/interface/physdev.h> #include <xen/interface/memory.h> #include <xen/features.h> +#include <xen/xencons.h> #include "setup_arch_pre.h" #include <bios_ebda.h> @@ -155,6 +156,9 @@ EXPORT_SYMBOL(ist_info); EXPORT_SYMBOL(ist_info); #endif struct e820map e820; +#ifdef CONFIG_XEN +struct e820map machine_e820; +#endif extern void early_cpu_init(void); extern void generic_apic_probe(char *); @@ -1450,7 +1454,6 @@ static void __init register_memory(void) static void __init register_memory(void) { #ifdef CONFIG_XEN - struct e820entry *machine_e820; struct xen_memory_map memmap; #endif int i; @@ -1460,14 +1463,14 @@ static void __init 
register_memory(void) return; #ifdef CONFIG_XEN - machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); - memmap.nr_entries = E820MAX; - set_xen_guest_handle(memmap.buffer, machine_e820); - - BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); - - legacy_init_iomem_resources(machine_e820, memmap.nr_entries, + set_xen_guest_handle(memmap.buffer, machine_e820.map); + + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) + BUG(); + machine_e820.nr_map = memmap.nr_entries; + + legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map, &code_resource, &data_resource); #else if (efi_enabled) @@ -1485,8 +1488,7 @@ static void __init register_memory(void) request_resource(&ioport_resource, &standard_io_resources[i]); #ifdef CONFIG_XEN - e820_setup_gap(machine_e820, memmap.nr_entries); - free_bootmem(__pa(machine_e820), PAGE_SIZE); + e820_setup_gap(machine_e820.map, machine_e820.nr_map); #else e820_setup_gap(e820.map, e820.nr_map); #endif @@ -1665,33 +1667,15 @@ void __init setup_arch(char **cmdline_p) screen_info.orig_video_cols = 80; screen_info.orig_video_ega_bx = 3; screen_info.orig_video_points = 16; + screen_info.orig_y = screen_info.orig_video_lines - 1; if (xen_start_info->console.dom0.info_size >= sizeof(struct dom0_vga_console_info)) { const struct dom0_vga_console_info *info = (struct dom0_vga_console_info *)( (char *)xen_start_info + xen_start_info->console.dom0.info_off); - screen_info.orig_video_mode = info->txt_mode; - screen_info.orig_video_isVGA = info->video_type; - screen_info.orig_video_lines = info->video_height; - screen_info.orig_video_cols = info->video_width; - screen_info.orig_video_points = info->txt_points; - screen_info.lfb_width = info->video_width; - screen_info.lfb_height = info->video_height; - screen_info.lfb_depth = info->lfb_depth; - screen_info.lfb_base = info->lfb_base; - screen_info.lfb_size = info->lfb_size; - screen_info.lfb_linelength = info->lfb_linelen; - screen_info.red_size = info->red_size; - 
screen_info.red_pos = info->red_pos; - screen_info.green_size = info->green_size; - screen_info.green_pos = info->green_pos; - screen_info.blue_size = info->blue_size; - screen_info.blue_pos = info->blue_pos; - screen_info.rsvd_size = info->rsvd_size; - screen_info.rsvd_pos = info->rsvd_pos; - } - screen_info.orig_y = screen_info.orig_video_lines - 1; + dom0_init_screen_info(info); + } xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; } else diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Wed Nov 29 14:16:36 2006 -0600 @@ -60,7 +60,7 @@ int __init sysenter_setup(void) #ifdef CONFIG_XEN if (boot_cpu_has(X86_FEATURE_SEP)) { - struct callback_register sysenter = { + static struct callback_register __initdata sysenter = { .type = CALLBACKTYPE_sysenter, .address = { __KERNEL_CS, (unsigned long)sysenter_entry }, }; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -716,6 +716,7 @@ irqreturn_t timer_interrupt(int irq, voi rcu_check_callbacks(cpu, user_mode(regs)); scheduler_tick(); run_posix_cpu_timers(current); + profile_tick(CPU_PROFILING, regs); return IRQ_HANDLED; } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -282,12 +282,6 @@ static int spurious_fault(struct pt_regs pmd_t *pmd; pte_t *pte; -#ifdef CONFIG_XEN - /* Faults in hypervisor area are never spurious. 
*/ - if (address >= HYPERVISOR_VIRT_START) - return 0; -#endif - /* Reserved-bit violation or user access to kernel space? */ if (error_code & 0x0c) return 0; @@ -372,7 +366,7 @@ fastcall void __kprobes do_page_fault(st if (unlikely(address >= TASK_SIZE)) { #ifdef CONFIG_XEN /* Faults in hypervisor area can never be patched up. */ - if (address >= HYPERVISOR_VIRT_START) + if (address >= hypervisor_virt_start) goto bad_area_nosemaphore; #endif if (!(error_code & 5)) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Wed Nov 29 14:16:36 2006 -0600 @@ -98,18 +98,6 @@ void xen_l4_entry_update(pgd_t *ptr, pgd BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); } #endif /* CONFIG_X86_64 */ - -void xen_machphys_update(unsigned long mfn, unsigned long pfn) -{ - mmu_update_t u; - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(pfn != mfn); - return; - } - u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - u.val = pfn; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); -} void xen_pt_switch(unsigned long ptr) { @@ -325,6 +313,7 @@ int xen_create_contiguous_region( success = (exchange.nr_exchanged == (1UL << order)); BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); BUG_ON(success && (rc != 0)); +#ifdef CONFIG_XEN_COMPAT_030002 if (unlikely(rc == -ENOSYS)) { /* Compatibility when XENMEM_exchange is unsupported. */ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, @@ -341,6 +330,7 @@ int xen_create_contiguous_region( BUG(); } } +#endif /* 3. Map the new extent in place of old pages. 
*/ for (i = 0; i < (1UL<<order); i++) { @@ -419,6 +409,7 @@ void xen_destroy_contiguous_region(unsig success = (exchange.nr_exchanged == 1); BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); BUG_ON(success && (rc != 0)); +#ifdef CONFIG_XEN_COMPAT_030002 if (unlikely(rc == -ENOSYS)) { /* Compatibility when XENMEM_exchange is unsupported. */ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, @@ -429,6 +420,7 @@ void xen_destroy_contiguous_region(unsig BUG(); success = 1; } +#endif /* 4. Map new pages in place of old pages. */ for (i = 0; i < (1UL<<order); i++) { diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -130,7 +130,7 @@ static void __init page_table_range_init pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd)) + if (vaddr < hypervisor_virt_start && pmd_none(*pmd)) one_page_table_init(pmd); vaddr += PMD_SIZE; @@ -187,7 +187,7 @@ static void __init kernel_physical_mappi pmd += pmd_idx; for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; - if (address >= HYPERVISOR_VIRT_START) + if (address >= hypervisor_virt_start) continue; /* Map with big pages if possible, otherwise create normal page tables. 
*/ @@ -410,7 +410,7 @@ static void __init pagetable_init (void) * created - mappings will be set by set_fixmap(): */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - page_table_range_init(vaddr, 0, pgd_base); + page_table_range_init(vaddr, hypervisor_virt_start, pgd_base); permanent_kmaps_init(pgd_base); } @@ -663,8 +663,8 @@ void __init mem_init(void) totalram_pages += free_all_bootmem(); /* XEN: init and count low-mem pages outside initial allocation. */ for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) { - ClearPageReserved(&mem_map[pfn]); - set_page_count(&mem_map[pfn], 1); + ClearPageReserved(pfn_to_page(pfn)); + set_page_count(pfn_to_page(pfn), 1); totalram_pages++; } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -28,6 +28,8 @@ static int direct_remap_area_pte_fn(pte_ void *data) { mmu_update_t **v = (mmu_update_t **)data; + + BUG_ON(!pte_none(*pte)); (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) << PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); @@ -110,11 +112,13 @@ int direct_remap_pfn_range(struct vm_are pgprot_t prot, domid_t domid) { - /* Same as remap_pfn_range(). 
*/ - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return remap_pfn_range(vma, address, mfn, size, prot); if (domid == DOMID_SELF) return -EINVAL; + + vma->vm_flags |= VM_IO | VM_RESERVED; vma->vm_mm->context.has_foreign_mappings = 1; @@ -245,7 +249,7 @@ void __iomem * __ioremap(unsigned long p return NULL; area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; - flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; + flags |= _KERNPG_TABLE; if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, phys_addr>>PAGE_SHIFT, size, __pgprot(flags), domid)) { diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -102,8 +102,11 @@ static void set_pte_pfn(unsigned long va return; } pte = pte_offset_kernel(pmd, vaddr); - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); + if (pgprot_val(flags)) + /* <pfn,flags> stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte(pfn, flags)); + else + pte_clear(&init_mm, vaddr, pte); /* * It's enough to flush this one mapping. @@ -140,8 +143,11 @@ static void set_pte_pfn_ma(unsigned long return; } pte = pte_offset_kernel(pmd, vaddr); - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte_ma(pfn, flags)); + if (pgprot_val(flags)) + /* <pfn,flags> stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte_ma(pfn, flags)); + else + pte_clear(&init_mm, vaddr, pte); /* * It's enough to flush this one mapping. 
@@ -186,8 +192,15 @@ void set_pmd_pfn(unsigned long vaddr, un } static int nr_fixmaps = 0; +unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START; unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE); EXPORT_SYMBOL(__FIXADDR_TOP); + +void __init set_fixaddr_top() +{ + BUG_ON(nr_fixmaps > 0); + __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE; +} void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags) { @@ -209,12 +222,6 @@ void __set_fixmap (enum fixed_addresses break; } nr_fixmaps++; -} - -void set_fixaddr_top(unsigned long top) -{ - BUG_ON(nr_fixmaps > 0); - __FIXADDR_TOP = top - PAGE_SIZE; } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/oprofile/Makefile --- a/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -7,7 +7,10 @@ DRIVER_OBJS = $(addprefix ../../../drive timer_int.o ) ifdef CONFIG_XEN -oprofile-y := $(DRIVER_OBJS) xenoprof.o +XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \ + xenoprofile.o) +oprofile-y := $(DRIVER_OBJS) \ + $(XENOPROF_COMMON_OBJS) xenoprof.o else oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c --- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Wed Nov 29 14:16:36 2006 -0600 @@ -9,249 +9,83 @@ * Modified by Aravind Menon and Jose Renato Santos for Xen * These modifications are: * Copyright (C) 2005 Hewlett-Packard Co. + * + * x86-specific part + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. 
*/ #include <linux/init.h> -#include <linux/notifier.h> -#include <linux/smp.h> #include <linux/oprofile.h> -#include <linux/sysdev.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/vmalloc.h> -#include <asm/nmi.h> -#include <asm/msr.h> -#include <asm/apic.h> +#include <linux/sched.h> #include <asm/pgtable.h> -#include <xen/evtchn.h> -#include "op_counter.h" #include <xen/driver_util.h> #include <xen/interface/xen.h> #include <xen/interface/xenoprof.h> -#include <../../../drivers/oprofile/cpu_buffer.h> -#include <../../../drivers/oprofile/event_buffer.h> +#include <xen/xenoprof.h> +#include "op_counter.h" -#define MAX_XENOPROF_SAMPLES 16 +static unsigned int num_events = 0; -static int xenoprof_start(void); -static void xenoprof_stop(void); - -static int xenoprof_enabled = 0; -static unsigned int num_events = 0; -static int is_primary = 0; -static int active_defined; - -/* sample buffers shared with Xen */ -xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS]; -/* Shared buffer area */ -char * shared_buffer = NULL; -/* Number of buffers in shared area (one per VCPU) */ -int nbuf; -/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ -int ovf_irq[NR_CPUS]; -/* cpu model type string - copied from Xen memory space on XENOPROF_init command */ -char cpu_type[XENOPROF_CPU_TYPE_SIZE]; - -/* Passive sample buffers shared with Xen */ -xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS]; -/* Passive shared buffer area */ -char *p_shared_buffer[MAX_OPROF_DOMAINS]; - -#ifdef CONFIG_PM - -static int xenoprof_suspend(struct sys_device * dev, pm_message_t state) +void __init xenoprof_arch_init_counter(struct xenoprof_init *init) { - if (xenoprof_enabled == 1) - xenoprof_stop(); - return 0; -} - - -static int xenoprof_resume(struct sys_device * dev) -{ - if (xenoprof_enabled == 1) - xenoprof_start(); - return 0; -} - - -static struct sysdev_class oprofile_sysclass = { - set_kset_name("oprofile"), - .resume = xenoprof_resume, - .suspend = 
xenoprof_suspend -}; - - -static struct sys_device device_oprofile = { - .id = 0, - .cls = &oprofile_sysclass, -}; - - -static int __init init_driverfs(void) -{ - int error; - if (!(error = sysdev_class_register(&oprofile_sysclass))) - error = sysdev_register(&device_oprofile); - return error; -} - - -static void __exit exit_driverfs(void) -{ - sysdev_unregister(&device_oprofile); - sysdev_class_unregister(&oprofile_sysclass); -} - -#else -#define init_driverfs() do { } while (0) -#define exit_driverfs() do { } while (0) -#endif /* CONFIG_PM */ - -unsigned long long oprofile_samples = 0; -unsigned long long p_oprofile_samples = 0; - -unsigned int pdomains; -struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS]; - -static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive) -{ - int head, tail, size; - - head = buf->event_head; - tail = buf->event_tail; - size = buf->event_size; - - if (tail > head) { - while (tail < size) { - oprofile_add_pc(buf->event_log[tail].eip, - buf->event_log[tail].mode, - buf->event_log[tail].event); - if (!is_passive) - oprofile_samples++; - else - p_oprofile_samples++; - tail++; - } - tail = 0; - } - while (tail < head) { - oprofile_add_pc(buf->event_log[tail].eip, - buf->event_log[tail].mode, - buf->event_log[tail].event); - if (!is_passive) - oprofile_samples++; - else - p_oprofile_samples++; - tail++; - } - - buf->event_tail = tail; -} - -static void xenoprof_handle_passive(void) -{ - int i, j; - int flag_domain, flag_switch = 0; - - for (i = 0; i < pdomains; i++) { - flag_domain = 0; - for (j = 0; j < passive_domains[i].nbuf; j++) { - xenoprof_buf_t *buf = p_xenoprof_buf[i][j]; - if (buf->event_head == buf->event_tail) - continue; - if (!flag_domain) { - if (!oprofile_add_domain_switch(passive_domains[i]. 
- domain_id)) - goto done; - flag_domain = 1; - } - xenoprof_add_pc(buf, 1); - flag_switch = 1; - } - } -done: - if (flag_switch) - oprofile_add_domain_switch(COORDINATOR_DOMAIN); -} - -static irqreturn_t -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) -{ - struct xenoprof_buf * buf; - int cpu; - static unsigned long flag; - - cpu = smp_processor_id(); - buf = xenoprof_buf[cpu]; - - xenoprof_add_pc(buf, 0); - - if (is_primary && !test_and_set_bit(0, &flag)) { - xenoprof_handle_passive(); - smp_mb__before_clear_bit(); - clear_bit(0, &flag); - } - - return IRQ_HANDLED; -} - - -static void unbind_virq(void) -{ - int i; - - for_each_cpu(i) { - if (ovf_irq[i] >= 0) { - unbind_from_irqhandler(ovf_irq[i], NULL); - ovf_irq[i] = -1; - } + num_events = init->num_events; + /* just in case - make sure we do not overflow event list + (i.e. counter_config list) */ + if (num_events > OP_MAX_COUNTER) { + num_events = OP_MAX_COUNTER; + init->num_events = num_events; } } +void xenoprof_arch_counter(void) +{ + int i; + struct xenoprof_counter counter; -static int bind_virq(void) -{ - int i, result; - - for_each_cpu(i) { - result = bind_virq_to_irqhandler(VIRQ_XENOPROF, - i, - xenoprof_ovf_interrupt, - SA_INTERRUPT, - "xenoprof", - NULL); - - if (result < 0) { - unbind_virq(); - return result; - } - - ovf_irq[i] = result; + for (i=0; i<num_events; i++) { + counter.ind = i; + counter.count = (uint64_t)counter_config[i].count; + counter.enabled = (uint32_t)counter_config[i].enabled; + counter.event = (uint32_t)counter_config[i].event; + counter.kernel = (uint32_t)counter_config[i].kernel; + counter.user = (uint32_t)counter_config[i].user; + counter.unit_mask = (uint64_t)counter_config[i].unit_mask; + HYPERVISOR_xenoprof_op(XENOPROF_counter, + &counter); } - - return 0; } +void xenoprof_arch_start(void) +{ + /* nothing */ +} -static int map_xenoprof_buffer(int max_samples) +void xenoprof_arch_stop(void) { - struct xenoprof_get_buffer get_buffer; - struct 
xenoprof_buf *buf; - int npages, ret, i; + /* nothing */ +} + +void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer * sbuf) +{ + if (sbuf->buffer) { + vunmap(sbuf->buffer); + sbuf->buffer = NULL; + } +} + +int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer * get_buffer, + struct xenoprof_shared_buffer * sbuf) +{ + int npages, ret; struct vm_struct *area; - if ( shared_buffer ) - return 0; - - get_buffer.max_samples = max_samples; - - if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, &get_buffer)) ) + sbuf->buffer = NULL; + if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, get_buffer)) ) return ret; - nbuf = get_buffer.nbuf; - npages = (get_buffer.bufsize * nbuf - 1) / PAGE_SIZE + 1; + npages = (get_buffer->bufsize * get_buffer->nbuf - 1) / PAGE_SIZE + 1; area = alloc_vm_area(npages * PAGE_SIZE); if (area == NULL) @@ -259,231 +93,55 @@ static int map_xenoprof_buffer(int max_s if ( (ret = direct_kernel_remap_pfn_range( (unsigned long)area->addr, - get_buffer.buf_maddr >> PAGE_SHIFT, - npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE), DOMID_SELF)) ) { + get_buffer->buf_gmaddr >> PAGE_SHIFT, + npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE), + DOMID_SELF)) ) { vunmap(area->addr); return ret; } - shared_buffer = area->addr; - for (i=0; i< nbuf; i++) { - buf = (struct xenoprof_buf*) - &shared_buffer[i * get_buffer.bufsize]; - BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); - xenoprof_buf[buf->vcpu_id] = buf; - } - - return 0; -} - - -static int xenoprof_setup(void) -{ - int ret; - int i; - - if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) ) - return ret; - - if ( (ret = bind_virq()) ) - return ret; - - if (is_primary) { - struct xenoprof_counter counter; - - /* Define dom0 as an active domain if not done yet */ - if (!active_defined) { - domid_t domid; - ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); - if (ret) - goto err; - domid = 0; - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); - if (ret) - goto err; - 
active_defined = 1; - } - - ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); - if (ret) - goto err; - for (i=0; i<num_events; i++) { - counter.ind = i; - counter.count = (uint64_t)counter_config[i].count; - counter.enabled = (uint32_t)counter_config[i].enabled; - counter.event = (uint32_t)counter_config[i].event; - counter.kernel = (uint32_t)counter_config[i].kernel; - counter.user = (uint32_t)counter_config[i].user; - counter.unit_mask = (uint64_t)counter_config[i].unit_mask; - HYPERVISOR_xenoprof_op(XENOPROF_counter, - &counter); - } - ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL); - - if (ret) - goto err; - } - - ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL); - if (ret) - goto err; - - xenoprof_enabled = 1; - return 0; - err: - unbind_virq(); + sbuf->buffer = area->addr; return ret; } - -static void xenoprof_shutdown(void) -{ - xenoprof_enabled = 0; - - HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL); - - if (is_primary) { - HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL); - active_defined = 0; - } - - unbind_virq(); - -} - - -static int xenoprof_start(void) -{ - int ret = 0; - - if (is_primary) - ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL); - - return ret; -} - - -static void xenoprof_stop(void) -{ - if (is_primary) - HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL); -} - - -static int xenoprof_set_active(int * active_domains, - unsigned int adomains) -{ - int ret = 0; - int i; - int set_dom0 = 0; - domid_t domid; - - if (!is_primary) - return 0; - - if (adomains > MAX_OPROF_DOMAINS) - return -E2BIG; - - ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); - if (ret) - return ret; - - for (i=0; i<adomains; i++) { - domid = active_domains[i]; - if (domid != active_domains[i]) { - ret = -EINVAL; - goto out; - } - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); - if (ret) - goto out; - if (active_domains[i] == 0) - set_dom0 = 1; - } - /* dom0 must always be active but may not be in the list 
*/ - if (!set_dom0) { - domid = 0; - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid); - } - -out: - if (ret) - HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); - active_defined = !ret; - return ret; -} - -static int xenoprof_set_passive(int * p_domains, - unsigned int pdoms) +int xenoprof_arch_set_passive(struct xenoprof_passive * pdomain, + struct xenoprof_shared_buffer * sbuf) { int ret; - int i, j; int npages; - struct xenoprof_buf *buf; struct vm_struct *area; pgprot_t prot = __pgprot(_KERNPG_TABLE); - if (!is_primary) - return 0; + sbuf->buffer = NULL; + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, pdomain); + if (ret) + goto out; - if (pdoms > MAX_OPROF_DOMAINS) - return -E2BIG; + npages = (pdomain->bufsize * pdomain->nbuf - 1) / PAGE_SIZE + 1; - ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL); - if (ret) - return ret; - - for (i = 0; i < pdoms; i++) { - passive_domains[i].domain_id = p_domains[i]; - passive_domains[i].max_samples = 2048; - ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, - &passive_domains[i]); - if (ret) - goto out; - - npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1; - - area = alloc_vm_area(npages * PAGE_SIZE); - if (area == NULL) { - ret = -ENOMEM; - goto out; - } - - ret = direct_kernel_remap_pfn_range( - (unsigned long)area->addr, - passive_domains[i].buf_maddr >> PAGE_SHIFT, - npages * PAGE_SIZE, prot, DOMID_SELF); - if (ret) { - vunmap(area->addr); - goto out; - } - - p_shared_buffer[i] = area->addr; - - for (j = 0; j < passive_domains[i].nbuf; j++) { - buf = (struct xenoprof_buf *) - &p_shared_buffer[i][j * passive_domains[i].bufsize]; - BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); - p_xenoprof_buf[i][buf->vcpu_id] = buf; - } - + area = alloc_vm_area(npages * PAGE_SIZE); + if (area == NULL) { + ret = -ENOMEM; + goto out; } - pdomains = pdoms; - return 0; + ret = direct_kernel_remap_pfn_range( + (unsigned long)area->addr, + pdomain->buf_gmaddr >> PAGE_SHIFT, + 
npages * PAGE_SIZE, prot, DOMID_SELF); + if (ret) { + vunmap(area->addr); + goto out; + } + sbuf->buffer = area->addr; out: - for (j = 0; j < i; j++) { - vunmap(p_shared_buffer[j]); - p_shared_buffer[j] = NULL; - } - - return ret; + return ret; } struct op_counter_config counter_config[OP_MAX_COUNTER]; -static int xenoprof_create_files(struct super_block * sb, struct dentry * root) +int xenoprof_create_files(struct super_block * sb, struct dentry * root) { unsigned int i; @@ -510,75 +168,12 @@ static int xenoprof_create_files(struct return 0; } - -struct oprofile_operations xenoprof_ops = { - .create_files = xenoprof_create_files, - .set_active = xenoprof_set_active, - .set_passive = xenoprof_set_passive, - .setup = xenoprof_setup, - .shutdown = xenoprof_shutdown, - .start = xenoprof_start, - .stop = xenoprof_stop -}; - - -/* in order to get driverfs right */ -static int using_xenoprof; - int __init oprofile_arch_init(struct oprofile_operations * ops) { - struct xenoprof_init init; - int ret, i; - - ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); - - if (!ret) { - num_events = init.num_events; - is_primary = init.is_primary; - - /* just in case - make sure we do not overflow event list - (i.e. 
counter_config list) */ - if (num_events > OP_MAX_COUNTER) - num_events = OP_MAX_COUNTER; - - /* cpu_type is detected by Xen */ - cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; - strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); - xenoprof_ops.cpu_type = cpu_type; - - init_driverfs(); - using_xenoprof = 1; - *ops = xenoprof_ops; - - for (i=0; i<NR_CPUS; i++) - ovf_irq[i] = -1; - - active_defined = 0; - } - printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, " - "is_primary %d\n", ret, num_events, is_primary); - return ret; + return xenoprofile_init(ops); } - -void __exit oprofile_arch_exit(void) +void oprofile_arch_exit(void) { - int i; - - if (using_xenoprof) - exit_driverfs(); - - if (shared_buffer) { - vunmap(shared_buffer); - shared_buffer = NULL; - } - if (is_primary) { - for (i = 0; i < pdomains; i++) - if (p_shared_buffer[i]) { - vunmap(p_shared_buffer[i]); - p_shared_buffer[i] = NULL; - } - HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL); - } - + xenoprofile_exit(); } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Nov 29 14:16:36 2006 -0600 @@ -63,6 +63,20 @@ config XEN_IA64_VDSO_PARAVIRT default y help vDSO paravirtualization + +config XEN_IA64_EXPOSE_P2M + bool "Xen/IA64 exposure p2m table" + depends on XEN + default y + help + expose p2m from xen + +config XEN_IA64_EXPOSE_P2M_USE_DTR + bool "Xen/IA64 map p2m table with dtr" + depends on XEN_IA64_EXPOSE_P2M + default y + help + use dtr to map the exposed p2m table config SCHED_NO_NO_OMIT_FRAME_POINTER bool @@ -276,6 +290,9 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. 
+config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + config SCHED_SMT bool "SMT scheduler support" depends on SMP diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/dig/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/dig/setup.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c Wed Nov 29 14:16:36 2006 -0600 @@ -24,6 +24,8 @@ #include <asm/io.h> #include <asm/machvec.h> #include <asm/system.h> + +#include <xen/xencons.h> void __init dig_setup (char **cmdline_p) @@ -78,27 +80,8 @@ dig_setup (char **cmdline_p) (struct dom0_vga_console_info *)( (char *)xen_start_info + xen_start_info->console.dom0.info_off); - screen_info.orig_video_mode = info->txt_mode; - screen_info.orig_video_isVGA = info->video_type; - screen_info.orig_video_lines = info->video_height; - screen_info.orig_video_cols = info->video_width; - screen_info.orig_video_points = info->txt_points; - screen_info.lfb_width = info->video_width; - screen_info.lfb_height = info->video_height; - screen_info.lfb_depth = info->lfb_depth; - screen_info.lfb_base = info->lfb_base; - screen_info.lfb_size = info->lfb_size; - screen_info.lfb_linelength = info->lfb_linelen; - screen_info.red_size = info->red_size; - screen_info.red_pos = info->red_pos; - screen_info.green_size = info->green_size; - screen_info.green_pos = info->green_pos; - screen_info.blue_size = info->blue_size; - screen_info.blue_pos = info->blue_pos; - screen_info.rsvd_size = info->rsvd_size; - screen_info.rsvd_pos = info->rsvd_pos; + dom0_init_screen_info(info); } - screen_info.orig_y = screen_info.orig_video_lines - 1; xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; #endif diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S Wed Nov 29 14:16:36 2006 -0600 @@ -13,6 +13,7 @@ SECTIONS . 
= GATE_ADDR + SIZEOF_HEADERS; .hash : { *(.hash) } :readable + .gnu.hash : { *(.gnu.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } .gnu.version : { *(.gnu.version) } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Wed Nov 29 14:16:36 2006 -0600 @@ -63,6 +63,7 @@ #include <asm/system.h> #ifdef CONFIG_XEN #include <asm/hypervisor.h> +#include <asm/xen/xencomm.h> #endif #include <linux/dma-mapping.h> @@ -433,6 +434,9 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_XEN if (is_running_on_xen()) { + /* Must be done before any hypercall. */ + xencomm_init(); + setup_xen_features(); /* Register a call for panic conditions. */ notifier_chain_register(&panic_notifier_list, &xen_panic_block); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/xen/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -3,6 +3,7 @@ # obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ - hypervisor.o pci-dma-xen.o util.o + hypervisor.o pci-dma-xen.o util.o xencomm.o xcom_hcall.o \ + xcom_mini.o xcom_privcmd.o pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Wed Nov 29 14:16:36 2006 -0600 @@ -40,59 +40,11 @@ int running_on_xen; int running_on_xen; EXPORT_SYMBOL(running_on_xen); -//XXX xen/ia64 copy_from_guest() is broken. -// This is a temporal work around until it is fixed. 
-// used by balloon.c netfront.c - -// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined -// if the definition in arch-ia64.h is changed, this must be updated. -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) - -int -ia64_xenmem_reservation_op(unsigned long op, - struct xen_memory_reservation* reservation__) -{ - struct xen_memory_reservation reservation = *reservation__; - unsigned long* frame_list; - unsigned long nr_extents = reservation__->nr_extents; - int ret = 0; - get_xen_guest_handle(frame_list, reservation__->extent_start); - - BUG_ON(op != XENMEM_increase_reservation && - op != XENMEM_decrease_reservation && - op != XENMEM_populate_physmap); - - while (nr_extents > 0) { - int tmp_ret; - volatile unsigned long dummy; - - set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_extents; - - dummy = frame_list[0];// re-install tlb entry before hypercall - tmp_ret = ____HYPERVISOR_memory_op(op, &reservation); - if (tmp_ret < 0) { - if (ret == 0) { - ret = tmp_ret; - } - break; - } - if (tmp_ret == 0) { - //XXX dirty work around for skbuff_ctor() - // of a non-privileged domain, - if ((op == XENMEM_increase_reservation || - op == XENMEM_populate_physmap) && - !is_initial_xendomain() && - reservation.extent_order > 0) - return ret; - } - frame_list += tmp_ret; - nr_extents -= tmp_ret; - ret += tmp_ret; - } - return ret; -} -EXPORT_SYMBOL(ia64_xenmem_reservation_op); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static int p2m_expose_init(void); +#else +#define p2m_expose_init() (-ENOSYS) +#endif //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? 
@@ -371,8 +323,6 @@ int int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) { - __u64 va1, va2, pa1, pa2; - if (cmd == GNTTABOP_map_grant_ref) { unsigned int i; for (i = 0; i < count; i++) { @@ -380,29 +330,7 @@ HYPERVISOR_grant_table_op(unsigned int c (struct gnttab_map_grant_ref*)uop + i); } } - va1 = (__u64)uop & PAGE_MASK; - pa1 = pa2 = 0; - if ((REGION_NUMBER(va1) == 5) && - ((va1 - KERNEL_START) >= KERNEL_TR_PAGE_SIZE)) { - pa1 = ia64_tpa(va1); - if (cmd <= GNTTABOP_transfer) { - static uint32_t uop_size[GNTTABOP_transfer + 1] = { - sizeof(struct gnttab_map_grant_ref), - sizeof(struct gnttab_unmap_grant_ref), - sizeof(struct gnttab_setup_table), - sizeof(struct gnttab_dump_table), - sizeof(struct gnttab_transfer), - }; - va2 = (__u64)uop + (uop_size[cmd] * count) - 1; - va2 &= PAGE_MASK; - if (va1 != va2) { - /* maximum size of uop is 2pages */ - BUG_ON(va2 > va1 + PAGE_SIZE); - pa2 = ia64_tpa(va2); - } - } - } - return ____HYPERVISOR_grant_table_op(cmd, uop, count, pa1, pa2); + return xencomm_mini_hypercall_grant_table_op(cmd, uop, count); } EXPORT_SYMBOL(HYPERVISOR_grant_table_op); @@ -526,6 +454,10 @@ out: privcmd_resource_min, privcmd_resource_max, (privcmd_resource_max - privcmd_resource_min) >> 20); BUG_ON(privcmd_resource_min >= privcmd_resource_max); + + // XXX this should be somewhere appropriate + (void)p2m_expose_init(); + return 0; } late_initcall(xen_ia64_privcmd_init); @@ -546,6 +478,7 @@ struct xen_ia64_privcmd_range { }; struct xen_ia64_privcmd_vma { + int is_privcmd_mmapped; struct xen_ia64_privcmd_range* range; unsigned long num_entries; @@ -684,12 +617,15 @@ static void static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma) { + struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data; struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data; struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range; 
atomic_inc(&privcmd_range->ref_count); // vm_op->open() can't fail. privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL); + // copy original value if necessary + privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped; __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range); } @@ -722,6 +658,14 @@ xen_ia64_privcmd_vma_close(struct vm_are kfree(privcmd_range->res); vfree(privcmd_range); } +} + +int +privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +{ + struct xen_ia64_privcmd_vma* privcmd_vma = + (struct xen_ia64_privcmd_vma *)vma->vm_private_data; + return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0); } int @@ -749,6 +693,8 @@ privcmd_mmap(struct file * file, struct if (privcmd_vma == NULL) { goto out_enomem1; } + privcmd_vma->is_privcmd_mmapped = 0; + res = kzalloc(sizeof(*res), GFP_KERNEL); if (res == NULL) { goto out_enomem1; @@ -831,3 +777,276 @@ time_resume(void) /* Just trigger a tick. */ ia64_cpu_local_tick(); } + +/////////////////////////////////////////////////////////////////////////// +// expose p2m table +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +#include <linux/cpu.h> +#include <asm/uaccess.h> + +int p2m_initialized __read_mostly = 0; + +unsigned long p2m_min_low_pfn __read_mostly; +unsigned long p2m_max_low_pfn __read_mostly; +unsigned long p2m_convert_min_pfn __read_mostly; +unsigned long p2m_convert_max_pfn __read_mostly; + +static struct resource p2m_resource = { + .name = "Xen p2m table", + .flags = IORESOURCE_MEM, +}; +static unsigned long p2m_assign_start_pfn __read_mostly; +static unsigned long p2m_assign_end_pfn __read_mostly; +volatile const pte_t* p2m_pte __read_mostly; + +#define GRNULE_PFN PTRS_PER_PTE +static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN; + +#define ROUNDDOWN(x, y) ((x) & ~((y) - 1)) +#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1)) + +#define P2M_PREFIX "Xen p2m: " + +static int xen_ia64_p2m_expose __read_mostly = 1; +module_param(xen_ia64_p2m_expose, int, 
0); +MODULE_PARM_DESC(xen_ia64_p2m_expose, + "enable/disable xen/ia64 p2m exposure optimization\n"); + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR +static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1; +module_param(xen_ia64_p2m_expose_use_dtr, int, 0); +MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr, + "use/unuse dtr to map exposed p2m table\n"); + +static const int p2m_page_shifts[] = { + _PAGE_SIZE_4K, + _PAGE_SIZE_8K, + _PAGE_SIZE_16K, + _PAGE_SIZE_64K, + _PAGE_SIZE_256K, + _PAGE_SIZE_1M, + _PAGE_SIZE_4M, + _PAGE_SIZE_16M, + _PAGE_SIZE_64M, + _PAGE_SIZE_256M, +}; + +struct p2m_itr_arg { + unsigned long vaddr; + unsigned long pteval; + unsigned long log_page_size; +}; +static struct p2m_itr_arg p2m_itr_arg __read_mostly; + +// This should be in asm-ia64/kregs.h +#define IA64_TR_P2M_TABLE 3 + +static void +p2m_itr(void* info) +{ + struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info; + ia64_itr(0x2, IA64_TR_P2M_TABLE, + arg->vaddr, arg->pteval, arg->log_page_size); + ia64_srlz_d(); +} + +static int +p2m_expose_dtr_call(struct notifier_block *self, + unsigned long event, void* ptr) +{ + unsigned int cpu = (unsigned int)(long)ptr; + if (event != CPU_ONLINE) + return 0; + if (!(p2m_initialized && xen_ia64_p2m_expose_use_dtr)) + smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1); + return 0; +} + +static struct notifier_block p2m_expose_dtr_hotplug_notifier = { + .notifier_call = p2m_expose_dtr_call, + .next = NULL, + .priority = 0 +}; +#endif + +static int +p2m_expose_init(void) +{ + unsigned long num_pfn; + unsigned long size = 0; + unsigned long p2m_size = 0; + unsigned long align = ~0UL; + int error = 0; +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + int i; + unsigned long page_size; + unsigned long log_page_size = 0; +#endif + + if (!xen_ia64_p2m_expose) + return -ENOSYS; + if (p2m_initialized) + return 0; + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); + if (error < 0) + return error; 
+#endif + + lock_cpu_hotplug(); + if (p2m_initialized) + goto out; + +#ifdef CONFIG_DISCONTIGMEM + p2m_min_low_pfn = min_low_pfn; + p2m_max_low_pfn = max_low_pfn; +#else + p2m_min_low_pfn = 0; + p2m_max_low_pfn = max_pfn; +#endif + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + unsigned long granule_pfn = 0; + p2m_size = p2m_max_low_pfn - p2m_min_low_pfn; + for (i = 0; + i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]); + i++) { + log_page_size = p2m_page_shifts[i]; + page_size = 1UL << log_page_size; + if (page_size < p2m_size) + continue; + + granule_pfn = max(page_size >> PAGE_SHIFT, + p2m_granule_pfn); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, + granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT); + if (p2m_size == page_size) + break; + } + if (p2m_size != page_size) { + printk(KERN_ERR "p2m_size != page_size\n"); + error = -EINVAL; + goto out; + } + align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT); + } else +#endif + { + BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1)); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + p2m_granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT); + align = max(privcmd_resource_align, + p2m_granule_pfn << PAGE_SHIFT); + } + + // use privcmd region + error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size, + privcmd_resource_min, privcmd_resource_max, + align, NULL, NULL); + if (error) { + printk(KERN_ERR P2M_PREFIX + "can't allocate region for p2m exposure " + "[0x%016lx, 0x%016lx) 0x%016lx\n", + p2m_convert_min_pfn, p2m_convert_max_pfn, 
p2m_size); + goto out; + } + + p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT; + p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT; + + error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn, + p2m_assign_start_pfn, + size, p2m_granule_pfn); + if (error) { + printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n", + error); + printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx " + "size 0x%016lx granule 0x%016lx\n", + p2m_convert_min_pfn, p2m_assign_start_pfn, + size, p2m_granule_pfn);; + release_resource(&p2m_resource); + goto out; + } + p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn + << PAGE_SHIFT); + p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn, + PAGE_KERNEL)); + p2m_itr_arg.log_page_size = log_page_size; + smp_mb(); + smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1); + p2m_itr(&p2m_itr_arg); + } +#endif + smp_mb(); + p2m_initialized = 1; + printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n", + p2m_convert_min_pfn << PAGE_SHIFT, + p2m_convert_max_pfn << PAGE_SHIFT); + printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n", + p2m_assign_start_pfn << PAGE_SHIFT, + p2m_assign_end_pfn << PAGE_SHIFT, + p2m_size / 1024); +out: + unlock_cpu_hotplug(); + return error; +} + +#ifdef notyet +void +p2m_expose_cleanup(void) +{ + BUG_ON(!p2m_initialized); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); +#endif + release_resource(&p2m_resource); +} +#endif + +//XXX inlinize? 
+unsigned long +p2m_phystomach(unsigned long gpfn) +{ + volatile const pte_t* pte; + unsigned long mfn; + unsigned long pteval; + + if (!p2m_initialized || + gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn + /* || !pfn_valid(gpfn) */) + return INVALID_MFN; + pte = p2m_pte + (gpfn - p2m_convert_min_pfn); + + mfn = INVALID_MFN; + if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 && + pte_present(__pte(pteval)) && + pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT))) + mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT; + + return mfn; +} + +EXPORT_SYMBOL_GPL(p2m_initialized); +EXPORT_SYMBOL_GPL(p2m_min_low_pfn); +EXPORT_SYMBOL_GPL(p2m_max_low_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_min_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_max_pfn); +EXPORT_SYMBOL_GPL(p2m_pte); +EXPORT_SYMBOL_GPL(p2m_phystomach); +#endif diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/xen/util.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Wed Nov 29 14:16:36 2006 -0600 @@ -28,6 +28,8 @@ #include <linux/vmalloc.h> #include <asm/uaccess.h> #include <xen/driver_util.h> +#include <xen/interface/memory.h> +#include <asm/hypercall.h> struct vm_struct *alloc_vm_area(unsigned long size) { diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Wed Nov 29 14:16:36 2006 -0600 @@ -23,12 +23,11 @@ GLOBAL_ENTRY(early_xen_setup) mov cr.iva=r10 -#if XSI_BASE != 0xf100000000000000UL - /* Backward compatibility. */ -(isBP) mov r2=0x600 + /* Set xsi base. */ +#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600 +(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA (isBP) movl r28=XSI_BASE;; (isBP) break 0x1000;; -#endif br.ret.sptk.many rp ;; @@ -38,18 +37,18 @@ END(early_xen_setup) /* Stub for suspend. Just force the stacked registers to be written in memory. 
*/ -GLOBAL_ENTRY(HYPERVISOR_suspend) +GLOBAL_ENTRY(xencomm_arch_hypercall_suspend) + mov r15=r32 + ;; alloc r20=ar.pfs,0,0,0,0 - mov r14=2 - mov r15=r12 - ;; + mov r2=__HYPERVISOR_sched_op + ;; /* We don't want to deal with RSE. */ flushrs - mov r2=__HYPERVISOR_sched_op - st4 [r12]=r14 + mov r14=2 // SCHEDOP_shutdown ;; break 0x1000 ;; mov ar.pfs=r20 br.ret.sptk.many b0 -END(HYPERVISOR_suspend) +END(xencomm_arch_hypercall_suspend) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Wed Nov 29 14:16:36 2006 -0600 @@ -368,6 +368,8 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y config HPET_TIMER bool diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wake ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += processor.o +processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o endif boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Wed Nov 29 14:16:36 2006 -0600 @@ -316,12 +316,7 @@ tracesys: ja 1f movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) -1: SAVE_REST - movq %rsp,%rdi - call syscall_trace_leave - RESTORE_TOP_OF_STACK %rbx - RESTORE_REST +1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame 
*/ jmp int_ret_from_sys_call CFI_ENDPROC diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -350,7 +350,6 @@ static inline void set_32bit_tls(struct struct user_desc ud = { .base_addr = addr, .limit = 0xfffff, - .contents = (3 << 3), /* user */ .seg_32bit = 1, .limit_in_pages = 1, .useable = 1, diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -74,6 +74,7 @@ #include <asm/hypervisor.h> #include <xen/interface/nmi.h> #include <xen/features.h> +#include <xen/xencons.h> #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_PHYS(x) ((x) << PAGE_SHIFT) #include <asm/mach-xen/setup_arch_post.h> @@ -143,6 +144,9 @@ struct sys_desc_table_struct { struct edid_info edid_info; struct e820map e820; +#ifdef CONFIG_XEN +struct e820map machine_e820; +#endif extern int root_mountflags; @@ -625,7 +629,6 @@ void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p) { unsigned long kernel_end; - struct e820entry *machine_e820; struct xen_memory_map memmap; #ifdef CONFIG_XEN @@ -645,33 +648,15 @@ void __init setup_arch(char **cmdline_p) screen_info.orig_video_cols = 80; screen_info.orig_video_ega_bx = 3; screen_info.orig_video_points = 16; + screen_info.orig_y = screen_info.orig_video_lines - 1; if (xen_start_info->console.dom0.info_size >= sizeof(struct dom0_vga_console_info)) { const struct dom0_vga_console_info *info = (struct dom0_vga_console_info *)( (char *)xen_start_info + xen_start_info->console.dom0.info_off); - screen_info.orig_video_mode = info->txt_mode; - screen_info.orig_video_isVGA = 
info->video_type; - screen_info.orig_video_lines = info->video_height; - screen_info.orig_video_cols = info->video_width; - screen_info.orig_video_points = info->txt_points; - screen_info.lfb_width = info->video_width; - screen_info.lfb_height = info->video_height; - screen_info.lfb_depth = info->lfb_depth; - screen_info.lfb_base = info->lfb_base; - screen_info.lfb_size = info->lfb_size; - screen_info.lfb_linelength = info->lfb_linelen; - screen_info.red_size = info->red_size; - screen_info.red_pos = info->red_pos; - screen_info.green_size = info->green_size; - screen_info.green_pos = info->green_pos; - screen_info.blue_size = info->blue_size; - screen_info.blue_pos = info->blue_pos; - screen_info.rsvd_size = info->rsvd_size; - screen_info.rsvd_pos = info->rsvd_pos; - } - screen_info.orig_y = screen_info.orig_video_lines - 1; + dom0_init_screen_info(info); + } xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; } else @@ -936,14 +921,14 @@ void __init setup_arch(char **cmdline_p) probe_roms(); #ifdef CONFIG_XEN if (is_initial_xendomain()) { - machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); - memmap.nr_entries = E820MAX; - set_xen_guest_handle(memmap.buffer, machine_e820); - - BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); - - e820_reserve_resources(machine_e820, memmap.nr_entries); + set_xen_guest_handle(memmap.buffer, machine_e820.map); + + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) + BUG(); + machine_e820.nr_map = memmap.nr_entries; + + e820_reserve_resources(machine_e820.map, machine_e820.nr_map); } #else e820_reserve_resources(e820.map, e820.nr_map); @@ -959,10 +944,8 @@ void __init setup_arch(char **cmdline_p) } #ifdef CONFIG_XEN - if (is_initial_xendomain()) { - e820_setup_gap(machine_e820, memmap.nr_entries); - free_bootmem(__pa(machine_e820), PAGE_SIZE); - } + if (is_initial_xendomain()) + e820_setup_gap(machine_e820.map, machine_e820.nr_map); #else e820_setup_gap(e820.map, e820.nr_map); 
#endif diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -30,6 +30,7 @@ #include <linux/moduleparam.h> #include <linux/nmi.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -438,6 +439,8 @@ void __kprobes __die(const char * str, s printk(KERN_ALERT "RIP "); printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); + if (kexec_should_crash(current)) + crash_kexec(regs); } void die(const char * str, struct pt_regs * regs, long err) @@ -461,6 +464,8 @@ void __kprobes die_nmi(char *str, struct */ printk(str, safe_smp_processor_id()); show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -56,6 +56,11 @@ struct dma_mapping_ops* dma_ops; struct dma_mapping_ops* dma_ops; EXPORT_SYMBOL(dma_ops); +#ifdef CONFIG_XEN_COMPAT_030002 +unsigned int __kernel_page_user; +EXPORT_SYMBOL(__kernel_page_user); +#endif + extern unsigned long *contiguous_bitmap; static unsigned long dma_reserve __initdata; @@ -260,7 +265,10 @@ static void set_pte_phys(unsigned long v return; } } - new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); + if (pgprot_val(prot)) + new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); + else + new_pte = __pte(0); pte = pte_offset_kernel(pmd, vaddr); if (!pte_none(*pte) && @@ -523,6 +531,33 @@ void __init xen_init_pt(void) addr_to_page(addr, page); addr = page[pud_index(__START_KERNEL_map)]; addr_to_page(addr, page); + +#ifdef 
CONFIG_XEN_COMPAT_030002 + /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER + in kernel PTEs. We check that here. */ + if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) { + unsigned long *pg; + pte_t pte; + + /* Mess with the initial mapping of page 0. It's not needed. */ + BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map); + addr = page[pmd_index(__START_KERNEL_map)]; + addr_to_page(addr, pg); + pte.pte = pg[pte_index(__START_KERNEL_map)]; + BUG_ON(!(pte.pte & _PAGE_PRESENT)); + + /* If _PAGE_USER isn't set, we obviously do not need it. */ + if (pte.pte & _PAGE_USER) { + /* _PAGE_USER is needed, but is it set implicitly? */ + pte.pte &= ~_PAGE_USER; + if ((HYPERVISOR_update_va_mapping(__START_KERNEL_map, + pte, 0) != 0) || + !(pg[pte_index(__START_KERNEL_map)] & _PAGE_USER)) + /* We need to explicitly specify _PAGE_USER. */ + __kernel_page_user = _PAGE_USER; + } + } +#endif /* Construct mapping of initial pte page in our own directories. */ init_level4_pgt[pgd_index(__START_KERNEL_map)] = @@ -913,8 +948,8 @@ void __init mem_init(void) #endif /* XEN: init and count pages outside initial allocation. 
*/ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { - ClearPageReserved(&mem_map[pfn]); - set_page_count(&mem_map[pfn], 1); + ClearPageReserved(pfn_to_page(pfn)); + set_page_count(pfn_to_page(pfn), 1); totalram_pages++; } reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile --- a/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -12,6 +12,8 @@ DRIVER_OBJS = $(addprefix ../../../drive timer_int.o ) ifdef CONFIG_XEN +XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \ + xenoprofile.o) OPROFILE-y := xenoprof.o else OPROFILE-y := init.o backtrace.o @@ -19,4 +21,5 @@ OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi op_model_ppro.o OPROFILE-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o endif -oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y)) +oprofile-y = $(DRIVER_OBJS) $(XENOPROF_COMMON_OBJS) \ + $(addprefix ../../i386/oprofile/, $(OPROFILE-y)) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Wed Nov 29 14:16:36 2006 -0600 @@ -41,6 +41,7 @@ #include <xen/evtchn.h> #include <xen/interface/grant_table.h> #include <xen/interface/io/tpmif.h> +#include <xen/gnttab.h> #include <xen/xenbus.h> #include "tpm.h" #include "tpm_vtpm.h" @@ -343,6 +344,7 @@ static void backend_changed(struct xenbu case XenbusStateInitialising: case XenbusStateInitWait: case XenbusStateInitialised: + case XenbusStateUnknown: break; case XenbusStateConnected: @@ -351,13 +353,14 @@ static void backend_changed(struct xenbu case XenbusStateClosing: tpmif_set_connected_state(tp, 0); + xenbus_frontend_closed(dev); break; - case XenbusStateUnknown: case 
XenbusStateClosed: + tpmif_set_connected_state(tp, 0); if (tp->is_suspended == 0) device_unregister(&dev->dev); - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); break; } } @@ -419,9 +422,10 @@ static int tpmfront_suspend(struct xenbu mutex_lock(&suspend_lock); tp->is_suspended = 1; - for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) { + for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) { if ((ctr % 10) == 0) - printk("TPM-FE [INFO]: Waiting for outstanding request.\n"); + printk("TPM-FE [INFO]: Waiting for outstanding " + "request.\n"); /* * Wait for a request to be responded to. */ diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/char/tty_io.c --- a/linux-2.6-xen-sparse/drivers/char/tty_io.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c Wed Nov 29 14:16:36 2006 -0600 @@ -2761,7 +2761,7 @@ static void flush_to_ldisc(void *private struct tty_struct *tty = (struct tty_struct *) private_; unsigned long flags; struct tty_ldisc *disc; - struct tty_buffer *tbuf; + struct tty_buffer *tbuf, *head; int count; char *char_buf; unsigned char *flag_buf; @@ -2778,7 +2778,9 @@ static void flush_to_ldisc(void *private goto out; } spin_lock_irqsave(&tty->buf.lock, flags); - while((tbuf = tty->buf.head) != NULL) { + head = tty->buf.head; + tty->buf.head = NULL; + while((tbuf = head) != NULL) { while ((count = tbuf->commit - tbuf->read) != 0) { char_buf = tbuf->char_buf_ptr + tbuf->read; flag_buf = tbuf->flag_buf_ptr + tbuf->read; @@ -2787,10 +2789,12 @@ static void flush_to_ldisc(void *private disc->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); } - if (tbuf->active) + if (tbuf->active) { + tty->buf.head = head; break; - tty->buf.head = tbuf->next; - if (tty->buf.head == NULL) + } + head = tbuf->next; + if (head == NULL) tty->buf.tail = NULL; tty_buffer_free(tty, tbuf); } diff -r ee4397571e44 -r 223470316756 
linux-2.6-xen-sparse/drivers/serial/Kconfig --- a/linux-2.6-xen-sparse/drivers/serial/Kconfig Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig Wed Nov 29 14:16:36 2006 -0600 @@ -821,6 +821,7 @@ config SERIAL_ICOM tristate "IBM Multiport Serial Adapter" depends on PCI && (PPC_ISERIES || PPC_PSERIES) select SERIAL_CORE + select FW_LOADER help This driver is for a family of multiport serial adapters including 2 port RVX, 2 port internal modem, 4 port internal diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/balloon/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -1,2 +1,2 @@ -obj-y += balloon.o +obj-y := balloon.o sysfs.o diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Nov 29 14:16:36 2006 -0600 @@ -53,10 +53,8 @@ #include <asm/uaccess.h> #include <asm/tlb.h> #include <linux/list.h> - #include <xen/xenbus.h> - -#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) +#include "common.h" #ifdef CONFIG_PROC_FS static struct proc_dir_entry *balloon_pde; @@ -71,9 +69,7 @@ static DECLARE_MUTEX(balloon_mutex); */ DEFINE_SPINLOCK(balloon_lock); -/* We aim for 'current allocation' == 'target allocation'. */ -static unsigned long current_pages; -static unsigned long target_pages; +struct balloon_stats balloon_stats; /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; @@ -81,18 +77,8 @@ static unsigned long frame_list[PAGE_SIZ /* VM /proc information for memory */ extern unsigned long totalram_pages; -/* We may hit the hard limit in Xen. If we do then we remember it. 
*/ -static unsigned long hard_limit; - -/* - * Drivers may alter the memory reservation independently, but they must - * inform the balloon driver so that we can avoid hitting the hard limit. - */ -static unsigned long driver_pages; - /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); -static unsigned long balloon_low, balloon_high; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); @@ -124,10 +110,10 @@ static void balloon_append(struct page * /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); - balloon_high++; + bs.balloon_high++; } else { list_add(PAGE_TO_LIST(page), &ballooned_pages); - balloon_low++; + bs.balloon_low++; } } @@ -143,9 +129,9 @@ static struct page *balloon_retrieve(voi UNLIST_PAGE(page); if (PageHighMem(page)) - balloon_high--; + bs.balloon_high--; else - balloon_low--; + bs.balloon_low--; return page; } @@ -172,9 +158,9 @@ static void balloon_alarm(unsigned long static unsigned long current_target(void) { - unsigned long target = min(target_pages, hard_limit); - if (target > (current_pages + balloon_low + balloon_high)) - target = current_pages + balloon_low + balloon_high; + unsigned long target = min(bs.target_pages, bs.hard_limit); + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) + target = bs.current_pages + bs.balloon_low + bs.balloon_high; return target; } @@ -216,7 +202,8 @@ static int increase_reservation(unsigned BUG_ON(ret != rc); } if (rc >= 0) - hard_limit = current_pages + rc - driver_pages; + bs.hard_limit = (bs.current_pages + rc - + bs.driver_pages); goto out; } @@ -228,9 +215,7 @@ static int increase_reservation(unsigned BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && phys_to_machine_mapping_valid(pfn)); - /* Update P->M and M->P tables. 
*/ set_phys_to_machine(pfn, frame_list[i]); - xen_machphys_update(frame_list[i], pfn); /* Link back into the page tables if not highmem. */ if (pfn < max_low_pfn) { @@ -248,8 +233,8 @@ static int increase_reservation(unsigned __free_page(page); } - current_pages += nr_pages; - totalram_pages = current_pages; + bs.current_pages += nr_pages; + totalram_pages = bs.current_pages; out: balloon_unlock(flags); @@ -317,8 +302,8 @@ static int decrease_reservation(unsigned ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); - current_pages -= nr_pages; - totalram_pages = current_pages; + bs.current_pages -= nr_pages; + totalram_pages = bs.current_pages; balloon_unlock(flags); @@ -339,7 +324,7 @@ static void balloon_process(void *unused down(&balloon_mutex); do { - credit = current_target() - current_pages; + credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) @@ -352,18 +337,18 @@ static void balloon_process(void *unused } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ - if (current_target() != current_pages) + if (current_target() != bs.current_pages) mod_timer(&balloon_timer, jiffies + HZ); up(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ -static void set_new_target(unsigned long target) +void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ - hard_limit = ~0UL; - target_pages = target; + bs.hard_limit = ~0UL; + bs.target_pages = target; schedule_work(&balloon_worker); } @@ -388,7 +373,7 @@ static void watch_target(struct xenbus_w /* The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
*/ - set_new_target(new_target >> (PAGE_SHIFT - 10)); + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); } static int balloon_init_watcher(struct notifier_block *notifier, @@ -424,7 +409,7 @@ static int balloon_write(struct file *fi memstring[sizeof(memstring)-1] = '\0'; target_bytes = memparse(memstring, &endchar); - set_new_target(target_bytes >> PAGE_SHIFT); + balloon_set_new_target(target_bytes >> PAGE_SHIFT); return count; } @@ -442,12 +427,13 @@ static int balloon_read(char *page, char "High-mem balloon: %8lu kB\n" "Driver pages: %8lu kB\n" "Xen hard limit: ", - PAGES2KB(current_pages), PAGES2KB(target_pages), - PAGES2KB(balloon_low), PAGES2KB(balloon_high), - PAGES2KB(driver_pages)); - - if (hard_limit != ~0UL) - len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit)); + PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages), + PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high), + PAGES2KB(bs.driver_pages)); + + if (bs.hard_limit != ~0UL) + len += sprintf(page + len, "%8lu kB\n", + PAGES2KB(bs.hard_limit)); else len += sprintf(page + len, " ??? kB\n"); @@ -468,13 +454,13 @@ static int __init balloon_init(void) IPRINTK("Initialising balloon driver.\n"); - current_pages = min(xen_start_info->nr_pages, max_pfn); - totalram_pages = current_pages; - target_pages = current_pages; - balloon_low = 0; - balloon_high = 0; - driver_pages = 0UL; - hard_limit = ~0UL; + bs.current_pages = min(xen_start_info->nr_pages, max_pfn); + totalram_pages = bs.current_pages; + bs.target_pages = bs.current_pages; + bs.balloon_low = 0; + bs.balloon_high = 0; + bs.driver_pages = 0UL; + bs.hard_limit = ~0UL; init_timer(&balloon_timer); balloon_timer.data = 0; @@ -489,6 +475,7 @@ static int __init balloon_init(void) balloon_pde->read_proc = balloon_read; balloon_pde->write_proc = balloon_write; #endif + balloon_sysfs_init(); /* Initialise the balloon with excess memory space. 
*/ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { @@ -512,7 +499,7 @@ void balloon_update_driver_allowance(lon unsigned long flags; balloon_lock(flags); - driver_pages += delta; + bs.driver_pages += delta; balloon_unlock(flags); } @@ -534,74 +521,86 @@ static int dealloc_pte_fn( return 0; } -struct page *balloon_alloc_empty_page_range(unsigned long nr_pages) -{ - unsigned long vstart, flags; - unsigned int order = get_order(nr_pages * PAGE_SIZE); - int ret; - unsigned long i; - struct page *page; - - vstart = __get_free_pages(GFP_KERNEL, order); - if (vstart == 0) +struct page **alloc_empty_pages_and_pagevec(int nr_pages) +{ + unsigned long vaddr, flags; + struct page *page, **pagevec; + int i, ret; + + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); + if (pagevec == NULL) return NULL; - scrub_pages(vstart, 1 << order); - + for (i = 0; i < nr_pages; i++) { + page = pagevec[i] = alloc_page(GFP_KERNEL); + if (page == NULL) + goto err; + + vaddr = (unsigned long)page_address(page); + + scrub_pages(vaddr, 1); + + balloon_lock(flags); + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long gmfn = page_to_pfn(page); + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gmfn); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + if (ret == 1) + ret = 0; /* success */ + } else { + ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, + dealloc_pte_fn, NULL); + } + + if (ret != 0) { + balloon_unlock(flags); + __free_page(page); + goto err; + } + + totalram_pages = --bs.current_pages; + + balloon_unlock(flags); + } + + out: + schedule_work(&balloon_worker); + flush_tlb_all(); + return pagevec; + + err: balloon_lock(flags); - if (xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - 
.extent_order = order, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gmfn); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - if (ret == -ENOSYS) - goto err; - BUG_ON(ret != 1); - } else { - ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order, - dealloc_pte_fn, NULL); - if (ret == -ENOSYS) - goto err; - BUG_ON(ret); - } - current_pages -= 1UL << order; - totalram_pages = current_pages; + while (--i >= 0) + balloon_append(pagevec[i]); balloon_unlock(flags); - - schedule_work(&balloon_worker); - - flush_tlb_all(); - - page = virt_to_page(vstart); - - for (i = 0; i < (1UL << order); i++) - set_page_count(page + i, 1); - - return page; - - err: - free_pages(vstart, order); + kfree(pagevec); + pagevec = NULL; + goto out; +} + +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) +{ + unsigned long flags; + int i; + + if (pagevec == NULL) + return; + + balloon_lock(flags); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page_count(pagevec[i]) != 1); + balloon_append(pagevec[i]); + } balloon_unlock(flags); - return NULL; -} - -void balloon_dealloc_empty_page_range( - struct page *page, unsigned long nr_pages) -{ - unsigned long i, flags; - unsigned int order = get_order(nr_pages * PAGE_SIZE); - - balloon_lock(flags); - for (i = 0; i < (1UL << order); i++) { - BUG_ON(page_count(page + i) != 1); - balloon_append(page + i); - } - balloon_unlock(flags); + + kfree(pagevec); schedule_work(&balloon_worker); } @@ -612,15 +611,15 @@ void balloon_release_driver_page(struct balloon_lock(flags); balloon_append(page); - driver_pages--; + bs.driver_pages--; balloon_unlock(flags); schedule_work(&balloon_worker); } EXPORT_SYMBOL_GPL(balloon_update_driver_allowance); -EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range); -EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range); +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec); +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec); 
EXPORT_SYMBOL_GPL(balloon_release_driver_page); MODULE_LICENSE("Dual BSD/GPL"); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Wed Nov 29 14:16:36 2006 -0600 @@ -55,8 +55,6 @@ static int blkif_reqs = 64; static int blkif_reqs = 64; module_param_named(reqs, blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); - -static int mmap_pages; /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats = 0; @@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_f #define BLKBACK_INVALID_HANDLE (~0) -static unsigned long mmap_vstart; -static unsigned long *pending_vaddrs; +static struct page **pending_pages; static grant_handle_t *pending_grant_handles; static inline int vaddr_pagenr(pending_req_t *req, int seg) @@ -98,7 +95,8 @@ static inline int vaddr_pagenr(pending_r static inline unsigned long vaddr(pending_req_t *req, int seg) { - return pending_vaddrs[vaddr_pagenr(req, seg)]; + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); } #define pending_handle(_req, _seg) \ @@ -191,9 +189,9 @@ static void fast_flush_area(pending_req_ static void print_stats(blkif_t *blkif) { - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n", + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req); + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -243,11 +241,17 @@ int blkif_schedule(void *arg) * COMPLETION CALLBACK -- Called as bh->b_end_io() */ -static void __end_block_io_op(pending_req_t *pending_req, int uptodate) +static void __end_block_io_op(pending_req_t *pending_req, int error) { /* An 
error fails the entire request. */ - if (!uptodate) { - DPRINTK("Buffer not up-to-date at end of operation\n"); + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -264,7 +268,7 @@ static int end_block_io_op(struct bio *b { if (bio->bi_size != 0) return 1; - __end_block_io_op(bio->bi_private, !error); + __end_block_io_op(bio->bi_private, error); bio_put(bio); return error; } @@ -295,7 +299,7 @@ static int do_block_io_op(blkif_t *blkif static int do_block_io_op(blkif_t *blkif) { blkif_back_ring_t *blk_ring = &blkif->blk_ring; - blkif_request_t *req; + blkif_request_t req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -313,22 +317,25 @@ static int do_block_io_op(blkif_t *blkif break; } - req = RING_GET_REQUEST(blk_ring, rc); + memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req)); blk_ring->req_cons = ++rc; /* before make_response() */ - switch (req->operation) { + switch (req.operation) { case BLKIF_OP_READ: blkif->st_rd_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ case BLKIF_OP_WRITE: blkif->st_wr_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; default: DPRINTK("error: unknown block io operation [%d]\n", - req->operation); - make_response(blkif, req->id, req->operation, + req.operation); + make_response(blkif, req.id, req.operation, BLKIF_RSP_ERROR); free_req(pending_req); break; @@ -342,7 +349,6 @@ static void dispatch_rw_block_io(blkif_t pending_req_t *pending_req) { extern void ll_rw_block(int 
rw, int nr, struct buffer_head * bhs[]); - int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; struct { @@ -351,6 +357,22 @@ static void dispatch_rw_block_io(blkif_t unsigned int nseg; struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int ret, i, nbio = 0; + int operation; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } /* Check that number of segments is sane. */ nseg = req->nr_segments; @@ -366,7 +388,7 @@ static void dispatch_rw_block_io(blkif_t pending_req->blkif = blkif; pending_req->id = req->id; - pending_req->operation = operation; + pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; @@ -377,12 +399,12 @@ static void dispatch_rw_block_io(blkif_t req->seg[i].first_sect + 1; if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (seg[i].nsec <= 0)) + (req->seg[i].last_sect < req->seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; flags = GNTMAP_host_map; - if ( operation == WRITE ) + if (operation != READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, req->seg[i].gref, blkif->domid); @@ -394,16 +416,24 @@ static void dispatch_rw_block_io(blkif_t for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); - goto fail_flush; + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; } pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + set_phys_to_machine(__pa(vaddr( pending_req, i)) >> PAGE_SHIFT, FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); } + + if (ret) + goto fail_flush; if 
(vbd_translate(&preq, blkif, operation) != 0) { DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", @@ -506,52 +536,43 @@ static void make_response(blkif_t *blkif static int __init blkif_init(void) { - struct page *page; - int i; + int i, mmap_pages; if (!is_running_on_xen()) return -ENODEV; - mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - - page = balloon_alloc_empty_page_range(mmap_pages); - if (page == NULL) - return -ENOMEM; - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; pending_reqs = kmalloc(sizeof(pending_reqs[0]) * blkif_reqs, GFP_KERNEL); pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); - pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) * - mmap_pages, GFP_KERNEL); - if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) { - kfree(pending_reqs); - kfree(pending_grant_handles); - kfree(pending_vaddrs); - printk("%s: out of memory\n", __FUNCTION__); - return -ENOMEM; - } + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; blkif_interface_init(); - - printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", - __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart); - BUG_ON(mmap_vstart == 0); - for (i = 0; i < mmap_pages; i++) { - pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT); - pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - } memset(pending_reqs, 0, sizeof(pending_reqs)); INIT_LIST_HEAD(&pending_free); for (i = 0; i < blkif_reqs; i++) list_add_tail(&pending_reqs[i].free_list, &pending_free); - + blkif_xenbus_init(); return 0; + + out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; } 
module_init(blkif_init); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Wed Nov 29 14:16:36 2006 -0600 @@ -44,6 +44,7 @@ #include <xen/interface/io/ring.h> #include <xen/gnttab.h> #include <xen/driver_util.h> +#include <xen/xenbus.h> #define DPRINTK(_f, _a...) \ pr_debug("(file=%s, line=%d) " _f, \ @@ -87,6 +88,7 @@ typedef struct blkif_st { int st_rd_req; int st_wr_req; int st_oo_req; + int st_br_req; wait_queue_head_t waiting_to_free; @@ -111,7 +113,7 @@ int vbd_create(blkif_t *blkif, blkif_vde unsigned minor, int readonly); void vbd_free(struct vbd *vbd); -unsigned long vbd_size(struct vbd *vbd); +unsigned long long vbd_size(struct vbd *vbd); unsigned int vbd_info(struct vbd *vbd); unsigned long vbd_secsize(struct vbd *vbd); @@ -131,4 +133,7 @@ irqreturn_t blkif_be_int(int irq, void * irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); int blkif_schedule(void *arg); +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Wed Nov 29 14:16:36 2006 -0600 @@ -31,12 +31,11 @@ */ #include "common.h" -#include <xen/xenbus.h> #define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) -unsigned long vbd_size(struct vbd *vbd) +unsigned long long vbd_size(struct vbd *vbd) { return vbd_sz(vbd); } @@ -104,7 +103,7 @@ int vbd_translate(struct phys_req *req, struct vbd *vbd = &blkif->vbd; int rc = -EACCES; - if ((operation == WRITE) && vbd->readonly) + if ((operation != READ) && vbd->readonly) goto out; if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Nov 29 14:16:36 2006 -0600 @@ -20,7 +20,6 @@ #include <stdarg.h> #include <linux/module.h> #include <linux/kthread.h> -#include <xen/xenbus.h> #include "common.h" #undef DPRINTK @@ -91,11 +90,13 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_o VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); static struct attribute *vbdstat_attrs[] = { &dev_attr_oo_req.attr, &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, NULL }; @@ -165,6 +166,19 @@ static int blkback_remove(struct xenbus_ return 0; } +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} /** * Entry point to this code when a new device is created. 
Allocate the basic @@ -366,13 +380,16 @@ static void connect(struct backend_info /* Supply the information about the device the frontend needs */ again: err = xenbus_transaction_start(&xbt); - if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); return; } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%lu", + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", vbd_size(&be->blkif->vbd)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sectors", diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Wed Nov 29 14:16:36 2006 -0600 @@ -48,6 +48,10 @@ #include <asm/hypervisor.h> #include <asm/maddr.h> +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 #define BLKIF_STATE_SUSPENDED 2 @@ -134,10 +138,10 @@ static int blkfront_resume(struct xenbus DPRINTK("blkfront_resume: %s\n", dev->nodename); - blkif_free(info, 1); + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = talk_to_backend(dev, info); - if (!err) + if (info->connected == BLKIF_STATE_SUSPENDED && !err) blkif_recover(info); return err; @@ -273,7 +277,7 @@ static void backend_changed(struct xenbu xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); down(&bd->bd_sem); - if (info->users > 0 && system_state == SYSTEM_RUNNING) + if (info->users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else @@ -294,7 +298,8 @@ static void backend_changed(struct xenbu */ static void connect(struct blkfront_info *info) { - unsigned long sectors, sector_size; + unsigned long long sectors; + unsigned long sector_size; unsigned int binfo; int err; @@ -305,7 +310,7 @@ static void connect(struct blkfront_info 
DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "sectors", "%lu", &sectors, + "sectors", "%llu", &sectors, "info", "%u", &binfo, "sector-size", "%lu", &sector_size, NULL); @@ -316,6 +321,12 @@ static void connect(struct blkfront_info return; } + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%lu", &info->feature_barrier, + NULL); + if (err) + info->feature_barrier = 0; + err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", @@ -355,8 +366,10 @@ static void blkfront_closing(struct xenb blk_stop_queue(info->rq); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); + spin_unlock_irqrestore(&blkif_io_lock, flags); + + /* Flush gnttab callback work. Must be done with no locks held. */ flush_scheduled_work(); - spin_unlock_irqrestore(&blkif_io_lock, flags); xlvbd_del(info); @@ -466,6 +479,27 @@ int blkif_ioctl(struct inode *inode, str command, (long)argument, inode->i_rdev); switch (command) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + case HDIO_GETGEO: { + struct block_device *bd = inode->i_bdev; + struct hd_geometry geo; + int ret; + + if (!argument) + return -EINVAL; + + geo.start = get_start_sect(bd); + ret = blkif_getgeo(bd, &geo); + if (ret) + return ret; + + if (copy_to_user((struct hd_geometry __user *)argument, &geo, + sizeof(geo))) + return -EFAULT; + + return 0; + } +#endif case CDROMMULTISESSION: DPRINTK("FIXME: support multisession CDs later\n"); for (i = 0; i < sizeof(struct cdrom_multisession); i++) @@ -542,10 +576,13 @@ static int blkif_queue_request(struct re info->shadow[id].request = (unsigned long)req; ring_req->id = id; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + ring_req->operation = rq_data_dir(req) ? 
BLKIF_OP_WRITE : BLKIF_OP_READ; - ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->handle = info->handle; + if (blk_barrier_rq(req)) + ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = 0; rq_for_each_bio (bio, req) { @@ -643,6 +680,7 @@ static irqreturn_t blkif_int(int irq, vo RING_IDX i, rp; unsigned long flags; struct blkfront_info *info = (struct blkfront_info *)dev_id; + int uptodate; spin_lock_irqsave(&blkif_io_lock, flags); @@ -667,19 +705,27 @@ static irqreturn_t blkif_int(int irq, vo ADD_ID_TO_FREELIST(info, id); + uptodate = (bret->status == BLKIF_RSP_OKAY); switch (bret->operation) { + case BLKIF_OP_WRITE_BARRIER: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + printk("blkfront: %s: write barrier op failed\n", + info->gd->disk_name); + uptodate = -EOPNOTSUPP; + info->feature_barrier = 0; + xlvbd_barrier(info); + } + /* fall through */ case BLKIF_OP_READ: case BLKIF_OP_WRITE: if (unlikely(bret->status != BLKIF_RSP_OKAY)) DPRINTK("Bad return from blkdev data " "request: %x\n", bret->status); - ret = end_that_request_first( - req, (bret->status == BLKIF_RSP_OKAY), + ret = end_that_request_first(req, uptodate, req->hard_nr_sectors); BUG_ON(ret); - end_that_request_last( - req, (bret->status == BLKIF_RSP_OKAY)); + end_that_request_last(req, uptodate); break; default: BUG(); @@ -714,8 +760,10 @@ static void blkif_free(struct blkfront_i blk_stop_queue(info->rq); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); + spin_unlock_irq(&blkif_io_lock); + + /* Flush gnttab callback work. Must be done with no locks held. */ flush_scheduled_work(); - spin_unlock_irq(&blkif_io_lock); /* Free resources associated with old device channel. 
*/ if (info->ring_ref != GRANT_INVALID_REF) { diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Nov 29 14:16:36 2006 -0600 @@ -126,6 +126,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; + int feature_barrier; /** * The number of people holding this device open. We won't allow a @@ -152,5 +153,6 @@ int xlvbd_add(blkif_sector_t capacity, i int xlvbd_add(blkif_sector_t capacity, int device, u16 vdisk_info, u16 sector_size, struct blkfront_info *info); void xlvbd_del(struct blkfront_info *info); +int xlvbd_barrier(struct blkfront_info *info); #endif /* __XEN_DRIVERS_BLOCK_H__ */ diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Nov 29 14:16:36 2006 -0600 @@ -36,6 +36,10 @@ #include <linux/blkdev.h> #include <linux/list.h> +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + #define BLKIF_MAJOR(dev) ((dev)>>8) #define BLKIF_MINOR(dev) ((dev) & 0xff) @@ -46,7 +50,7 @@ */ #define NUM_IDE_MAJORS 10 -#define NUM_SCSI_MAJORS 9 +#define NUM_SCSI_MAJORS 17 #define NUM_VBD_MAJORS 1 static struct xlbd_type_info xlbd_ide_type = { @@ -91,7 +95,9 @@ static struct block_device_operations xl .open = blkif_open, .release = blkif_release, .ioctl = blkif_ioctl, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) .getgeo = blkif_getgeo +#endif }; DEFINE_SPINLOCK(blkif_io_lock); @@ -159,8 +165,11 @@ xlbd_get_major_info(int vdevice) case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: index = 11 + major - SCSI_DISK1_MAJOR; break; - case SCSI_CDROM_MAJOR: index = 18; break; - default: index = 19; break; + case SCSI_DISK8_MAJOR ... 
SCSI_DISK15_MAJOR: + index = 18 + major - SCSI_DISK8_MAJOR; + break; + case SCSI_CDROM_MAJOR: index = 26; break; + default: index = 27; break; } mi = ((major_info[index] != NULL) ? major_info[index] : @@ -186,7 +195,11 @@ xlvbd_init_blk_queue(struct gendisk *gd, if (rq == NULL) return -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) elevator_init(rq, "noop"); +#else + elevator_init(rq, &elevator_noop); +#endif /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_hardsect_size(rq, sector_size); @@ -217,6 +230,7 @@ xlvbd_alloc_gendisk(int minor, blkif_sec struct xlbd_major_info *mi; int nr_minors = 1; int err = -ENODEV; + unsigned int offset; BUG_ON(info->gd != NULL); BUG_ON(info->mi != NULL); @@ -234,15 +248,33 @@ xlvbd_alloc_gendisk(int minor, blkif_sec if (gd == NULL) goto out; - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift)); - else - sprintf(gd->disk_name, "%s%c%d", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift), - minor & ((1 << mi->type->partn_shift) - 1)); + offset = mi->index * mi->type->disks_per_major + + (minor >> mi->type->partn_shift); + if (nr_minors > 1) { + if (offset < 26) { + sprintf(gd->disk_name, "%s%c", + mi->type->diskname, 'a' + offset ); + } + else { + sprintf(gd->disk_name, "%s%c%c", + mi->type->diskname, + 'a' + ((offset/26)-1), 'a' + (offset%26) ); + } + } + else { + if (offset < 26) { + sprintf(gd->disk_name, "%s%c%d", + mi->type->diskname, + 'a' + offset, + minor & ((1 << mi->type->partn_shift) - 1)); + } + else { + sprintf(gd->disk_name, "%s%c%c%d", + mi->type->diskname, + 'a' + ((offset/26)-1), 'a' + (offset%26), + minor & ((1 << mi->type->partn_shift) - 1)); + } + } gd->major = mi->major; gd->first_minor = minor; @@ -257,6 +289,10 @@ xlvbd_alloc_gendisk(int minor, blkif_sec } info->rq = gd->queue; + info->gd = gd; + + if 
(info->feature_barrier) + xlvbd_barrier(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -266,8 +302,6 @@ xlvbd_alloc_gendisk(int minor, blkif_sec if (vdisk_info & VDISK_CDROM) gd->flags |= GENHD_FL_CD; - - info->gd = gd; return 0; @@ -316,3 +350,26 @@ xlvbd_del(struct blkfront_info *info) blk_cleanup_queue(info->rq); info->rq = NULL; } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +int +xlvbd_barrier(struct blkfront_info *info) +{ + int err; + + err = blk_queue_ordered(info->rq, + info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL); + if (err) + return err; + printk("blkfront: %s: barriers %s\n", + info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled"); + return 0; +} +#else +int +xlvbd_barrier(struct blkfront_info *info) +{ + printk("blkfront: %s: barriers disabled\n", info->gd->disk_name); + return -ENOSYS; +} +#endif diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Nov 29 14:16:36 2006 -0600 @@ -9,6 +9,9 @@ * Based on the blkback driver code. * * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Clean ups and fix ups: + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -44,7 +47,6 @@ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/mm.h> -#include <linux/miscdevice.h> #include <linux/errno.h> #include <linux/major.h> #include <linux/gfp.h> @@ -52,8 +54,32 @@ #include <asm/tlbflush.h> #include <linux/devfs_fs_kernel.h> -#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */ +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ #define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. 
blktap0 */ + + +struct class *xen_class; +EXPORT_SYMBOL_GPL(xen_class); + +/* + * Setup the xen class. This should probably go in another file, but + * since blktap is the only user of it so far, it gets to keep it. + */ +int setup_xen_class(void) +{ + int ret; + + if (xen_class) + return 0; + + xen_class = class_create(THIS_MODULE, "xen"); + if ((ret = IS_ERR(xen_class))) { + xen_class = NULL; + return ret; + } + + return 0; +} /* * The maximum number of requests that can be outstanding at any time @@ -67,8 +93,9 @@ * mmap_alloc is initialised to 2 and should be adjustable on the fly via * sysfs. */ -#define MAX_DYNAMIC_MEM 64 -#define MAX_PENDING_REQS 64 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) +#define MAX_DYNAMIC_MEM BLK_RING_SIZE +#define MAX_PENDING_REQS BLK_RING_SIZE #define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) #define MMAP_VADDR(_start, _req,_seg) \ (_start + \ @@ -81,6 +108,12 @@ static int mmap_pages = MMAP_PAGES; * have a bunch of pages reserved for shared * memory rings. */ + +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ +typedef struct domid_translate { + unsigned short domid; + unsigned short busid; +} domid_translate_t ; /*Data struct associated with each of the tapdisk devices*/ typedef struct tap_blkif { @@ -100,22 +133,11 @@ typedef struct tap_blkif { unsigned long *idx_map; /*Record the user ring id to kern [req id, idx] tuple */ blkif_t *blkif; /*Associate blkif with tapdev */ + struct domid_translate trans; /*Translation from domid to bus. 
*/ } tap_blkif_t; -/*Private data struct associated with the inode*/ -typedef struct private_info { - int idx; -} private_info_t; - -/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ -typedef struct domid_translate { - unsigned short domid; - unsigned short busid; -} domid_translate_t ; - - -static domid_translate_t translate_domid[MAX_TAP_DEV]; -static tap_blkif_t *tapfds[MAX_TAP_DEV]; +static struct tap_blkif *tapfds[MAX_TAP_DEV]; +static int blktap_next_minor; static int __init set_blkif_reqs(char *str) { @@ -168,16 +190,18 @@ static inline unsigned int RTN_PEND_IDX( #define BLKBACK_INVALID_HANDLE (~0) -typedef struct mmap_page { - unsigned long start; - struct page *mpage; -} mmap_page_t; - -static mmap_page_t mmap_start[MAX_DYNAMIC_MEM]; +static struct page **foreign_pages[MAX_DYNAMIC_MEM]; +static inline unsigned long idx_to_kaddr( + unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx) +{ + unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx; + unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]); + return (unsigned long)pfn_to_kaddr(pfn); +} + static unsigned short mmap_alloc = 0; static unsigned short mmap_lock = 0; static unsigned short mmap_inuse = 0; -static unsigned long *pending_addrs[MAX_DYNAMIC_MEM]; /****************************************************************** * GRANT HANDLES @@ -192,6 +216,7 @@ struct grant_handle_pair grant_handle_t kernel; grant_handle_t user; }; +#define INVALID_GRANT_HANDLE 0xFFFF static struct grant_handle_pair pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; @@ -200,14 +225,12 @@ static struct grant_handle_pair + (_i)]) -static int blktap_read_ufe_ring(int idx); /*local prototypes*/ - -#define BLKTAP_MINOR 0 /*/dev/xen/blktap resides at device number - major=254, minor numbers begin at 0 */ -#define BLKTAP_DEV_MAJOR 254 /* TODO: Make major number dynamic * - * and create devices in the kernel * - */ +static int blktap_read_ufe_ring(tap_blkif_t 
*info); /*local prototypes*/ + +#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */ #define BLKTAP_DEV_DIR "/dev/xen" + +static int blktap_major; /* blktap IOCTLs: */ #define BLKTAP_IOCTL_KICK_FE 1 @@ -264,17 +287,19 @@ static inline int GET_NEXT_REQ(unsigned { int i; for (i = 0; i < MAX_PENDING_REQS; i++) - if (idx_map[i] == INVALID_REQ) return i; + if (idx_map[i] == INVALID_REQ) + return i; return INVALID_REQ; } #define BLKTAP_INVALID_HANDLE(_g) \ - (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) + (((_g->kernel) == INVALID_GRANT_HANDLE) && \ + ((_g->user) == INVALID_GRANT_HANDLE)) #define BLKTAP_INVALIDATE_HANDLE(_g) do { \ - (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ + (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \ } while(0) @@ -303,15 +328,13 @@ struct vm_operations_struct blktap_vm_op */ /*Function Declarations*/ -static int get_next_free_dev(void); +static tap_blkif_t *get_next_free_dev(void); static int blktap_open(struct inode *inode, struct file *filp); static int blktap_release(struct inode *inode, struct file *filp); static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); static int blktap_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); static unsigned int blktap_poll(struct file *file, poll_table *wait); - -struct miscdevice *set_misc(int minor, char *name, int dev); static struct file_operations blktap_fops = { .owner = THIS_MODULE, @@ -323,41 +346,96 @@ static struct file_operations blktap_fop }; -static int get_next_free_dev(void) +static tap_blkif_t *get_next_free_dev(void) { tap_blkif_t *info; - int i = 0, ret = -1; - unsigned long flags; - - spin_lock_irqsave(&pending_free_lock, flags); - - while (i < MAX_TAP_DEV) { + int minor; + + /* + * This is called only from the ioctl, which + * means we should always have interrupts enabled. 
+ */ + BUG_ON(irqs_disabled()); + + spin_lock_irq(&pending_free_lock); + + /* tapfds[0] is always NULL */ + + for (minor = 1; minor < blktap_next_minor; minor++) { + info = tapfds[minor]; + /* we could have failed a previous attempt. */ + if (!info || + ((info->dev_inuse == 0) && + (info->dev_pending == 0)) ) { + info->dev_pending = 1; + goto found; + } + } + info = NULL; + minor = -1; + + /* + * We didn't find free device. If we can still allocate + * more, then we grab the next device minor that is + * available. This is done while we are still under + * the protection of the pending_free_lock. + */ + if (blktap_next_minor < MAX_TAP_DEV) + minor = blktap_next_minor++; +found: + spin_unlock_irq(&pending_free_lock); + + if (!info && minor > 0) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + /* + * If we failed here, try to put back + * the next minor number. But if one + * was just taken, then we just lose this + * minor. We can try to allocate this + * minor again later. + */ + spin_lock_irq(&pending_free_lock); + if (blktap_next_minor == minor+1) + blktap_next_minor--; + spin_unlock_irq(&pending_free_lock); + goto out; + } + + info->minor = minor; + /* + * Make sure that we have a minor before others can + * see us. 
+ */ + wmb(); + tapfds[minor] = info; + + class_device_create(xen_class, NULL, + MKDEV(blktap_major, minor), NULL, + "blktap%d", minor); + devfs_mk_cdev(MKDEV(blktap_major, minor), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor); + } + +out: + return info; +} + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +{ + tap_blkif_t *info; + int i; + + for (i = 1; i < blktap_next_minor; i++) { info = tapfds[i]; - if ( (tapfds[i] != NULL) && (info->dev_inuse == 0) - && (info->dev_pending == 0) ) { - info->dev_pending = 1; - ret = i; - goto done; - } - i++; - } - -done: - spin_unlock_irqrestore(&pending_free_lock, flags); - return ret; -} - -int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) -{ - int i; - - for (i = 0; i < MAX_TAP_DEV; i++) - if ( (translate_domid[i].domid == domid) - && (translate_domid[i].busid == xenbus_id) ) { - tapfds[i]->blkif = blkif; - tapfds[i]->status = RUNNING; + if ( info && + (info->trans.domid == domid) && + (info->trans.busid == xenbus_id) ) { + info->blkif = blkif; + info->status = RUNNING; return i; } + } return -1; } @@ -367,13 +445,16 @@ void signal_tapdisk(int idx) struct task_struct *ptask; info = tapfds[idx]; - if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) { + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + if (info->pid > 0) { ptask = find_task_by_pid(info->pid); - if (ptask) { + if (ptask) info->status = CLEANSHUTDOWN; - } } info->blkif = NULL; + return; } @@ -382,17 +463,21 @@ static int blktap_open(struct inode *ino blkif_sring_t *sring; int idx = iminor(inode) - BLKTAP_MINOR; tap_blkif_t *info; - private_info_t *prv; int i; - if (tapfds[idx] == NULL) { + /* ctrl device, treat differently */ + if (!idx) + return 0; + + info = tapfds[idx]; + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { WPRINTK("Unable to open device /dev/xen/blktap%d\n", - idx); - return -ENOMEM; - } + idx); + return -ENODEV; + } + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); - - info = tapfds[idx]; 
/*Only one process can access device at a time*/ if (test_and_set_bit(0, &info->dev_inuse)) @@ -410,9 +495,7 @@ static int blktap_open(struct inode *ino SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); - prv = kzalloc(sizeof(private_info_t),GFP_KERNEL); - prv->idx = idx; - filp->private_data = prv; + filp->private_data = info; info->vma = NULL; info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, @@ -433,17 +516,14 @@ static int blktap_open(struct inode *ino static int blktap_release(struct inode *inode, struct file *filp) { - int idx = iminor(inode) - BLKTAP_MINOR; - tap_blkif_t *info; + tap_blkif_t *info = filp->private_data; - if (tapfds[idx] == NULL) { - WPRINTK("Trying to free device that doesn't exist " - "[/dev/xen/blktap%d]\n",idx); - return -1; - } - info = tapfds[idx]; + /* check for control device */ + if (!info) + return 0; + info->dev_inuse = 0; - DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx); + DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor); /* Free the ring page. 
*/ ClearPageReserved(virt_to_page(info->ufe_ring.sring)); @@ -457,11 +537,11 @@ static int blktap_release(struct inode * info->vma = NULL; } - if (filp->private_data) kfree(filp->private_data); - if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) { - kthread_stop(info->blkif->xenblkd); - info->blkif->xenblkd = NULL; + if (info->blkif->xenblkd != NULL) { + kthread_stop(info->blkif->xenblkd); + info->blkif->xenblkd = NULL; + } info->status = CLEANSHUTDOWN; } return 0; @@ -491,16 +571,12 @@ static int blktap_mmap(struct file *filp int size; struct page **map; int i; - private_info_t *prv; - tap_blkif_t *info; - - /*Retrieve the dev info*/ - prv = (private_info_t *)filp->private_data; - if (prv == NULL) { + tap_blkif_t *info = filp->private_data; + + if (info == NULL) { WPRINTK("blktap: mmap, retrieving idx failed\n"); return -ENOMEM; } - info = tapfds[prv->idx]; vma->vm_flags |= VM_RESERVED; vma->vm_ops = &blktap_vm_ops; @@ -517,8 +593,6 @@ static int blktap_mmap(struct file *filp info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); /* Map the ring pages to the start of the region and reserve it. */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vma->vm_start, __pa(info->ufe_ring.sring) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) { @@ -556,20 +630,17 @@ static int blktap_ioctl(struct inode *in static int blktap_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { - int idx = iminor(inode) - BLKTAP_MINOR; + tap_blkif_t *info = filp->private_data; + switch(cmd) { case BLKTAP_IOCTL_KICK_FE: { /* There are fe messages to process. */ - return blktap_read_ufe_ring(idx); + return blktap_read_ufe_ring(info); } case BLKTAP_IOCTL_SETMODE: { - tap_blkif_t *info = tapfds[idx]; - - if ( (idx > 0) && (idx < MAX_TAP_DEV) - && (tapfds[idx] != NULL) ) - { + if (info) { if (BLKTAP_MODE_VALID(arg)) { info->mode = arg; /* XXX: may need to flush rings here. 
*/ @@ -582,11 +653,7 @@ static int blktap_ioctl(struct inode *in } case BLKTAP_IOCTL_PRINT_IDXS: { - tap_blkif_t *info = tapfds[idx]; - - if ( (idx > 0) && (idx < MAX_TAP_DEV) - && (tapfds[idx] != NULL) ) - { + if (info) { printk("User Rings: \n-----------\n"); printk("UF: rsp_cons: %2d, req_prod_prv: %2d " "| req_prod: %2d, rsp_prod: %2d\n", @@ -599,11 +666,7 @@ static int blktap_ioctl(struct inode *in } case BLKTAP_IOCTL_SENDPID: { - tap_blkif_t *info = tapfds[idx]; - - if ( (idx > 0) && (idx < MAX_TAP_DEV) - && (tapfds[idx] != NULL) ) - { + if (info) { info->pid = (pid_t)arg; DPRINTK("blktap: pid received %d\n", info->pid); @@ -614,43 +677,49 @@ static int blktap_ioctl(struct inode *in { uint64_t val = (uint64_t)arg; domid_translate_t *tr = (domid_translate_t *)&val; - int newdev; DPRINTK("NEWINTF Req for domid %d and bus id %d\n", tr->domid, tr->busid); - newdev = get_next_free_dev(); - if (newdev < 1) { + info = get_next_free_dev(); + if (!info) { WPRINTK("Error initialising /dev/xen/blktap - " "No more devices\n"); return -1; } - translate_domid[newdev].domid = tr->domid; - translate_domid[newdev].busid = tr->busid; - return newdev; + info->trans.domid = tr->domid; + info->trans.busid = tr->busid; + return info->minor; } case BLKTAP_IOCTL_FREEINTF: { unsigned long dev = arg; - tap_blkif_t *info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev]; - - if ( (info != NULL) && (info->dev_pending) ) + unsigned long flags; + + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return 0; /* should this be an error? 
*/ + + spin_lock_irqsave(&pending_free_lock, flags); + if (info->dev_pending) info->dev_pending = 0; + spin_unlock_irqrestore(&pending_free_lock, flags); + return 0; } case BLKTAP_IOCTL_MINOR: { unsigned long dev = arg; - tap_blkif_t *info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev]; - - if (info != NULL) return info->minor; - else return -1; + + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return -EINVAL; + + return info->minor; } case BLKTAP_IOCTL_MAJOR: - return BLKTAP_DEV_MAJOR; + return blktap_major; case BLKTAP_QUERY_ALLOC_REQS: { @@ -662,25 +731,16 @@ static int blktap_ioctl(struct inode *in return -ENOIOCTLCMD; } -static unsigned int blktap_poll(struct file *file, poll_table *wait) -{ - private_info_t *prv; - tap_blkif_t *info; +static unsigned int blktap_poll(struct file *filp, poll_table *wait) +{ + tap_blkif_t *info = filp->private_data; - /*Retrieve the dev info*/ - prv = (private_info_t *)file->private_data; - if (prv == NULL) { - WPRINTK(" poll, retrieving idx failed\n"); + /* do not work on the control device */ + if (!info) return 0; - } - - if (prv->idx == 0) return 0; - - info = tapfds[prv->idx]; - - poll_wait(file, &info->wait, wait); + + poll_wait(filp, &info->wait, wait); if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) { - flush_tlb_all(); RING_PUSH_REQUESTS(&info->ufe_ring); return POLLIN | POLLRDNORM; } @@ -691,11 +751,13 @@ void blktap_kick_user(int idx) { tap_blkif_t *info; - if (idx == 0) return; - info = tapfds[idx]; - - if (info != NULL) wake_up_interruptible(&info->wait); + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + wake_up_interruptible(&info->wait); + return; } @@ -712,66 +774,21 @@ static int req_increase(void) static int req_increase(void) { int i, j; - struct page *page; - unsigned long flags; - int ret; - - spin_lock_irqsave(&pending_free_lock, flags); - - ret = -EINVAL; + if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) - goto done; - -#ifdef 
__ia64__ - extern unsigned long alloc_empty_foreign_map_page_range( - unsigned long pages); - mmap_start[mmap_alloc].start = (unsigned long) - alloc_empty_foreign_map_page_range(mmap_pages); -#else /* ! ia64 */ - page = balloon_alloc_empty_page_range(mmap_pages); - ret = -ENOMEM; - if (page == NULL) { - printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__); - goto done; - } - - /* Pin all of the pages. */ - for (i=0; i<mmap_pages; i++) - get_page(&page[i]); - - mmap_start[mmap_alloc].start = - (unsigned long)pfn_to_kaddr(page_to_pfn(page)); - mmap_start[mmap_alloc].mpage = page; - -#endif - - pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * - blkif_reqs, GFP_KERNEL); - pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * - mmap_pages, GFP_KERNEL); - - ret = -ENOMEM; - if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { - kfree(pending_reqs[mmap_alloc]); - kfree(pending_addrs[mmap_alloc]); - WPRINTK("%s: out of memory\n", __FUNCTION__); - ret = -ENOMEM; - goto done; - } - - ret = 0; - - DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", - __FUNCTION__, blkif_reqs, mmap_pages, - mmap_start[mmap_alloc].start); - - BUG_ON(mmap_start[mmap_alloc].start == 0); - - for (i = 0; i < mmap_pages; i++) - pending_addrs[mmap_alloc][i] = - mmap_start[mmap_alloc].start + (i << PAGE_SHIFT); - - for (i = 0; i < MAX_PENDING_REQS ; i++) { + return -EINVAL; + + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) + * blkif_reqs, GFP_KERNEL); + foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc]) + goto out_of_memory; + + DPRINTK("%s: reqs=%d, pages=%d\n", + __FUNCTION__, blkif_reqs, mmap_pages); + + for (i = 0; i < MAX_PENDING_REQS; i++) { list_add_tail(&pending_reqs[mmap_alloc][i].free_list, &pending_free); pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc; @@ -782,65 +799,28 @@ static int req_increase(void) mmap_alloc++; DPRINTK("# MMAPs increased to 
%d\n",mmap_alloc); - done: - spin_unlock_irqrestore(&pending_free_lock, flags); - return ret; + return 0; + + out_of_memory: + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); + kfree(pending_reqs[mmap_alloc]); + WPRINTK("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; } static void mmap_req_del(int mmap) { - int i; - struct page *page; - - /*Spinlock already acquired*/ + BUG_ON(!spin_is_locked(&pending_free_lock)); + kfree(pending_reqs[mmap]); - kfree(pending_addrs[mmap]); - -#ifdef __ia64__ - /*Not sure what goes here yet!*/ -#else - - /* Unpin all of the pages. */ - page = mmap_start[mmap].mpage; - for (i=0; i<mmap_pages; i++) - put_page(&page[i]); - - balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages); -#endif + pending_reqs[mmap] = NULL; + + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); + foreign_pages[mmap] = NULL; mmap_lock = 0; DPRINTK("# MMAPs decreased to %d\n",mmap_alloc); mmap_alloc--; -} - -/*N.B. Currently unused - will be accessed via sysfs*/ -static void req_decrease(void) -{ - pending_req_t *req; - int i; - unsigned long flags; - - spin_lock_irqsave(&pending_free_lock, flags); - - DPRINTK("Req decrease called.\n"); - if (mmap_lock || mmap_alloc == 1) - goto done; - - mmap_lock = 1; - mmap_inuse = MAX_PENDING_REQS; - - /*Go through reqs and remove any that aren't in use*/ - for (i = 0; i < MAX_PENDING_REQS ; i++) { - req = &pending_reqs[mmap_alloc-1][i]; - if (req->inuse == 0) { - list_del(&req->free_list); - mmap_inuse--; - } - } - if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1); - done: - spin_unlock_irqrestore(&pending_free_lock, flags); - return; } static pending_req_t* alloc_req(void) @@ -888,8 +868,8 @@ static void free_req(pending_req_t *req) wake_up(&pending_free_wq); } -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int - tapidx) +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, + int tapidx) { struct gnttab_unmap_grant_ref 
unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int i, invcount = 0; @@ -897,49 +877,65 @@ static void fast_flush_area(pending_req_ uint64_t ptep; int ret, mmap_idx; unsigned long kvaddr, uvaddr; - - tap_blkif_t *info = tapfds[tapidx]; + tap_blkif_t *info; - if (info == NULL) { + + info = tapfds[tapidx]; + + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { WPRINTK("fast_flush: Couldn't get info!\n"); return; } + + if (info->vma != NULL && + xen_feature(XENFEAT_auto_translated_physmap)) { + down_write(&info->vma->vm_mm->mmap_sem); + zap_page_range(info->vma, + MMAP_VADDR(info->user_vstart, u_idx, 0), + req->nr_pages << PAGE_SHIFT, NULL); + up_write(&info->vma->vm_mm->mmap_sem); + } + mmap_idx = req->mem_idx; for (i = 0; i < req->nr_pages; i++) { - kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, k_idx, i); uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); khandle = &pending_handle(mmap_idx, k_idx, i); - if (BLKTAP_INVALID_HANDLE(khandle)) { - WPRINTK("BLKTAP_INVALID_HANDLE\n"); - continue; - } - gnttab_set_unmap_op(&unmap[invcount], - MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i), - GNTMAP_host_map, khandle->kernel); - invcount++; - - if (create_lookup_pte_addr( - info->vma->vm_mm, - MMAP_VADDR(info->user_vstart, u_idx, i), - &ptep) !=0) { - WPRINTK("Couldn't get a pte addr!\n"); - return; - } - - gnttab_set_unmap_op(&unmap[invcount], - ptep, GNTMAP_host_map, - khandle->user); - invcount++; - + + if (khandle->kernel != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&unmap[invcount], + idx_to_kaddr(mmap_idx, k_idx, i), + GNTMAP_host_map, khandle->kernel); + invcount++; + } + + if (khandle->user != INVALID_GRANT_HANDLE) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + if (create_lookup_pte_addr( + info->vma->vm_mm, + MMAP_VADDR(info->user_vstart, u_idx, i), + &ptep) !=0) { + WPRINTK("Couldn't get a pte addr!\n"); + return; + } + + gnttab_set_unmap_op(&unmap[invcount], ptep, + GNTMAP_host_map + | 
GNTMAP_application_map + | GNTMAP_contains_pte, + khandle->user); + invcount++; + } + BLKTAP_INVALIDATE_HANDLE(khandle); } ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); - if (info->vma != NULL) + if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) zap_page_range(info->vma, MMAP_VADDR(info->user_vstart, u_idx, 0), req->nr_pages << PAGE_SHIFT, NULL); @@ -1002,7 +998,7 @@ int tap_blkif_schedule(void *arg) * COMPLETION CALLBACK -- Called by user level ioctl() */ -static int blktap_read_ufe_ring(int idx) +static int blktap_read_ufe_ring(tap_blkif_t *info) { /* This is called to read responses from the UFE ring. */ RING_IDX i, j, rp; @@ -1010,12 +1006,9 @@ static int blktap_read_ufe_ring(int idx) blkif_t *blkif=NULL; int pending_idx, usr_idx, mmap_idx; pending_req_t *pending_req; - tap_blkif_t *info; - info = tapfds[idx]; - if (info == NULL) { + if (!info) return 0; - } /* We currently only forward packets in INTERCEPT_FE mode. */ if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE)) @@ -1026,11 +1019,14 @@ static int blktap_read_ufe_ring(int idx) rmb(); for (i = info->ufe_ring.rsp_cons; i != rp; i++) { + blkif_response_t res; resp = RING_GET_RESPONSE(&info->ufe_ring, i); + memcpy(&res, resp, sizeof(res)); + mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). 
*/ ++info->ufe_ring.rsp_cons; /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ - usr_idx = (int)resp->id; + usr_idx = (int)res.id; pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); @@ -1053,9 +1049,8 @@ static int blktap_read_ufe_ring(int idx) struct page *pg; int offset; - uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); - kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, - pending_idx, j); + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j); pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); ClearPageReserved(pg); @@ -1063,10 +1058,10 @@ static int blktap_read_ufe_ring(int idx) >> PAGE_SHIFT; map[offset] = NULL; } - fast_flush_area(pending_req, pending_idx, usr_idx, idx); - make_response(blkif, pending_req->id, resp->operation, - resp->status); + fast_flush_area(pending_req, pending_idx, usr_idx, info->minor); info->idx_map[usr_idx] = INVALID_REQ; + make_response(blkif, pending_req->id, res.operation, + res.status); blkif_put(pending_req->blkif); free_req(pending_req); } @@ -1100,7 +1095,7 @@ static int do_block_io_op(blkif_t *blkif static int do_block_io_op(blkif_t *blkif) { blkif_back_ring_t *blk_ring = &blkif->blk_ring; - blkif_request_t *req; + blkif_request_t req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -1111,7 +1106,7 @@ static int do_block_io_op(blkif_t *blkif rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ /*Check blkif has corresponding UE ring*/ - if (blkif->dev_num == -1) { + if (blkif->dev_num < 0) { /*oops*/ if (print_dbug) { WPRINTK("Corresponding UE " @@ -1122,7 +1117,8 @@ static int do_block_io_op(blkif_t *blkif } info = tapfds[blkif->dev_num]; - if (info == NULL || !info->dev_inuse) { + + if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) { if (print_dbug) { WPRINTK("Can't get UE info!\n"); print_dbug = 0; @@ -1152,24 +1148,24 @@ static int do_block_io_op(blkif_t *blkif break; } - req = RING_GET_REQUEST(blk_ring, rc); + memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req)); blk_ring->req_cons = ++rc; /* before make_response() */ - switch (req->operation) { + switch (req.operation) { case BLKIF_OP_READ: blkif->st_rd_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; case BLKIF_OP_WRITE: blkif->st_wr_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; default: WPRINTK("unknown operation [%d]\n", - req->operation); - make_response(blkif, req->id, req->operation, + req.operation); + make_response(blkif, req.id, req.operation, BLKIF_RSP_ERROR); free_req(pending_req); break; @@ -1190,17 +1186,27 @@ static void dispatch_rw_block_io(blkif_t struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int nseg; int ret, i; - tap_blkif_t *info = tapfds[blkif->dev_num]; + tap_blkif_t *info; uint64_t sector; - blkif_request_t *target; int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); - int usr_idx = GET_NEXT_REQ(info->idx_map); + int usr_idx; uint16_t mmap_idx = pending_req->mem_idx; - /*Check we have space on user ring - should never fail*/ - if(usr_idx == INVALID_REQ) goto fail_flush; - + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV) + goto fail_response; + + info = tapfds[blkif->dev_num]; + if (info == NULL) + goto fail_response; + + /* Check we have space on user ring - should never 
fail. */ + usr_idx = GET_NEXT_REQ(info->idx_map); + if (usr_idx == INVALID_REQ) { + BUG(); + goto fail_response; + } + /* Check that number of segments is sane. */ nseg = req->nr_segments; if ( unlikely(nseg == 0) || @@ -1233,15 +1239,12 @@ static void dispatch_rw_block_io(blkif_t unsigned long uvaddr; unsigned long kvaddr; uint64_t ptep; - struct page *page; uint32_t flags; uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); - kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, - pending_idx, i); - page = virt_to_page(kvaddr); - - sector = req->sector_number + (8*i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + + sector = req->sector_number + ((PAGE_SIZE / 512) * i); if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) { WPRINTK("BLKTAP: Sector request greater" "than size\n"); @@ -1251,7 +1254,7 @@ static void dispatch_rw_block_io(blkif_t BLKIF_OP_WRITE ? "WRITE" : "READ"), (long long unsigned) sector, (long long unsigned) sector>>9, - blkif->sectors); + (long long unsigned) blkif->sectors); } flags = GNTMAP_host_map; @@ -1261,71 +1264,123 @@ static void dispatch_rw_block_io(blkif_t req->seg[i].gref, blkif->domid); op++; - /* Now map it to user. */ - ret = create_lookup_pte_addr(info->vma->vm_mm, - uvaddr, &ptep); - if (ret) { - WPRINTK("Couldn't get a pte addr!\n"); - fast_flush_area(pending_req, pending_idx, usr_idx, - blkif->dev_num); - goto fail_flush; - } - - flags = GNTMAP_host_map | GNTMAP_application_map - | GNTMAP_contains_pte; - if (operation == WRITE) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[op], ptep, flags, - req->seg[i].gref, blkif->domid); - op++; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Now map it to user. 
*/ + ret = create_lookup_pte_addr(info->vma->vm_mm, + uvaddr, &ptep); + if (ret) { + WPRINTK("Couldn't get a pte addr!\n"); + goto fail_flush; + } + + flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + if (operation == WRITE) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, + req->seg[i].gref, blkif->domid); + op++; + } } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); BUG_ON(ret); - for (i = 0; i < (nseg*2); i+=2) { - unsigned long uvaddr; - unsigned long kvaddr; - unsigned long offset; - struct page *pg; - - uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2); - kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, - pending_idx, i/2); - - if (unlikely(map[i].status != 0)) { - WPRINTK("invalid kernel buffer -- " - "could not remap it\n"); - goto fail_flush; - } - - if (unlikely(map[i+1].status != 0)) { - WPRINTK("invalid user buffer -- " - "could not remap it\n"); - goto fail_flush; - } - - pending_handle(mmap_idx, pending_idx, i/2).kernel - = map[i].handle; - pending_handle(mmap_idx, pending_idx, i/2).user - = map[i+1].handle; - set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); - offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; - pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - ((struct page **)info->vma->vm_private_data)[offset] = - pg; - } + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + for (i = 0; i < (nseg*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + struct page *pg; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2); + + if (unlikely(map[i].status != 0)) { + WPRINTK("invalid kernel buffer -- " + "could not remap it\n"); + ret |= 1; + map[i].handle = INVALID_GRANT_HANDLE; + } + + if (unlikely(map[i+1].status != 0)) { + WPRINTK("invalid user buffer -- " + "could not remap it\n"); + ret |= 1; + map[i+1].handle = INVALID_GRANT_HANDLE; + } + 
+ pending_handle(mmap_idx, pending_idx, i/2).kernel + = map[i].handle; + pending_handle(mmap_idx, pending_idx, i/2).user + = map[i+1].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr + >> PAGE_SHIFT)); + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ((struct page **)info->vma->vm_private_data)[offset] = + pg; + } + } else { + for (i = 0; i < nseg; i++) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + struct page *pg; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + + if (unlikely(map[i].status != 0)) { + WPRINTK("invalid kernel buffer -- " + "could not remap it\n"); + ret |= 1; + map[i].handle = INVALID_GRANT_HANDLE; + } + + pending_handle(mmap_idx, pending_idx, i).kernel + = map[i].handle; + + if (ret) + continue; + + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ((struct page **)info->vma->vm_private_data)[offset] = + pg; + } + } + + if (ret) + goto fail_flush; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + down_write(&info->vma->vm_mm->mmap_sem); /* Mark mapped pages as reserved: */ for (i = 0; i < req->nr_segments; i++) { unsigned long kvaddr; struct page *pg; - kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, - pending_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); SetPageReserved(pg); - } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + ret = vm_insert_page(info->vma, + MMAP_VADDR(info->user_vstart, + usr_idx, i), pg); + if (ret) { + up_write(&info->vma->vm_mm->mmap_sem); + goto fail_flush; + } + } + } + if (xen_feature(XENFEAT_auto_translated_physmap)) + up_write(&info->vma->vm_mm->mmap_sem); /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); @@ -1336,6 
+1391,7 @@ static void dispatch_rw_block_io(blkif_t info->ufe_ring.req_prod_pvt); memcpy(target, req, sizeof(*req)); target->id = usr_idx; + wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */ info->ufe_ring.req_prod_pvt++; return; @@ -1393,7 +1449,6 @@ static int __init blkif_init(void) static int __init blkif_init(void) { int i,ret,blktap_dir; - tap_blkif_t *info; if (!is_running_on_xen()) return -ENODEV; @@ -1413,10 +1468,8 @@ static int __init blkif_init(void) tap_blkif_xenbus_init(); - /*Create the blktap devices, but do not map memory or waitqueue*/ - for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF; - - ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops); + /* Dynamically allocate a major for this device */ + ret = register_chrdev(0, "blktap", &blktap_fops); blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL); if ( (ret < 0)||(blktap_dir < 0) ) { @@ -1424,22 +1477,36 @@ static int __init blkif_init(void) return -ENOMEM; } - for(i = 0; i < MAX_TAP_DEV; i++ ) { - info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); - if(tapfds[i] == NULL) return -ENOMEM; - info->minor = i; - info->pid = 0; - info->blkif = NULL; - - ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i), - S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); - - if(ret != 0) return -ENOMEM; - info->dev_pending = info->dev_inuse = 0; - - DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); - } - + blktap_major = ret; + + /* tapfds[0] is always NULL */ + blktap_next_minor++; + + ret = devfs_mk_cdev(MKDEV(blktap_major, i), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); + + if(ret != 0) + return -ENOMEM; + + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + + /* Make sure the xen class exists */ + if (!setup_xen_class()) { + /* + * This will allow udev to create the blktap ctrl device. + * We only want to create blktap0 first. We don't want + * to flood the sysfs system with needless blktap devices. 
+ * We only create the device when a request of a new device is + * made. + */ + class_device_create(xen_class, NULL, + MKDEV(blktap_major, 0), NULL, + "blktap0"); + } else { + /* this is bad, but not fatal */ + WPRINTK("blktap: sysfs xen_class not created\n"); + } + DPRINTK("Blktap device successfully created\n"); return 0; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Wed Nov 29 14:16:36 2006 -0600 @@ -189,7 +189,7 @@ static int blktap_probe(struct xenbus_de return 0; fail: - DPRINTK("blktap probe failed"); + DPRINTK("blktap probe failed\n"); blktap_remove(dev); return err; } @@ -243,7 +243,7 @@ static void tap_frontend_changed(struct struct backend_info *be = dev->dev.driver_data; int err; - DPRINTK(""); + DPRINTK("\n"); switch (frontend_state) { case XenbusStateInitialising: @@ -273,7 +273,6 @@ static void tap_frontend_changed(struct kthread_stop(be->blkif->xenblkd); be->blkif->xenblkd = NULL; } - tap_blkif_unmap(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; @@ -319,7 +318,7 @@ static int connect_ring(struct backend_i unsigned int evtchn; int err; - DPRINTK("%s", dev->otherend); + DPRINTK("%s\n", dev->otherend); err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, "event-channel", "%u", &evtchn, NULL); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/char/mem.c --- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c Wed Nov 29 14:16:36 2006 -0600 @@ -28,13 +28,12 @@ #include <asm/io.h> #include <asm/hypervisor.h> -static inline int uncached_access(struct file *file) -{ - if (file->f_flags & O_SYNC) - return 1; - /* Xen sets correct MTRR type on non-RAM for us. 
*/ - return 0; -} +#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE +static inline int valid_phys_addr_range(unsigned long addr, size_t *count) +{ + return 1; +} +#endif /* * This funcion reads the *physical* memory. The f_pos points directly to the @@ -46,6 +45,9 @@ static ssize_t read_mem(struct file * fi unsigned long p = *ppos, ignored; ssize_t read = 0, sz; void __iomem *v; + + if (!valid_phys_addr_range(p, &count)) + return -EFAULT; while (count > 0) { /* @@ -58,13 +60,15 @@ static ssize_t read_mem(struct file * fi sz = min_t(unsigned long, sz, count); - if ((v = ioremap(p, sz)) == NULL) { + v = xlate_dev_mem_ptr(p, sz); + if (IS_ERR(v) || v == NULL) { /* - * Some programs (e.g., dmidecode) groove off into weird RAM - * areas where no tables can possibly exist (because Xen will - * have stomped on them!). These programs get rather upset if - * we let them know that Xen failed their access, so we fake - * out a read of all zeroes. :-) + * Some programs (e.g., dmidecode) groove off into + * weird RAM areas where no tables can possibly exist + * (because Xen will have stomped on them!). These + * programs get rather upset if we let them know that + * Xen failed their access, so we fake out a read of + * all zeroes. 
*/ if (clear_user(buf, count)) return -EFAULT; @@ -73,7 +77,7 @@ static ssize_t read_mem(struct file * fi } ignored = copy_to_user(buf, v, sz); - iounmap(v); + xlate_dev_mem_ptr_unmap(v); if (ignored) return -EFAULT; buf += sz; @@ -92,6 +96,9 @@ static ssize_t write_mem(struct file * f unsigned long p = *ppos, ignored; ssize_t written = 0, sz; void __iomem *v; + + if (!valid_phys_addr_range(p, &count)) + return -EFAULT; while (count > 0) { /* @@ -104,11 +111,17 @@ static ssize_t write_mem(struct file * f sz = min_t(unsigned long, sz, count); - if ((v = ioremap(p, sz)) == NULL) - break; + v = xlate_dev_mem_ptr(p, sz); + if (v == NULL) + break; + if (IS_ERR(v)) { + if (written == 0) + return PTR_ERR(v); + break; + } ignored = copy_from_user(v, buf, sz); - iounmap(v); + xlate_dev_mem_ptr_unmap(v); if (ignored) { written += sz - ignored; if (written) @@ -125,6 +138,15 @@ static ssize_t write_mem(struct file * f return written; } +#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM +static inline int uncached_access(struct file *file) +{ + if (file->f_flags & O_SYNC) + return 1; + /* Xen sets correct MTRR type on non-RAM for us. 
*/ + return 0; +} + static int mmap_mem(struct file * file, struct vm_area_struct * vma) { size_t size = vma->vm_end - vma->vm_start; @@ -136,6 +158,7 @@ static int mmap_mem(struct file * file, return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, vma->vm_page_prot, DOMID_IO); } +#endif /* * The memory devices use the full 32/64 bits of the offset, and so we cannot diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Wed Nov 29 14:16:36 2006 -0600 @@ -49,6 +49,7 @@ #include <linux/console.h> #include <linux/bootmem.h> #include <linux/sysrq.h> +#include <linux/screen_info.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/uaccess.h> @@ -262,6 +263,41 @@ void xencons_force_flush(void) sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); if (sent > 0) wc += sent; + } +} + + +void dom0_init_screen_info(const struct dom0_vga_console_info *info) +{ + switch (info->video_type) { + case XEN_VGATYPE_TEXT_MODE_3: + screen_info.orig_video_mode = 3; + screen_info.orig_video_ega_bx = 3; + screen_info.orig_video_isVGA = 1; + screen_info.orig_video_lines = info->u.text_mode_3.rows; + screen_info.orig_video_cols = info->u.text_mode_3.columns; + screen_info.orig_x = info->u.text_mode_3.cursor_x; + screen_info.orig_y = info->u.text_mode_3.cursor_y; + screen_info.orig_video_points = + info->u.text_mode_3.font_height; + break; + case XEN_VGATYPE_VESA_LFB: + screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB; + screen_info.lfb_width = info->u.vesa_lfb.width; + screen_info.lfb_height = info->u.vesa_lfb.height; + screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel; + screen_info.lfb_base = info->u.vesa_lfb.lfb_base; + screen_info.lfb_size = info->u.vesa_lfb.lfb_size; + screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line; + screen_info.red_size = info->u.vesa_lfb.red_size; + 
screen_info.red_pos = info->u.vesa_lfb.red_pos; + screen_info.green_size = info->u.vesa_lfb.green_size; + screen_info.green_pos = info->u.vesa_lfb.green_pos; + screen_info.blue_size = info->u.vesa_lfb.blue_size; + screen_info.blue_pos = info->u.vesa_lfb.blue_pos; + screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size; + screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos; + break; } } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/core/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Wed Nov 29 14:16:36 2006 -0600 @@ -9,5 +9,5 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o obj-$(CONFIG_XEN_SKBUFF) += skbuff.o -obj-$(CONFIG_XEN_REBOOT) += reboot.o +obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/core/features.c --- a/linux-2.6-xen-sparse/drivers/xen/core/features.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c Wed Nov 29 14:16:36 2006 -0600 @@ -10,6 +10,10 @@ #include <linux/module.h> #include <asm/hypervisor.h> #include <xen/features.h> + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */ diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Wed Nov 29 14:16:36 2006 -0600 @@ -44,6 +44,10 @@ #include <asm/io.h> #include <xen/interface/memory.h> +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + /* External tools reserve first few grant table entries. 
*/ #define NR_RESERVED_ENTRIES 8 diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Wed Nov 29 14:16:36 2006 -0600 @@ -1,25 +1,15 @@ #define __KERNEL_SYSCALLS__ #include <linux/version.h> #include <linux/kernel.h> -#include <linux/mm.h> #include <linux/unistd.h> #include <linux/module.h> #include <linux/reboot.h> #include <linux/sysrq.h> -#include <linux/stringify.h> -#include <asm/irq.h> -#include <asm/mmu_context.h> -#include <xen/evtchn.h> #include <asm/hypervisor.h> -#include <xen/interface/dom0_ops.h> #include <xen/xenbus.h> -#include <linux/cpu.h> #include <linux/kthread.h> -#include <xen/gnttab.h> -#include <xen/xencons.h> -#include <xen/cpu_hotplug.h> - -extern void ctrl_alt_del(void); + +MODULE_LICENSE("Dual BSD/GPL"); #define SHUTDOWN_INVALID -1 #define SHUTDOWN_POWEROFF 0 @@ -31,185 +21,17 @@ extern void ctrl_alt_del(void); */ #define SHUTDOWN_HALT 4 -#if defined(__i386__) || defined(__x86_64__) - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void machine_emergency_restart(void) -{ - /* We really want to get pending console data out before we die. */ - xencons_force_flush(); - HYPERVISOR_shutdown(SHUTDOWN_reboot); -} - -void machine_restart(char * __unused) -{ - machine_emergency_restart(); -} - -void machine_halt(void) -{ - machine_power_off(); -} - -void machine_power_off(void) -{ - /* We really want to get pending console data out before we die. 
*/ - xencons_force_flush(); - if (pm_power_off) - pm_power_off(); - HYPERVISOR_shutdown(SHUTDOWN_poweroff); -} - -int reboot_thru_bios = 0; /* for dmi_scan.c */ -EXPORT_SYMBOL(machine_restart); -EXPORT_SYMBOL(machine_halt); -EXPORT_SYMBOL(machine_power_off); - -#endif /* defined(__i386__) || defined(__x86_64__) */ - -/****************************************************************************** - * Stop/pickle callback handling. - */ - /* Ignore multiple shutdown requests. */ static int shutting_down = SHUTDOWN_INVALID; + static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); -#if defined(__i386__) || defined(__x86_64__) - -/* Ensure we run on the idle task page tables so that we will - switch page tables before running user space. This is needed - on architectures with separate kernel and user page tables - because the user page table pointer is not saved/restored. */ -static void switch_idle_mm(void) -{ - struct mm_struct *mm = current->active_mm; - - if (mm == &init_mm) - return; - - atomic_inc(&init_mm.mm_count); - switch_mm(mm, &init_mm, current); - current->active_mm = &init_mm; - mmdrop(mm); -} - -static void pre_suspend(void) -{ - HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; - clear_fixmap(FIX_SHARED_INFO); - - xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); - xen_start_info->console.domU.mfn = - mfn_to_pfn(xen_start_info->console.domU.mfn); -} - -static void post_suspend(void) -{ - int i, j, k, fpp; - extern unsigned long max_pfn; - extern unsigned long *pfn_to_mfn_frame_list_list; - extern unsigned long *pfn_to_mfn_frame_list[]; - - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - - memset(empty_zero_page, 0, PAGE_SIZE); - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = - virt_to_mfn(pfn_to_mfn_frame_list_list); - - fpp = PAGE_SIZE/sizeof(unsigned long); - for (i = 0, j = 
0, k = -1; i < max_pfn; i += fpp, j++) { - if ((j % fpp) == 0) { - k++; - pfn_to_mfn_frame_list_list[k] = - virt_to_mfn(pfn_to_mfn_frame_list[k]); - j = 0; - } - pfn_to_mfn_frame_list[k][j] = - virt_to_mfn(&phys_to_machine_mapping[i]); - } - HYPERVISOR_shared_info->arch.max_pfn = max_pfn; -} - -#else /* !(defined(__i386__) || defined(__x86_64__)) */ - -#define switch_idle_mm() ((void)0) -#define mm_pin_all() ((void)0) -#define pre_suspend() ((void)0) -#define post_suspend() ((void)0) - +#ifdef CONFIG_XEN +int __xen_suspend(void); +#else +#define __xen_suspend() (void)0 #endif - -static int __do_suspend(void *ignore) -{ - int err; - - extern void time_resume(void); - - BUG_ON(smp_processor_id() != 0); - BUG_ON(in_interrupt()); - -#if defined(__i386__) || defined(__x86_64__) - if (xen_feature(XENFEAT_auto_translated_physmap)) { - printk(KERN_WARNING "Cannot suspend in " - "auto_translated_physmap mode.\n"); - return -EOPNOTSUPP; - } -#endif - - err = smp_suspend(); - if (err) - return err; - - xenbus_suspend(); - - preempt_disable(); - - mm_pin_all(); - local_irq_disable(); - preempt_enable(); - - gnttab_suspend(); - - pre_suspend(); - - /* - * We'll stop somewhere inside this hypercall. When it returns, - * we'll start resuming after the restore. 
- */ - HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); - - shutting_down = SHUTDOWN_INVALID; - - post_suspend(); - - gnttab_resume(); - - irq_resume(); - - time_resume(); - - switch_idle_mm(); - - local_irq_enable(); - - xencons_resume(); - - xenbus_resume(); - - smp_resume(); - - return err; -} static int shutdown_process(void *__unused) { @@ -222,16 +44,25 @@ static int shutdown_process(void *__unus if ((shutting_down == SHUTDOWN_POWEROFF) || (shutting_down == SHUTDOWN_HALT)) { - if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) { + if (call_usermodehelper("/sbin/poweroff", poweroff_argv, envp, 0) < 0) { +#ifdef CONFIG_XEN sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_POWER_OFF, NULL); +#endif /* CONFIG_XEN */ } } shutting_down = SHUTDOWN_INVALID; /* could try again */ + return 0; +} + +static int xen_suspend(void *__unused) +{ + __xen_suspend(); + shutting_down = SHUTDOWN_INVALID; return 0; } @@ -257,7 +88,7 @@ static void __shutdown_handler(void *unu err = kernel_thread(shutdown_process, NULL, CLONE_FS | CLONE_FILES); else - err = kthread_create_on_cpu(__do_suspend, NULL, "suspend", 0); + err = kthread_create_on_cpu(xen_suspend, NULL, "suspend", 0); if (err < 0) { printk(KERN_WARNING "Error creating shutdown process (%d): " @@ -298,7 +129,7 @@ static void shutdown_handler(struct xenb if (strcmp(str, "poweroff") == 0) shutting_down = SHUTDOWN_POWEROFF; else if (strcmp(str, "reboot") == 0) - ctrl_alt_del(); + kill_proc(1, SIGINT, 1); /* interrupt init */ else if (strcmp(str, "suspend") == 0) shutting_down = SHUTDOWN_SUSPEND; else if (strcmp(str, "halt") == 0) @@ -364,10 +195,14 @@ static int setup_shutdown_watcher(struct err = register_xenbus_watch(&shutdown_watch); if (err) printk(KERN_ERR "Failed to set shutdown watcher\n"); + else + xenbus_write(XBT_NIL, "control", "feature-reboot", "1"); err = register_xenbus_watch(&sysrq_watch); if (err) printk(KERN_ERR "Failed to set sysrq watcher\n"); + else + xenbus_write(XBT_NIL, "control", 
"feature-sysrq", "1"); return NOTIFY_DONE; } @@ -378,6 +213,7 @@ static int __init setup_shutdown_event(v .notifier_call = setup_shutdown_watcher }; register_xenstore_notifier(&xenstore_notifier); + return 0; } diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/core/skbuff.c --- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Wed Nov 29 14:16:36 2006 -0600 @@ -18,7 +18,12 @@ /*static*/ kmem_cache_t *skbuff_cachep; EXPORT_SYMBOL(skbuff_cachep); -#define MAX_SKBUFF_ORDER 4 +/* Allow up to 64kB or page-sized packets (whichever is greater). */ +#if PAGE_SHIFT < 16 +#define MAX_SKBUFF_ORDER (16 - PAGE_SHIFT) +#else +#define MAX_SKBUFF_ORDER 0 +#endif static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1]; static struct { diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Wed Nov 29 14:16:36 2006 -0600 @@ -419,10 +419,9 @@ static struct file_operations evtchn_fop }; static struct miscdevice evtchn_miscdev = { - .minor = EVTCHN_MINOR, + .minor = MISC_DYNAMIC_MINOR, .name = "evtchn", .fops = &evtchn_fops, - .devfs_name = "misc/evtchn", }; static int __init evtchn_init(void) diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Wed Nov 29 14:16:36 2006 -0600 @@ -92,6 +92,9 @@ typedef struct netif_st { unsigned long remaining_credit; struct timer_list credit_timeout; + /* Enforce draining of the transmit queue. */ + struct timer_list tx_queue_timeout; + /* Miscellaneous private stuff. 
*/ struct list_head list; /* scheduling list */ atomic_t refcnt; @@ -106,7 +109,7 @@ typedef struct netif_st { void netif_disconnect(netif_t *netif); -netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]); +netif_t *netif_alloc(domid_t domid, unsigned int handle); int netif_map(netif_t *netif, unsigned long tx_ring_ref, unsigned long rx_ring_ref, unsigned int evtchn); @@ -118,6 +121,8 @@ int netif_map(netif_t *netif, unsigned l } while (0) void netif_xenbus_init(void); + +#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev)) void netif_schedule_work(netif_t *netif); void netif_deschedule_work(netif_t *netif); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Wed Nov 29 14:16:36 2006 -0600 @@ -34,6 +34,23 @@ #include <linux/ethtool.h> #include <linux/rtnetlink.h> +/* + * Module parameter 'queue_length': + * + * Enables queuing in the network stack when a client has run out of receive + * descriptors. Although this feature can improve receive bandwidth by avoiding + * packet loss, it can also result in packets sitting in the 'tx_queue' for + * unbounded time. This is bad if those packets hold onto foreign resources. + * For example, consider a packet that holds onto resources belonging to the + * guest for which it is queued (e.g., packet received on vif1.0, destined for + * vif1.1 which is not activated in the guest): in this situation the guest + * will never be destroyed, unless vif1.1 is taken down. To avoid this, we + * run a timer (tx_queue_timeout) to drain the queue when the interface is + * blocked. 
+ */ +static unsigned long netbk_queue_length = 32; +module_param_named(queue_length, netbk_queue_length, ulong, 0); + static void __netif_up(netif_t *netif) { enable_irq(netif->irq); @@ -107,9 +124,9 @@ static struct ethtool_ops network_ethtoo .get_link = ethtool_op_get_link, }; -netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]) -{ - int err = 0, i; +netif_t *netif_alloc(domid_t domid, unsigned int handle) +{ + int err = 0; struct net_device *dev; netif_t *netif; char name[IFNAMSIZ] = {}; @@ -134,6 +151,10 @@ netif_t *netif_alloc(domid_t domid, unsi netif->credit_bytes = netif->remaining_credit = ~0UL; netif->credit_usec = 0UL; init_timer(&netif->credit_timeout); + /* Initialize 'expires' now: it's used to track the credit window. */ + netif->credit_timeout.expires = jiffies; + + init_timer(&netif->tx_queue_timeout); dev->hard_start_xmit = netif_be_start_xmit; dev->get_stats = netif_be_get_stats; @@ -144,26 +165,16 @@ netif_t *netif_alloc(domid_t domid, unsi SET_ETHTOOL_OPS(dev, &network_ethtool_ops); + dev->tx_queue_len = netbk_queue_length; + /* - * Reduce default TX queuelen so that each guest interface only - * allows it to eat around 6.4MB of host memory. - */ - dev->tx_queue_len = 100; - - for (i = 0; i < ETH_ALEN; i++) - if (be_mac[i] != 0) - break; - if (i == ETH_ALEN) { - /* - * Initialise a dummy MAC address. We choose the numerically - * largest non-broadcast address to prevent the address getting - * stolen by an Ethernet bridge for STP purposes. - * (FE:FF:FF:FF:FF:FF) - */ - memset(dev->dev_addr, 0xFF, ETH_ALEN); - dev->dev_addr[0] &= ~0x01; - } else - memcpy(dev->dev_addr, be_mac, ETH_ALEN); + * Initialise a dummy MAC address. We choose the numerically + * largest non-broadcast address to prevent the address getting + * stolen by an Ethernet bridge for STP purposes. 
+ * (FE:FF:FF:FF:FF:FF) + */ + memset(dev->dev_addr, 0xFF, ETH_ALEN); + dev->dev_addr[0] &= ~0x01; rtnl_lock(); err = register_netdevice(dev); @@ -306,25 +317,6 @@ err_rx: return err; } -static void netif_free(netif_t *netif) -{ - atomic_dec(&netif->refcnt); - wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); - - if (netif->irq) - unbind_from_irqhandler(netif->irq, netif); - - unregister_netdev(netif->dev); - - if (netif->tx.sring) { - unmap_frontend_pages(netif); - free_vm_area(netif->tx_comms_area); - free_vm_area(netif->rx_comms_area); - } - - free_netdev(netif->dev); -} - void netif_disconnect(netif_t *netif) { if (netif_carrier_ok(netif->dev)) { @@ -335,5 +327,23 @@ void netif_disconnect(netif_t *netif) rtnl_unlock(); netif_put(netif); } - netif_free(netif); -} + + atomic_dec(&netif->refcnt); + wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); + + del_timer_sync(&netif->credit_timeout); + del_timer_sync(&netif->tx_queue_timeout); + + if (netif->irq) + unbind_from_irqhandler(netif->irq, netif); + + unregister_netdev(netif->dev); + + if (netif->tx.sring) { + unmap_frontend_pages(netif); + free_vm_area(netif->tx_comms_area); + free_vm_area(netif->rx_comms_area); + } + + free_netdev(netif->dev); +} diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Wed Nov 29 14:16:36 2006 -0600 @@ -53,8 +53,10 @@ #include <linux/skbuff.h> #include <linux/ethtool.h> #include <net/dst.h> - -static int nloopbacks = 8; +#include <net/xfrm.h> /* secpath_reset() */ +#include <asm/hypervisor.h> /* is_initial_xendomain() */ + +static int nloopbacks = -1; module_param(nloopbacks, int, 0); MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create"); @@ -77,9 +79,59 @@ static int loopback_close(struct net_dev return 0; } +#ifdef CONFIG_X86 +static int 
is_foreign(unsigned long pfn) +{ + /* NB. Play it safe for auto-translation mode. */ + return (xen_feature(XENFEAT_auto_translated_physmap) || + (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT)); +} +#else +/* How to detect a foreign mapping? Play it safe. */ +#define is_foreign(pfn) (1) +#endif + +static int skb_remove_foreign_references(struct sk_buff *skb) +{ + struct page *page; + unsigned long pfn; + int i, off; + char *vaddr; + + BUG_ON(skb_shinfo(skb)->frag_list); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page); + if (!is_foreign(pfn)) + continue; + + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!page)) + return 0; + + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + off = skb_shinfo(skb)->frags[i].page_offset; + memcpy(page_address(page) + off, + vaddr + off, + skb_shinfo(skb)->frags[i].size); + kunmap_skb_frag(vaddr); + + put_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = page; + } + + return 1; +} + static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_private *np = netdev_priv(dev); + + if (!skb_remove_foreign_references(skb)) { + np->stats.tx_dropped++; + dev_kfree_skb(skb); + return 0; + } dst_release(skb->dst); skb->dst = NULL; @@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk skb->protocol = eth_type_trans(skb, dev); skb->dev = dev; dev->last_rx = jiffies; + + /* Flush netfilter context: rx'ed skbuffs not expected to have any. */ + nf_reset(skb); + secpath_reset(skb); + netif_rx(skb); return 0; @@ -239,6 +296,9 @@ static int __init loopback_init(void) { int i, err = 0; + if (nloopbacks == -1) + nloopbacks = is_initial_xendomain() ? 
4 : 0; + for (i = 0; i < nloopbacks; i++) if ((err = make_loopback(i)) != 0) break; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Nov 29 14:16:36 2006 -0600 @@ -70,14 +70,15 @@ static struct timer_list net_timer; static struct sk_buff_head rx_queue; -static unsigned long mmap_vstart; -#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) - -static void *rx_mmap_area; +static struct page **mmap_pages; +static inline unsigned long idx_to_kaddr(unsigned int idx) +{ + return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx])); +} #define PKT_PROT_LEN 64 -static struct { +static struct pending_tx_info { netif_tx_request_t req; netif_t *netif; } pending_tx_info[MAX_PENDING_REQS]; @@ -186,7 +187,7 @@ static struct sk_buff *netbk_copy_skb(st if (unlikely(!nskb)) goto err; - skb_reserve(nskb, 16); + skb_reserve(nskb, 16 + NET_IP_ALIGN); headlen = nskb->end - nskb->data; if (headlen > skb_headlen(skb)) headlen = skb_headlen(skb); @@ -217,7 +218,7 @@ static struct sk_buff *netbk_copy_skb(st copy = len >= PAGE_SIZE ? PAGE_SIZE : len; zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; - page = alloc_page(GFP_ATOMIC | zero); + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); if (unlikely(!page)) goto err_free; @@ -263,6 +264,13 @@ static inline int netbk_queue_full(netif ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); } +static void tx_queue_callback(unsigned long data) +{ + netif_t *netif = (netif_t *)data; + if (netif_schedulable(netif->dev)) + netif_wake_queue(netif->dev); +} + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) { netif_t *netif = netdev_priv(dev); @@ -270,20 +278,13 @@ int netif_be_start_xmit(struct sk_buff * BUG_ON(skb->dev != dev); /* Drop the packet if the target domain has no receive buffers. 
*/ - if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) + if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif))) goto drop; - if (unlikely(netbk_queue_full(netif))) { - /* Not a BUG_ON() -- misbehaving netfront can trigger this. */ - if (netbk_can_queue(dev)) - DPRINTK("Queue full but not stopped!\n"); - goto drop; - } - - /* Copy the packet here if it's destined for a flipping - interface but isn't flippable (e.g. extra references to - data) - */ + /* + * Copy the packet here if it's destined for a flipping interface + * but isn't flippable (e.g. extra references to data). + */ if (!netif->copying_receiver && !is_flippable_skb(skb)) { struct sk_buff *nskb = netbk_copy_skb(skb); if ( unlikely(nskb == NULL) ) @@ -304,8 +305,19 @@ int netif_be_start_xmit(struct sk_buff * netif->rx.sring->req_event = netif->rx_req_cons_peek + netbk_max_required_rx_slots(netif); mb(); /* request notification /then/ check & stop the queue */ - if (netbk_queue_full(netif)) + if (netbk_queue_full(netif)) { netif_stop_queue(dev); + /* + * Schedule 500ms timeout to restart the queue, thus + * ensuring that an inactive queue will be drained. + * Packets will be immediately be dropped until more + * receive buffers become available (see + * netbk_queue_full() check above). + */ + netif->tx_queue_timeout.data = (unsigned long)netif; + netif->tx_queue_timeout.function = tx_queue_callback; + __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); + } } skb_queue_tail(&rx_queue, skb); @@ -373,14 +385,22 @@ static u16 netbk_gop_frag(netif_t *netif flipped. 
*/ meta->copy = 1; copy_gop = npo->copy + npo->copy_prod++; - copy_gop->source.domid = DOMID_SELF; + copy_gop->flags = GNTCOPY_dest_gref; + if (PageForeign(page)) { + struct pending_tx_info *src_pend = + &pending_tx_info[page->index]; + copy_gop->source.domid = src_pend->netif->domid; + copy_gop->source.u.ref = src_pend->req.gref; + copy_gop->flags |= GNTCOPY_source_gref; + } else { + copy_gop->source.domid = DOMID_SELF; + copy_gop->source.u.gmfn = old_mfn; + } copy_gop->source.offset = offset; - copy_gop->source.u.gmfn = old_mfn; copy_gop->dest.domid = netif->domid; copy_gop->dest.offset = 0; copy_gop->dest.u.ref = req->gref; copy_gop->len = size; - copy_gop->flags = GNTCOPY_dest_gref; } else { meta->copy = 0; if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -474,7 +494,7 @@ static int netbk_check_gop(int nr_frags, copy_op = npo->copy + npo->copy_cons++; if (copy_op->status != GNTST_okay) { DPRINTK("Bad status %d from copy to DOM%d.\n", - gop->status, domid); + copy_op->status, domid); status = NETIF_RSP_ERROR; } } else { @@ -697,6 +717,7 @@ static void net_rx_action(unsigned long } if (netif_queue_stopped(netif->dev) && + netif_schedulable(netif->dev) && !netbk_queue_full(netif)) netif_wake_queue(netif->dev); @@ -754,8 +775,7 @@ static void add_to_net_schedule_list_tai spin_lock_irq(&net_schedule_list_lock); if (!__on_net_schedule_list(netif) && - likely(netif_running(netif->dev) && - netif_carrier_ok(netif->dev))) { + likely(netif_schedulable(netif->dev))) { list_add_tail(&netif->list, &net_schedule_list); netif_get(netif); } @@ -792,10 +812,30 @@ void netif_deschedule_work(netif_t *neti } +static void tx_add_credit(netif_t *netif) +{ + unsigned long max_burst, max_credit; + + /* + * Allow a burst big enough to transmit a jumbo packet of up to 128kB. + * Otherwise the interface can seize up due to insufficient credit. 
+ */ + max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; + max_burst = min(max_burst, 131072UL); + max_burst = max(max_burst, netif->credit_bytes); + + /* Take care that adding a new chunk of credit doesn't wrap to zero. */ + max_credit = netif->remaining_credit + netif->credit_bytes; + if (max_credit < netif->remaining_credit) + max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ + + netif->remaining_credit = min(max_credit, max_burst); +} + static void tx_credit_callback(unsigned long data) { netif_t *netif = (netif_t *)data; - netif->remaining_credit = netif->credit_bytes; + tx_add_credit(netif); netif_schedule_work(netif); } @@ -819,7 +859,7 @@ inline static void net_tx_action_dealloc gop = tx_unmap_ops; while (dc != dp) { pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; - gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx), + gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx), GNTMAP_host_map, grant_tx_handle[pending_idx]); gop++; @@ -857,20 +897,28 @@ static void netbk_tx_err(netif_t *netif, netif_put(netif); } -static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp, - int work_to_do) -{ - netif_tx_request_t *first = txp; +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first, + netif_tx_request_t *txp, int work_to_do) +{ RING_IDX cons = netif->tx.req_cons; int frags = 0; - while (txp->flags & NETTXF_more_data) { + if (!(first->flags & NETTXF_more_data)) + return 0; + + do { if (frags >= work_to_do) { DPRINTK("Need more frags\n"); return -frags; } - txp = RING_GET_REQUEST(&netif->tx, cons + frags); + if (unlikely(frags >= MAX_SKB_FRAGS)) { + DPRINTK("Too many frags\n"); + return -frags; + } + + memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), + sizeof(*txp)); if (txp->size > first->size) { DPRINTK("Frags galore\n"); return -frags; @@ -884,30 +932,28 @@ static int netbk_count_requests(netif_t txp->offset, txp->size); return -frags; } - } + } while ((txp++)->flags & NETTXF_more_data); return 
frags; } static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, struct sk_buff *skb, + netif_tx_request_t *txp, gnttab_map_grant_ref_t *mop) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - netif_tx_request_t *txp; unsigned long pending_idx = *((u16 *)skb->data); - RING_IDX cons = netif->tx.req_cons; int i, start; /* Skip first skb fragment if it is on same page as header fragment. */ start = ((unsigned long)shinfo->frags[0].page == pending_idx); - for (i = start; i < shinfo->nr_frags; i++) { - txp = RING_GET_REQUEST(&netif->tx, cons++); + for (i = start; i < shinfo->nr_frags; i++, txp++) { pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; - gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx), + gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx), GNTMAP_host_map | GNTMAP_readonly, txp->gref, netif->domid); @@ -940,7 +986,7 @@ static int netbk_tx_check_mop(struct sk_ netif_put(netif); } else { set_phys_to_machine( - __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT, + __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT, FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); grant_tx_handle[pending_idx] = mop->handle; } @@ -957,7 +1003,7 @@ static int netbk_tx_check_mop(struct sk_ newerr = (++mop)->status; if (likely(!newerr)) { set_phys_to_machine( - __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT, + __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT, FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); grant_tx_handle[pending_idx] = mop->handle; /* Had a previous error? Invalidate this fragment. 
*/ @@ -1005,7 +1051,7 @@ static void netbk_fill_frags(struct sk_b pending_idx = (unsigned long)frag->page; txp = &pending_tx_info[pending_idx].req; - frag->page = virt_to_page(MMAP_VADDR(pending_idx)); + frag->page = virt_to_page(idx_to_kaddr(pending_idx)); frag->size = txp->size; frag->page_offset = txp->offset; @@ -1018,7 +1064,7 @@ int netbk_get_extras(netif_t *netif, str int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, int work_to_do) { - struct netif_extra_info *extra; + struct netif_extra_info extra; RING_IDX cons = netif->tx.req_cons; do { @@ -1027,18 +1073,18 @@ int netbk_get_extras(netif_t *netif, str return -EBADR; } - extra = (struct netif_extra_info *) - RING_GET_REQUEST(&netif->tx, cons); - if (unlikely(!extra->type || - extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), + sizeof(extra)); + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { netif->tx.req_cons = ++cons; - DPRINTK("Invalid extra type: %d\n", extra->type); + DPRINTK("Invalid extra type: %d\n", extra.type); return -EINVAL; } - memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); + memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); netif->tx.req_cons = ++cons; - } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); return work_to_do; } @@ -1073,6 +1119,7 @@ static void net_tx_action(unsigned long struct sk_buff *skb; netif_t *netif; netif_tx_request_t txreq; + netif_tx_request_t txfrags[MAX_SKB_FRAGS]; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; u16 pending_idx; RING_IDX i; @@ -1101,6 +1148,7 @@ static void net_tx_action(unsigned long i = netif->tx.req_cons; rmb(); /* Ensure that we see the request before we copy it. */ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); + /* Credit-based scheduling. 
*/ if (txreq.size > netif->remaining_credit) { unsigned long now = jiffies; @@ -1109,25 +1157,27 @@ static void net_tx_action(unsigned long msecs_to_jiffies(netif->credit_usec / 1000); /* Timer could already be pending in rare cases. */ - if (timer_pending(&netif->credit_timeout)) - break; + if (timer_pending(&netif->credit_timeout)) { + netif_put(netif); + continue; + } /* Passed the point where we can replenish credit? */ if (time_after_eq(now, next_credit)) { netif->credit_timeout.expires = now; - netif->remaining_credit = netif->credit_bytes; + tx_add_credit(netif); } /* Still too big to send right now? Set a callback. */ if (txreq.size > netif->remaining_credit) { - netif->remaining_credit = 0; netif->credit_timeout.data = (unsigned long)netif; netif->credit_timeout.function = tx_credit_callback; __mod_timer(&netif->credit_timeout, next_credit); - break; + netif_put(netif); + continue; } } netif->remaining_credit -= txreq.size; @@ -1146,18 +1196,12 @@ static void net_tx_action(unsigned long } } - ret = netbk_count_requests(netif, &txreq, work_to_do); + ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); if (unlikely(ret < 0)) { netbk_tx_err(netif, &txreq, i - ret); continue; } i += ret; - - if (unlikely(ret > MAX_SKB_FRAGS)) { - DPRINTK("Too many frags\n"); - netbk_tx_err(netif, &txreq, i); - continue; - } if (unlikely(txreq.size < ETH_HLEN)) { DPRINTK("Bad packet size: %d\n", txreq.size); @@ -1180,7 +1224,7 @@ static void net_tx_action(unsigned long ret < MAX_SKB_FRAGS) ? PKT_PROT_LEN : txreq.size; - skb = alloc_skb(data_len+16, GFP_ATOMIC); + skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, GFP_ATOMIC); if (unlikely(skb == NULL)) { DPRINTK("Can't allocate a skb in start_xmit.\n"); netbk_tx_err(netif, &txreq, i); @@ -1188,7 +1232,7 @@ static void net_tx_action(unsigned long } /* Packets passed to netif_rx() must have some headroom. 
*/ - skb_reserve(skb, 16); + skb_reserve(skb, 16 + NET_IP_ALIGN); if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { struct netif_extra_info *gso; @@ -1201,7 +1245,7 @@ static void net_tx_action(unsigned long } } - gnttab_set_map_op(mop, MMAP_VADDR(pending_idx), + gnttab_set_map_op(mop, idx_to_kaddr(pending_idx), GNTMAP_host_map | GNTMAP_readonly, txreq.gref, netif->domid); mop++; @@ -1227,7 +1271,7 @@ static void net_tx_action(unsigned long pending_cons++; - mop = netbk_get_requests(netif, skb, mop); + mop = netbk_get_requests(netif, skb, txfrags, mop); netif->tx.req_cons = i; netif_schedule_work(netif); @@ -1260,8 +1304,8 @@ static void net_tx_action(unsigned long } data_len = skb->len; - memcpy(skb->data, - (void *)(MMAP_VADDR(pending_idx)|txp->offset), + memcpy(skb->data, + (void *)(idx_to_kaddr(pending_idx)|txp->offset), data_len); if (data_len < txp->size) { /* Append the packet payload as a fragment. */ @@ -1315,18 +1359,10 @@ static void netif_idx_release(u16 pendin static void netif_page_release(struct page *page) { - u16 pending_idx = page - virt_to_page(mmap_vstart); - /* Ready for next use. */ set_page_count(page, 1); - netif_idx_release(pending_idx); -} - -static void netif_rx_page_release(struct page *page) -{ - /* Ready for next use. 
*/ - set_page_count(page, 1); + netif_idx_release(page->index); } irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) @@ -1336,7 +1372,7 @@ irqreturn_t netif_be_int(int irq, void * add_to_net_schedule_list_tail(netif); maybe_schedule_tx_action(); - if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif)) + if (netif_schedulable(netif->dev) && !netbk_queue_full(netif)) netif_wake_queue(netif->dev); return IRQ_HANDLED; @@ -1446,27 +1482,17 @@ static int __init netback_init(void) init_timer(&net_timer); net_timer.data = 0; net_timer.function = net_alarm; - - page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); - if (page == NULL) + + mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); + if (mmap_pages == NULL) { + printk("%s: out of memory\n", __FUNCTION__); return -ENOMEM; - - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + } for (i = 0; i < MAX_PENDING_REQS; i++) { - page = virt_to_page(MMAP_VADDR(i)); - set_page_count(page, 1); + page = mmap_pages[i]; SetPageForeign(page, netif_page_release); - } - - page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE); - BUG_ON(page == NULL); - rx_mmap_area = pfn_to_kaddr(page_to_pfn(page)); - - for (i = 0; i < NET_RX_RING_SIZE; i++) { - page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE)); - set_page_count(page, 1); - SetPageForeign(page, netif_rx_page_release); + page->index = i; } pending_cons = 0; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Nov 29 14:16:36 2006 -0600 @@ -28,29 +28,20 @@ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) #endif -struct backend_info -{ +struct backend_info { struct xenbus_device *dev; netif_t *netif; - struct xenbus_watch backend_watch; enum xenbus_state frontend_state; }; static int connect_rings(struct backend_info *); static 
void connect(struct backend_info *); -static void maybe_connect(struct backend_info *); -static void backend_changed(struct xenbus_watch *, const char **, - unsigned int); +static void backend_create_netif(struct backend_info *be); static int netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev->dev.driver_data; - if (be->backend_watch.node) { - unregister_xenbus_watch(&be->backend_watch); - kfree(be->backend_watch.node); - be->backend_watch.node = NULL; - } if (be->netif) { netif_disconnect(be->netif); be->netif = NULL; @@ -63,8 +54,7 @@ static int netback_remove(struct xenbus_ /** * Entry point to this code when a new device is created. Allocate the basic - * structures, and watch the store waiting for the hotplug scripts to tell us - * the device's handle. Switch to InitWait. + * structures and switch to InitWait. */ static int netback_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) @@ -83,11 +73,6 @@ static int netback_probe(struct xenbus_d be->dev = dev; dev->dev.driver_data = be; - err = xenbus_watch_path2(dev, dev->nodename, "handle", - &be->backend_watch, backend_changed); - if (err) - goto fail; - do { err = xenbus_transaction_start(&xbt); if (err) { @@ -108,9 +93,22 @@ static int netback_probe(struct xenbus_d goto abort_transaction; } - err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); - if (err) { - message = "writing feature-copying"; + /* We support rx-copy path. */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-copy", "%d", 1); + if (err) { + message = "writing feature-rx-copy"; + goto abort_transaction; + } + + /* + * We don't support rx-flip path (except old guests who don't + * grok this feature flag). 
+ */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-flip", "%d", 0); + if (err) { + message = "writing feature-rx-flip"; goto abort_transaction; } @@ -123,9 +121,11 @@ static int netback_probe(struct xenbus_d } err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) { + if (err) goto fail; - } + + /* This kicks hotplug scripts, so do it immediately. */ + backend_create_netif(be); return 0; @@ -175,48 +175,30 @@ static int netback_uevent(struct xenbus_ } -/** - * Callback received when the hotplug scripts have placed the handle node. - * Read it, and create a netif structure. If the frontend is ready, connect. - */ -static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) +static void backend_create_netif(struct backend_info *be) { int err; long handle; - struct backend_info *be - = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; - DPRINTK(""); + if (be->netif != NULL) + return; err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); - if (XENBUS_EXIST_ERR(err)) { - /* Since this watch will fire once immediately after it is - registered, we expect this. Ignore it, and wait for the - hotplug scripts. 
*/ - return; - } if (err != 1) { xenbus_dev_fatal(dev, err, "reading handle"); return; } - if (be->netif == NULL) { - u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 }; - - be->netif = netif_alloc(dev->otherend_id, handle, be_mac); - if (IS_ERR(be->netif)) { - err = PTR_ERR(be->netif); - be->netif = NULL; - xenbus_dev_fatal(dev, err, "creating interface"); - return; - } - - kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); - - maybe_connect(be); - } + be->netif = netif_alloc(dev->otherend_id, handle); + if (IS_ERR(be->netif)) { + err = PTR_ERR(be->netif); + be->netif = NULL; + xenbus_dev_fatal(dev, err, "creating interface"); + return; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); } @@ -249,11 +231,9 @@ static void frontend_changed(struct xenb break; case XenbusStateConnected: - if (!be->netif) { - /* reconnect: setup be->netif */ - backend_changed(&be->backend_watch, NULL, 0); - } - maybe_connect(be); + backend_create_netif(be); + if (be->netif) + connect(be); break; case XenbusStateClosing: @@ -279,15 +259,6 @@ static void frontend_changed(struct xenb } -/* ** Connection ** */ - - -static void maybe_connect(struct backend_info *be) -{ - if (be->netif && (be->frontend_state == XenbusStateConnected)) - connect(be); -} - static void xen_net_read_rate(struct xenbus_device *dev, unsigned long *bytes, unsigned long *usec) { @@ -366,6 +337,10 @@ static void connect(struct backend_info be->netif->remaining_credit = be->netif->credit_bytes; xenbus_switch_state(dev, XenbusStateConnected); + + /* May not get a kick from the frontend, so start the tx_queue now. */ + if (!netbk_can_queue(be->netif->dev)) + netif_wake_queue(be->netif->dev); } @@ -403,14 +378,16 @@ static int connect_rings(struct backend_ } be->netif->copying_receiver = !!rx_copy; - if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d", - &val) < 0) - val = 0; - if (val) - be->netif->can_queue = 1; - else - /* Must be non-zero for pfifo_fast to work. 
*/ - be->netif->dev->tx_queue_len = 1; + if (be->netif->dev->tx_queue_len != 0) { + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-rx-notify", "%d", &val) < 0) + val = 0; + if (val) + be->netif->can_queue = 1; + else + /* Must be non-zero for pfifo_fast to work. */ + be->netif->dev->tx_queue_len = 1; + } if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) val = 0; diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Wed Nov 29 14:16:36 2006 -0600 @@ -47,6 +47,7 @@ #include <linux/in.h> #include <linux/if_ether.h> #include <linux/io.h> +#include <linux/moduleparam.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/arp.h> @@ -63,20 +64,76 @@ #include <xen/interface/grant_table.h> #include <xen/gnttab.h> +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + +/* + * Mutually-exclusive module options to select receive data path: + * rx_copy : Packets are copied by network backend into local memory + * rx_flip : Page containing packet data is transferred to our ownership + * For fully-virtualised guests there is no option - copying must be used. + * For paravirtualised guests, flipping is the default. + */ +#ifdef CONFIG_XEN +static int MODPARM_rx_copy = 0; +module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); +MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); +static int MODPARM_rx_flip = 0; +module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); +MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); +#else +static const int MODPARM_rx_copy = 1; +static const int MODPARM_rx_flip = 0; +#endif + #define RX_COPY_THRESHOLD 256 /* If we don't have GSO, fake things up so that we never try to use it. 
*/ -#ifndef NETIF_F_GSO -#define netif_needs_gso(dev, skb) 0 -#define dev_disable_gso_features(dev) ((void)0) -#else +#if defined(NETIF_F_GSO) #define HAVE_GSO 1 +#define HAVE_TSO 1 /* TSO is a subset of GSO */ static inline void dev_disable_gso_features(struct net_device *dev) { /* Turn off all GSO bits except ROBUST. */ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; dev->features |= NETIF_F_GSO_ROBUST; } +#elif defined(NETIF_F_TSO) +#define HAVE_TSO 1 + +/* Some older kernels cannot cope with incorrect checksums, + * particularly in netfilter. I'm not sure there is 100% correlation + * with the presence of NETIF_F_TSO but it appears to be a good first + * approximiation. + */ +#define HAVE_NO_CSUM_OFFLOAD 1 + +#define gso_size tso_size +#define gso_segs tso_segs +static inline void dev_disable_gso_features(struct net_device *dev) +{ + /* Turn off all TSO bits. */ + dev->features &= ~NETIF_F_TSO; +} +static inline int skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->tso_size; +} +static inline int skb_gso_ok(struct sk_buff *skb, int features) +{ + return (features & NETIF_F_TSO); +} + +static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) +{ + return skb_is_gso(skb) && + (!skb_gso_ok(skb, dev->features) || + unlikely(skb->ip_summed != CHECKSUM_HW)); +} +#else +#define netif_needs_gso(dev, skb) 0 +#define dev_disable_gso_features(dev) ((void)0) #endif #define GRANT_INVALID_REF 0 @@ -96,7 +153,6 @@ struct netfront_info { spinlock_t tx_lock; spinlock_t rx_lock; - unsigned int handle; unsigned int evtchn, irq; unsigned int copying_receiver; @@ -120,7 +176,7 @@ struct netfront_info { grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_TX_RING_SIZE]; + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; struct xenbus_device *xbdev; int tx_ring_ref; @@ -185,9 +241,8 @@ static inline grant_ref_t xennet_get_rx_ #define WPRINTK(fmt, args...) 
\ printk(KERN_WARNING "netfront: " fmt, ##args) -static int talk_to_backend(struct xenbus_device *, struct netfront_info *); static int setup_device(struct xenbus_device *, struct netfront_info *); -static struct net_device *create_netdev(int, int, struct xenbus_device *); +static struct net_device *create_netdev(struct xenbus_device *); static void netfront_closing(struct xenbus_device *); @@ -195,9 +250,8 @@ static void netif_disconnect_backend(str static void netif_disconnect_backend(struct netfront_info *); static int open_netdev(struct netfront_info *); static void close_netdev(struct netfront_info *); -static void netif_free(struct netfront_info *); - -static void network_connect(struct net_device *); + +static int network_connect(struct net_device *); static void network_tx_buf_gc(struct net_device *); static void network_alloc_rx_buffers(struct net_device *); static int send_fake_arp(struct net_device *); @@ -220,8 +274,7 @@ static inline int xennet_can_sg(struct n /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and - * inform the backend of the appropriate details for those. Switch to - * Connected state. + * inform the backend of the appropriate details for those. 
*/ static int __devinit netfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) @@ -229,31 +282,8 @@ static int __devinit netfront_probe(stru int err; struct net_device *netdev; struct netfront_info *info; - unsigned int handle; - unsigned feature_rx_copy; - - err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle); - if (err != 1) { - xenbus_dev_fatal(dev, err, "reading handle"); - return err; - } - -#ifndef CONFIG_XEN - err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u", - &feature_rx_copy); - if (err != 1) { - xenbus_dev_fatal(dev, err, "reading feature-rx-copy"); - return err; - } - if (!feature_rx_copy) { - xenbus_dev_fatal(dev, 0, "need a copy-capable backend"); - return -EINVAL; - } -#else - feature_rx_copy = 0; -#endif - - netdev = create_netdev(handle, feature_rx_copy, dev); + + netdev = create_netdev(dev); if (IS_ERR(netdev)) { err = PTR_ERR(netdev); xenbus_dev_fatal(dev, err, "creating netdev"); @@ -263,20 +293,13 @@ static int __devinit netfront_probe(stru info = netdev_priv(netdev); dev->dev.driver_data = info; - err = talk_to_backend(dev, info); - if (err) - goto fail_backend; - err = open_netdev(info); if (err) - goto fail_open; + goto fail; return 0; - fail_open: - xennet_sysfs_delif(info->netdev); - unregister_netdev(netdev); - fail_backend: + fail: free_netdev(netdev); dev->dev.driver_data = NULL; return err; @@ -296,7 +319,7 @@ static int netfront_resume(struct xenbus DPRINTK("%s\n", dev->nodename); netif_disconnect_backend(info); - return talk_to_backend(dev, info); + return 0; } static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) @@ -379,13 +402,21 @@ again: goto abort_transaction; } +#ifdef HAVE_NO_CSUM_OFFLOAD + err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1); + if (err) { + message = "writing feature-no-csum-offload"; + goto abort_transaction; + } +#endif + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); if (err) { message = "writing 
feature-sg"; goto abort_transaction; } -#ifdef HAVE_GSO +#ifdef HAVE_TSO err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; @@ -407,11 +438,10 @@ again: xenbus_transaction_end(xbt, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: - netif_free(info); + netif_disconnect_backend(info); out: return err; } - static int setup_device(struct xenbus_device *dev, struct netfront_info *info) { @@ -472,10 +502,8 @@ static int setup_device(struct xenbus_de return 0; fail: - netif_free(info); return err; } - /** * Callback received when the backend's state changes. @@ -497,7 +525,8 @@ static void backend_changed(struct xenbu break; case XenbusStateInitWait: - network_connect(netdev); + if (network_connect(netdev) != 0) + break; xenbus_switch_state(dev, XenbusStateConnected); (void)send_fake_arp(netdev); break; @@ -507,7 +536,6 @@ static void backend_changed(struct xenbu break; } } - /** Send a packet on a net device to encourage switches to learn the * MAC. We send a fake ARP request. @@ -536,7 +564,6 @@ static int send_fake_arp(struct net_devi return dev_queue_xmit(skb); } - static int network_open(struct net_device *dev) { @@ -629,13 +656,11 @@ static void network_tx_buf_gc(struct net network_maybe_wake_tx(dev); } - static void rx_refill_timeout(unsigned long data) { struct net_device *dev = (struct net_device *)data; netif_rx_schedule(dev); } - static void network_alloc_rx_buffers(struct net_device *dev) { @@ -669,7 +694,7 @@ static void network_alloc_rx_buffers(str * necessary here. * 16 bytes added as necessary headroom for netif_receive_skb. 
*/ - skb = alloc_skb(RX_COPY_THRESHOLD + 16, + skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto no_skb; @@ -687,7 +712,7 @@ no_skb: break; } - skb_reserve(skb, 16); /* mimic dev_alloc_skb() */ + skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */ skb_shinfo(skb)->frags[0].page = page; skb_shinfo(skb)->nr_frags = 1; __skb_queue_tail(&np->rx_batch, skb); @@ -742,7 +767,7 @@ no_skb: } else { gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, - pfn, + pfn_to_mfn(pfn), 0); } @@ -917,7 +942,7 @@ static int network_start_xmit(struct sk_ tx->flags |= NETTXF_data_validated; #endif -#ifdef HAVE_GSO +#ifdef HAVE_TSO if (skb_shinfo(skb)->gso_size) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&np->tx, ++i); @@ -1071,6 +1096,7 @@ static int xennet_get_responses(struct n if (net_ratelimit()) WPRINTK("rx->offset: %x, size: %u\n", rx->offset, rx->status); + xennet_move_rx_slot(np, skb, ref); err = -EINVAL; goto next; } @@ -1081,7 +1107,8 @@ static int xennet_get_responses(struct n * situation to the system controller to reboot the backed. */ if (ref == GRANT_INVALID_REF) { - WPRINTK("Bad rx response id %d.\n", rx->id); + if (net_ratelimit()) + WPRINTK("Bad rx response id %d.\n", rx->id); err = -EINVAL; goto next; } @@ -1153,6 +1180,9 @@ next: err = -E2BIG; } + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + *pages_flipped_p = pages_flipped; return err; @@ -1205,12 +1235,14 @@ static int xennet_set_skb_gso(struct sk_ return -EINVAL; } +#ifdef HAVE_TSO + skb_shinfo(skb)->gso_size = gso->u.gso.size; #ifdef HAVE_GSO - skb_shinfo(skb)->gso_size = gso->u.gso.size; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; /* Header must be checked, and gso_segs computed. 
*/ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; +#endif skb_shinfo(skb)->gso_segs = 0; return 0; @@ -1255,9 +1287,9 @@ static int netif_poll(struct net_device rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for (i = np->rx.rsp_cons, work_done = 0; - (i != rp) && (work_done < budget); - np->rx.rsp_cons = ++i, work_done++) { + i = np->rx.rsp_cons; + work_done = 0; + while ((i != rp) && (work_done < budget)) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(extras)); @@ -1265,12 +1297,11 @@ static int netif_poll(struct net_device &pages_flipped); if (unlikely(err)) { -err: - i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1; - work_done--; +err: while ((skb = __skb_dequeue(&tmpq))) __skb_queue_tail(&errq, skb); np->stats.rx_errors++; + i = np->rx.rsp_cons; continue; } @@ -1282,6 +1313,7 @@ err: if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); + np->rx.rsp_cons += skb_queue_len(&tmpq); goto err; } } @@ -1345,6 +1377,9 @@ err: np->stats.rx_bytes += skb->len; __skb_queue_tail(&rxq, skb); + + np->rx.rsp_cons = ++i; + work_done++; } if (pages_flipped) { @@ -1561,7 +1596,7 @@ static int xennet_set_sg(struct net_devi static int xennet_set_tso(struct net_device *dev, u32 data) { -#ifdef HAVE_GSO +#ifdef HAVE_TSO if (data) { struct netfront_info *np = netdev_priv(dev); int val; @@ -1588,19 +1623,52 @@ static void xennet_set_features(struct n if (!(dev->features & NETIF_F_IP_CSUM)) return; - if (!xennet_set_sg(dev, 1)) - xennet_set_tso(dev, 1); -} - -static void network_connect(struct net_device *dev) + if (xennet_set_sg(dev, 1)) + return; + + /* Before 2.6.9 TSO seems to be unreliable so do not enable it + * on older kernels. 
+ */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) + xennet_set_tso(dev, 1); +#endif + +} + +static int network_connect(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); - int i, requeue_idx; + int i, requeue_idx, err; struct sk_buff *skb; grant_ref_t ref; netif_rx_request_t *req; + unsigned int feature_rx_copy, feature_rx_flip; + + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-copy", "%u", &feature_rx_copy); + if (err != 1) + feature_rx_copy = 0; + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-flip", "%u", &feature_rx_flip); + if (err != 1) + feature_rx_flip = 1; + + /* + * Copy packets on receive path if: + * (a) This was requested by user, and the backend supports it; or + * (b) Flipping was requested, but this is unsupported by the backend. + */ + np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || + (MODPARM_rx_flip && !feature_rx_flip)); + + err = talk_to_backend(np->xbdev, np); + if (err) + return err; xennet_set_features(dev); + + IPRINTK("device %s has %sing receive path.\n", + dev->name, np->copying_receiver ? 
"copy" : "flipp"); spin_lock_irq(&np->tx_lock); spin_lock(&np->rx_lock); @@ -1632,7 +1700,8 @@ static void network_connect(struct net_d } else { gnttab_grant_foreign_access_ref( ref, np->xbdev->otherend_id, - page_to_pfn(skb_shinfo(skb)->frags->page), + pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> + frags->page)), 0); } req->gref = ref; @@ -1656,6 +1725,8 @@ static void network_connect(struct net_d spin_unlock(&np->rx_lock); spin_unlock_irq(&np->tx_lock); + + return 0; } static void netif_uninit(struct net_device *dev) @@ -1821,8 +1892,7 @@ static void network_set_multicast_list(s { } -static struct net_device * __devinit -create_netdev(int handle, int copying_receiver, struct xenbus_device *dev) +static struct net_device * __devinit create_netdev(struct xenbus_device *dev) { int i, err = 0; struct net_device *netdev = NULL; @@ -1836,9 +1906,7 @@ create_netdev(int handle, int copying_re } np = netdev_priv(netdev); - np->handle = handle; np->xbdev = dev; - np->copying_receiver = copying_receiver; netif_carrier_off(netdev); @@ -1969,10 +2037,12 @@ static int open_netdev(struct netfront_i err = xennet_sysfs_addif(info->netdev); if (err) { - /* This can be non-fatal: it only means no tuning parameters */ + unregister_netdev(info->netdev); printk(KERN_WARNING "%s: add sysfs failed err=%d\n", __FUNCTION__, err); - } + return err; + } + return 0; } @@ -2004,14 +2074,6 @@ static void netif_disconnect_backend(str info->rx_ring_ref = GRANT_INVALID_REF; info->tx.sring = NULL; info->rx.sring = NULL; -} - - -static void netif_free(struct netfront_info *info) -{ - close_netdev(info); - netif_disconnect_backend(info); - free_netdev(info->netdev); } @@ -2053,6 +2115,16 @@ static int __init netif_init(void) if (!is_running_on_xen()) return -ENODEV; +#ifdef CONFIG_XEN + if (MODPARM_rx_flip && MODPARM_rx_copy) { + WPRINTK("Cannot specify both rx_copy and rx_flip.\n"); + return -EINVAL; + } + + if (!MODPARM_rx_flip && !MODPARM_rx_copy) + MODPARM_rx_flip = 1; /* Default is to flip. 
*/ +#endif + if (is_initial_xendomain()) return 0; @@ -2067,6 +2139,9 @@ module_init(netif_init); static void __exit netif_exit(void) { + if (is_initial_xendomain()) + return; + unregister_inetaddr_notifier(&notifier_inetdev); return xenbus_unregister_driver(&netfront); diff -r ee4397571e44 -r 223470316756 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Aug 09 15:38:37 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Nov 29 14:16:36 2006 -0600 @@ -35,6 +35,10 @@ static struct proc_dir_entry *privcmd_in static struct proc_dir_entry *privcmd_intf; static struct proc_dir_entry *capabilities_intf; +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); +#endif + static int privcmd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long data) { @@ -49,6 +53,8 @@ static int privcmd_ioctl(struct inode *i return -EFAULT; #if defined(__i386__) + if (hypercall.op >= (PAGE_SIZE >> 5)) + break; __asm__ __volatile__ ( "pushl %%ebx; pushl %%ecx; pushl %%edx; " "pushl %%esi; pushl %%edi; " @@ -65,45 +71,36 @@ static int privcmd_ioctl(struct inode *i "popl %%ecx; popl %%ebx" : "=a" (ret) : "0" (&hypercall) : "memory" ); #elif defined (__x86_64__) - { + if (hypercall.op < (PAGE_SIZE >> 5)) { long ign1, ign2, ign3; __asm__ __volatile__ ( "movq %8,%%r10; movq %9,%%r8;" - "shlq $5,%%rax ;" + "shll $5,%%eax ;" "addq $hypercall_page,%%rax ;" "call *%%rax" : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3) - : "0" ((unsigned long)hypercall.op), - "1" ((unsigned long)hypercall.arg[0]), - "2" ((unsigned long)hypercall.arg[1]), - "3" ((unsigned long)hypercall.arg[2]), - "g" ((unsigned long)hypercall.arg[3]), - "g" ((unsigned long)hypercall.arg[4]) + : "0" ((unsigned int)hypercall.op), + "1" (hypercall.arg[0]), + "2" (hypercall.arg[1]), + "3" (hypercall.arg[2]), + "g" (hypercall.arg[3]), + "g" (hypercall.arg[4]) : "r8", "r10", "memory" 
); } #elif defined (__ia64__) - __asm__ __volatile__ ( - ";; mov r14=%2; mov r15=%3; " - "mov r16=%4; mov r17=%5; mov r18=%6;" - "mov r2=%1; break 0x1000;; mov %0=r8 ;;" - : "=r" (ret) - : "r" (hypercall.op), - "r" (hypercall.arg[0]), _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |