[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] merge
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 6c43118bdba8d4316ddfffbea7a53f0b2f128ab3 # Parent d940ec92958d62d3c03a92cecb2aa13fb9be5d0d # Parent 0807931dfa54a79664b8ff4ec942036dfcbdcd19 merge diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Fri Feb 24 22:41:08 2006 @@ -113,10 +113,10 @@ # CONFIG_IA64_PAGE_SIZE_64KB is not set CONFIG_PGTABLE_3=y # CONFIG_PGTABLE_4 is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_IA64_CYCLONE=y CONFIG_IOSAPIC=y diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Fri Feb 24 22:41:08 2006 @@ -175,10 +175,10 @@ CONFIG_MTRR=y # CONFIG_REGPARM is not set CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Fri Feb 24 22:41:08 2006 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.16-rc3-xen0 -# Thu Feb 16 22:55:30 2006 +# Mon Feb 20 11:37:43 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -126,10 +126,10 @@ # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y 
CONFIG_ISA_DMA_API=y @@ -160,6 +160,11 @@ CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set # # Bus options (PCI etc.) diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Fri Feb 24 22:41:08 2006 @@ -110,10 +110,10 @@ # CONFIG_IA64_PAGE_SIZE_64KB is not set CONFIG_PGTABLE_3=y # CONFIG_PGTABLE_4 is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_IA64_L1_CACHE_SHIFT=7 # CONFIG_IA64_CYCLONE is not set CONFIG_IOSAPIC=y diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Fri Feb 24 22:41:08 2006 @@ -174,10 +174,10 @@ CONFIG_SPLIT_PTLOCK_CPUS=4096 # CONFIG_REGPARM is not set CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 CONFIG_HOTPLUG_CPU=y diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_x86_64 --- a/buildconfigs/linux-defconfig_xenU_x86_64 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xenU_x86_64 Fri Feb 24 22:41:08 2006 @@ -131,10 +131,10 @@ # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Fri Feb 24 21:03:07 
2006 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Feb 24 22:41:08 2006 @@ -180,10 +180,10 @@ CONFIG_MTRR=y # CONFIG_REGPARM is not set CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 CONFIG_HOTPLUG_CPU=y diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Feb 24 22:41:08 2006 @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-xen0 -# Thu Feb 16 22:56:25 2006 +# Linux kernel version: 2.6.16-rc3-xen +# Mon Feb 20 11:37:11 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -132,10 +132,10 @@ # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y @@ -168,6 +168,11 @@ CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_ACPI_CONTAINER=m + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set # # Bus options (PCI etc.) 
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Fri Feb 24 21:03:07 2006 +++ b/buildconfigs/mk.linux-2.6-xen Fri Feb 24 22:41:08 2006 @@ -2,8 +2,8 @@ OS = linux LINUX_SERIES = 2.6 -LINUX_VER = 2.6.16-rc3 -LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc3.bz2 +LINUX_VER = 2.6.16-rc4 +LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc4.bz2 LINUX_PDIR = linux-$(LINUX_VER) EXTRAVERSION ?= xen @@ -34,7 +34,7 @@ touch $(@D)/.hgskip touch $@ -pristine-linux-%.16-rc3/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs +pristine-linux-%.16-rc4/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs touch $@ # update timestamp to avoid rebuild $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Fri Feb 24 21:03:07 2006 +++ b/extras/mini-os/include/hypervisor.h Fri Feb 24 22:41:08 2006 @@ -3,10 +3,10 @@ * * Hypervisor handling. * - * TODO - x86_64 broken! * * Copyright (c) 2002, K A Fraser * Copyright (c) 2005, Grzegorz Milos + * Updates: Aravindh Puthiyaparambil <aravindh.puthiyaparambil@xxxxxxxxxx> */ #ifndef _HYPERVISOR_H_ @@ -15,6 +15,13 @@ #include <types.h> #include <xen/xen.h> #include <xen/dom0_ops.h> +#if defined(__i386__) +#include <hypercall-x86_32.h> +#elif defined(__x86_64__) +#include <hypercall-x86_64.h> +#else +#error "Unsupported architecture" +#endif /* * a placeholder for the start of day information passed up from the hypervisor @@ -27,503 +34,10 @@ extern union start_info_union start_info_union; #define start_info (start_info_union.start_info) - /* hypervisor.c */ //void do_hypervisor_callback(struct pt_regs *regs); void mask_evtchn(u32 port); void unmask_evtchn(u32 port); void clear_evtchn(u32 port); -/* - * Assembler stubs for hyper-calls. 
- */ -#if defined(__i386__) -/* Taken from Linux */ - -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ - -#include <xen/sched.h> - -#define _hypercall0(type, name) \ -({ \ - long __res; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res) \ - : "0" (__HYPERVISOR_##name) \ - : "memory" ); \ - (type)__res; \ -}) - -#define _hypercall1(type, name, a1) \ -({ \ - long __res, __ign1; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res), "=b" (__ign1) \ - : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \ - : "memory" ); \ - (type)__res; \ -}) - -#define _hypercall2(type, name, a1, a2) \ -({ \ - long __res, __ign1, __ign2; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ - : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ - "2" ((long)(a2)) \ - : "memory" ); \ - (type)__res; \ -}) - -#define _hypercall3(type, name, a1, a2, a3) \ -({ \ - long __res, __ign1, __ign2, __ign3; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3) \ - : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ - "2" ((long)(a2)), "3" ((long)(a3)) \ - : "memory" ); \ - (type)__res; \ -}) - -#define _hypercall4(type, name, a1, a2, a3, a4) \ -({ \ - long __res, __ign1, __ign2, __ign3, __ign4; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4) \ - : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ - "2" ((long)(a2)), "3" ((long)(a3)), \ - "4" ((long)(a4)) \ - : "memory" ); \ - (type)__res; \ -}) - -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ - asm volatile ( \ - TRAP_INSTR \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ - : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ - "2" ((long)(a2)), "3" ((long)(a3)), \ - "4" ((long)(a4)), "5" ((long)(a5)) \ - : "memory" ); \ - (type)__res; \ -}) - -static inline int -HYPERVISOR_set_trap_table( - 
trap_info_t *table) -{ - return _hypercall1(int, set_trap_table, table); -} - -static inline int -HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - return _hypercall4(int, mmu_update, req, count, success_count, domid); -} - -static inline int -HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - return _hypercall4(int, mmuext_op, op, count, success_count, domid); -} - -static inline int -HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - return _hypercall2(int, set_gdt, frame_list, entries); -} - -static inline int -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} - -static inline int -HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - return _hypercall4(int, set_callbacks, - event_selector, event_address, - failsafe_selector, failsafe_address); -} - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int -HYPERVISOR_sched_op( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op, cmd, arg); -} - -static inline long -HYPERVISOR_set_timer_op( - u64 timeout) -{ - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); -} - -static inline int -HYPERVISOR_set_debugreg( - int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long -HYPERVISOR_get_debugreg( - int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static inline int 
-HYPERVISOR_update_descriptor( - u64 ma, u64 desc) -{ - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - -static inline int -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, memory_op, cmd, arg); -} - -static inline int -HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); -} - -static inline int -HYPERVISOR_update_va_mapping( - unsigned long va, pte_t new_val, unsigned long flags) -{ - unsigned long pte_hi = 0; -#ifdef CONFIG_X86_PAE - pte_hi = new_val.pte_high; -#endif - return _hypercall4(int, update_va_mapping, va, - new_val.pte_low, pte_hi, flags); -} - -static inline int -HYPERVISOR_event_channel_op( - void *op) -{ - return _hypercall1(int, event_channel_op, op); -} - -static inline int -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int -HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int -HYPERVISOR_physdev_op( - void *physdev_op) -{ - return _hypercall1(int, physdev_op, physdev_op); -} - -static inline int -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - return _hypercall3(int, grant_table_op, cmd, uop, count); -} - -static inline int -HYPERVISOR_update_va_mapping_otherdomain( - unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) -{ - unsigned long pte_hi = 0; -#ifdef CONFIG_X86_PAE - pte_hi = new_val.pte_high; -#endif - return _hypercall5(int, update_va_mapping_otherdomain, va, - new_val.pte_low, pte_hi, flags, domid); -} - -static inline int -HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - return _hypercall2(int, vm_assist, cmd, type); -} - -static inline int -HYPERVISOR_vcpu_op( - int cmd, int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -static inline int 
-HYPERVISOR_suspend( - unsigned long srec) -{ - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); -} - -#endif /* __HYPERCALL_H__ */ -#elif defined(__x86_64__) - -#define __syscall_clobber "r11","rcx","memory" - -/* - * Assembler stubs for hyper-calls. - */ -static inline int -HYPERVISOR_set_trap_table( - trap_info_t *table) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5, %%r10;" TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count), - "d" (success_count), "g" ((unsigned long)domid) - : __syscall_clobber, "r10" ); - - return ret; -} - -static inline int -HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5, %%r10;" TRAP_INSTR - : "=a" (ret) - : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count), - "d" (success_count), "g" ((unsigned long)domid) - : __syscall_clobber, "r10" ); - - return ret; -} - -static inline int -HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries) - : __syscall_clobber ); - - - return ret; -} -static inline int -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_set_callbacks( - unsigned long event_address, unsigned long failsafe_address, - unsigned long syscall_address) -{ - int ret; - - 
__asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address), - "S" (failsafe_address), "d" (syscall_address) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch), - "D" ((unsigned long) set) : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_yield( - void) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_block( - void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_shutdown( - void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_reboot( - void) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - int ret; - - /* NB. On suspend, control software expects a suspend record in %esi. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))), - "S" (srec) - : __syscall_clobber ); - - return ret; -} - -/* - * We can have the timeout value in a single argument for the hypercall, but - * that will break the common code. - */ -static inline long -HYPERVISOR_set_timer_op( - u64 timeout) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_timer_op), - "D" (timeout) - : __syscall_clobber ); - - return ret; -} -#endif - #endif /* __HYPERVISOR_H__ */ diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/x86_32.S --- a/extras/mini-os/x86_32.S Fri Feb 24 21:03:07 2006 +++ b/extras/mini-os/x86_32.S Fri Feb 24 22:41:08 2006 @@ -1,16 +1,16 @@ #include <os.h> #include <xen/arch-x86_32.h> - .section __xen_guest .ascii "GUEST_OS=Mini-OS" .ascii ",XEN_VER=xen-3.0" + .ascii ",HYPERCALL_PAGE=0x2" .ascii ",LOADER=generic" .ascii ",PT_MODE_WRITABLE" .byte 0 .text -.globl _start, shared_info +.globl _start, shared_info, hypercall_page _start: cld @@ -26,7 +26,9 @@ .org 0x1000 shared_info: .org 0x2000 - + +hypercall_page: + .org 0x3000 ES = 0x20 ORIG_EAX = 0x24 diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/x86_64.S --- a/extras/mini-os/x86_64.S Fri Feb 24 21:03:07 2006 +++ b/extras/mini-os/x86_64.S Fri Feb 24 22:41:08 2006 @@ -1,11 +1,16 @@ #include <os.h> .section __xen_guest - .asciz "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE" + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",HYPERCALL_PAGE=0x2" + .ascii ",LOADER=generic" + .ascii ",PT_MODE_WRITABLE" + .byte 0 .text #define ENTRY(X) .globl X ; X : -.globl _start, shared_info +.globl _start, shared_info, hypercall_page #define SAVE_ALL \ cld; \ @@ -56,6 +61,9 @@ .org 0x1000 shared_info: .org 0x2000 + +hypercall_page: + .org 0x3000 ENTRY(hypervisor_callback) popq %rcx diff -r d940ec92958d -r 6c43118bdba8 
linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Fri Feb 24 22:41:08 2006 @@ -780,7 +780,7 @@ config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED - depends on !XEN + depends on !X86_NO_TSS help This option allows trapping of rare doublefault exceptions that would otherwise cause a system to silently reboot. Disabling this @@ -1176,6 +1176,21 @@ depends on X86_SMP || (X86_VOYAGER && SMP) default y +config X86_NO_TSS + bool + depends on X86_XEN + default y + +config X86_SYSENTER + bool + depends on !X86_NO_TSS + default y + +config X86_NO_IDT + bool + depends on X86_XEN + default y + config KTIME_SCALAR bool default y diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Fri Feb 24 22:41:08 2006 @@ -49,11 +49,13 @@ vsyscall_note := vsyscall-note.o endif +VSYSCALL_TYPES-y := int80 +VSYSCALL_TYPES-$(CONFIG_X86_SYSENTER) += sysenter # vsyscall.o contains the vsyscall DSO images as __initdata. # We must build both images before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) +$(obj)/vsyscall.o: $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.so) +targets += $(foreach F,$(VSYSCALL_TYPES-y),vsyscall-$F.o vsyscall-$F.so) targets += $(vsyscall_note) vsyscall.lds # The DSO images are built using a special linker script. 
@@ -81,7 +83,8 @@ SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ - $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE + $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.o) \ + $(obj)/$(vsyscall_note) FORCE $(call if_changed,syscall) ifdef CONFIG_XEN diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c Fri Feb 24 22:41:08 2006 @@ -85,13 +85,6 @@ return 0xff; } -#ifdef CONFIG_XEN -void switch_APIC_timer_to_ipi(void *cpumask) { } -EXPORT_SYMBOL(switch_APIC_timer_to_ipi); -void switch_ipi_to_APIC_timer(void *cpumask) { } -EXPORT_SYMBOL(switch_ipi_to_APIC_timer); -#endif - #ifndef CONFIG_XEN #ifndef CONFIG_SMP static void up_apic_timer_interrupt_call(struct pt_regs *regs) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Fri Feb 24 22:41:08 2006 @@ -583,7 +583,6 @@ } if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8)) BUG(); - lgdt_finish(); } /* @@ -595,7 +594,9 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct * t = &per_cpu(init_tss, cpu); +#endif struct thread_struct *thread = &current->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Fri Feb 24 22:41:08 2006 @@ -223,6 +223,7 @@ jmp need_resched #endif +#ifdef CONFIG_X86_SYSENTER /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
*/ @@ -270,6 +271,7 @@ xorl %ebp,%ebp sti sysexit +#endif /* CONFIG_X86_SYSENTER */ # system call handler stub @@ -662,6 +664,7 @@ call math_state_restore jmp ret_from_exception +#ifdef CONFIG_X86_SYSENTER /* * Debug traps and NMI can happen at the one SYSENTER instruction * that sets up the real kernel stack. Check here, since we can't @@ -683,12 +686,15 @@ pushfl; \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp +#endif /* CONFIG_X86_SYSENTER */ KPROBE_ENTRY(debug) +#ifdef CONFIG_X86_SYSENTER cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: +#endif /* !CONFIG_X86_SYSENTER */ pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # error code 0 diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Feb 24 22:41:08 2006 @@ -1,23 +1,11 @@ -#include <linux/config.h> - -.section __xen_guest - .ascii "GUEST_OS=linux,GUEST_VER=2.6" - .ascii ",XEN_VER=xen-3.0" - .ascii ",VIRT_BASE=0xC0000000" - .ascii ",HYPERCALL_PAGE=0x104" /* __pa(hypercall_page) >> 12 */ -#ifdef CONFIG_X86_PAE - .ascii ",PAE=yes" -#else - .ascii ",PAE=no" -#endif - .ascii ",LOADER=generic" - .byte 0 .text +#include <linux/config.h> #include <linux/threads.h> #include <linux/linkage.h> #include <asm/segment.h> +#include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <xen/interface/arch-x86_32.h> @@ -37,15 +25,10 @@ ENTRY(startup_32) movl %esi,xen_start_info - -#if 0 -ENTRY(startup_32_smp) -#endif /* CONFIG_SMP */ - cld /* Set up the stack pointer */ - lss stack_start,%esp + movl $(init_thread_union+THREAD_SIZE),%esp checkCPUtype: @@ -69,66 +52,54 @@ movb %cl,X86_MASK movl %edx,X86_CAPABILITY + movb $1,X86_HARD_MATH + xorl %eax,%eax # Clear FS/GS and LDT movl %eax,%fs movl %eax,%gs cld # gcc2 wants the direction flag cleared 
at all times -#if 0 - movb ready, %cl - movb $1, ready - cmpb $0,%cl - je 1f # the first CPU calls start_kernel - # all other CPUs call initialize_secondary - call initialize_secondary - jmp L6 -1: -#endif /* CONFIG_SMP */ call start_kernel L6: jmp L6 # main should never return here, but # just in case, we know what happens. -ENTRY(lgdt_finish) - movl $(__KERNEL_DS),%eax # reload all the segment registers - movw %ax,%ss # after changing gdt. +#define HYPERCALL_PAGE_OFFSET 0x1000 +.org HYPERCALL_PAGE_OFFSET +ENTRY(hypercall_page) +.skip 0x1000 - movl $(__USER_DS),%eax # DS/ES contains default USER segment - movw %ax,%ds - movw %ax,%es +/* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) - popl %eax # reload CS by intersegment return - pushl $(__KERNEL_CS) - pushl %eax - lret +/* + * BSS section + */ +.section ".bss.page_aligned","w" +ENTRY(empty_zero_page) + .fill 4096,1,0 -ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE - .long __BOOT_DS - -ready: .byte 0 - -.globl idt_descr -.globl cpu_gdt_descr +/* + * This starts the data section. + */ +.data ALIGN - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - -# boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address + .globl cpu_gdt_descr cpu_gdt_descr: .word GDT_SIZE .long cpu_gdt_table .fill NR_CPUS-1,8,0 # space for the other GDT descriptors -.org 0x1000 -ENTRY(empty_zero_page) - -.org 0x2000 +/* + * The Global Descriptor Table contains 28 quadwords, per-CPU. 
+ */ + .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -143,17 +114,10 @@ .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ -#ifdef CONFIG_X86_PAE - .quad 0x00cfbb00000067ff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cfb300000067ff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffb00000067ff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff300000067ff /* 0x7b user 4GB data at 0x00000000 */ -#else - .quad 0x00cfbb000000c3ff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cfb3000000c3ff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffb000000c3ff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff3000000c3ff /* 0x7b user 4GB data at 0x00000000 */ -#endif + .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ @@ -187,15 +151,36 @@ /* Be sure this is zeroed to avoid false validations in Xen */ .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 -.org 0x3000 -ENTRY(default_ldt) -.org 0x4000 -ENTRY(hypercall_page) +/* + * __xen_guest information + */ +.macro utoa value + .if (\value) < 0 || (\value) >= 0x10 + utoa (((\value)>>4)&0x0fffffff) + .endif + .if ((\value) & 0xf) < 10 + .byte '0' + ((\value) & 0xf) + .else + .byte 'A' + ((\value) & 0xf) - 10 + .endif +.endm -.org 0x5000 -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) +.section __xen_guest + .ascii "GUEST_OS=linux,GUEST_VER=2.6" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x" + utoa __PAGE_OFFSET + .ascii ",HYPERCALL_PAGE=0x" + utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) + .ascii 
",FEATURES=writable_page_tables" + .ascii "|writable_descriptor_tables" + .ascii "|auto_translated_physmap" + .ascii "|supervisor_mode_kernel" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif + .ascii ",LOADER=generic" + .byte 0 diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c Fri Feb 24 22:41:08 2006 @@ -41,9 +41,11 @@ EXPORT_SYMBOL(init_task); +#ifndef CONFIG_X86_NO_TSS /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +#endif diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Fri Feb 24 22:41:08 2006 @@ -272,7 +272,7 @@ regs.xes = __USER_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS; + regs.xcs = GET_KERNEL_CS(); regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ @@ -518,7 +518,9 @@ struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss = &per_cpu(init_tss, cpu); +#endif physdev_op_t iopl_op, iobmp_op; multicall_entry_t _mcl[8], *mcl = _mcl; @@ -543,10 +545,9 @@ * Reload esp0. * This is load_esp0(tss, next) with a multicall. 
*/ - tss->esp0 = next->esp0; mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = tss->ss0; - mcl->args[1] = tss->esp0; + mcl->args[0] = __KERNEL_DS; + mcl->args[1] = next->esp0; mcl++; /* diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Feb 24 22:41:08 2006 @@ -94,9 +94,9 @@ #endif /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 }; +struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 0, 1, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Fri Feb 24 22:41:08 2006 @@ -58,15 +58,20 @@ asmlinkage int system_call(void); +struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 } }; + /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; +#ifndef CONFIG_X86_NO_IDT /* * The IDT has to be page-aligned to simplify the Pentium * F0 0F bug workaround.. We have a special link segment * for this. */ struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +#endif asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -496,20 +501,6 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, long error_code) { - /* - * If we trapped on an LDT access then ensure that the default_ldt is - * loaded, if nothing else. 
We load default_ldt lazily because LDT - * switching costs time and many applications don't need it. - */ - if (unlikely((error_code & 6) == 4)) { - unsigned long ldt; - __asm__ __volatile__ ("sldt %0" : "=r" (ldt)); - if (ldt == 0) { - xen_set_ldt((unsigned long)&default_ldt[0], 5); - return; - } - } - current->thread.error_code = error_code; current->thread.trap_no = 13; @@ -1079,13 +1070,6 @@ } /* - * default LDT is a single-entry callgate to lcall7 for iBCS - * and a callgate to lcall27 for Solaris/x86 binaries - */ - make_lowmem_page_readonly( - &default_ldt[0], XENFEAT_writable_descriptor_tables); - - /* * Should be a barrier for any external CPU state. */ cpu_init(); @@ -1094,12 +1078,6 @@ void smp_trap_init(trap_info_t *trap_ctxt) { trap_info_t *t = trap_table; - int i; - - for (i = 0; i < 256; i++) { - trap_ctxt[i].vector = i; - trap_ctxt[i].cs = FLAT_KERNEL_CS; - } for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Feb 24 22:41:08 2006 @@ -85,7 +85,7 @@ return eip + (seg << 4); /* By far the most common cases. 
*/ - if (likely(seg == __USER_CS || seg == __KERNEL_CS)) + if (likely(seg == __USER_CS || seg == GET_KERNEL_CS())) return eip; /* Check the segment exists, is within the current LDT/GDT size, @@ -396,7 +396,7 @@ switch (error_code & 3) { default: /* 3: write, present */ #ifdef TEST_VERIFY_AREA - if (regs->cs == KERNEL_CS) + if (regs->cs == GET_KERNEL_CS()) printk("WP fault at %08lx\n", regs->eip); #endif /* fall through */ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Feb 24 22:41:08 2006 @@ -324,10 +324,13 @@ .domid = DOMID_SELF }; - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(order >= 1); + /* + * Currently an auto-translated guest will not perform I/O, nor will + * it require PAE page directories below 4GB. Therefore any calls to + * this function are redundant and can be ignored. + */ + if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - } scrub_pages(vstart, 1 << order); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Feb 24 22:41:08 2006 @@ -759,7 +759,7 @@ #endif 0, pgd_ctor, - PTRS_PER_PMD == 1 ? 
pgd_dtor : NULL); + pgd_dtor); if (!pgd_cache) panic("pgtable_cache_init(): Cannot create pgd cache"); } diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Fri Feb 24 22:41:08 2006 @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -184,6 +185,10 @@ __flush_tlb_one(vaddr); } +static int nr_fixmaps = 0; +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE); +EXPORT_SYMBOL(__FIXADDR_TOP); + void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags) { unsigned long address = __fix_to_virt(idx); @@ -194,7 +199,6 @@ } switch (idx) { case FIX_WP_TEST: - case FIX_VSYSCALL: #ifdef CONFIG_X86_F00F_BUG case FIX_F00F_IDT: #endif @@ -204,6 +208,13 @@ set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags); break; } + nr_fixmaps++; +} + +void set_fixaddr_top(unsigned long top) +{ + BUG_ON(nr_fixmaps > 0); + __FIXADDR_TOP = top - PAGE_SIZE; } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) @@ -289,10 +300,11 @@ unsigned long flags; if (PTRS_PER_PMD > 1) { - /* Ensure pgd resides below 4GB. 
*/ - int rc = xen_create_contiguous_region( - (unsigned long)pgd, 0, 32); - BUG_ON(rc); + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) { + int rc = xen_create_contiguous_region( + (unsigned long)pgd, 0, 32); + BUG_ON(rc); + } if (HAVE_SHARED_KERNEL_PMD) memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, @@ -308,16 +320,20 @@ } } -/* never called when PTRS_PER_PMD > 1 */ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); - - pgd_test_and_unpin(pgd); + if (PTRS_PER_PMD > 1) { + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) + xen_destroy_contiguous_region((unsigned long)pgd, 0); + } else { + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); + + pgd_test_and_unpin(pgd); + } } pgd_t *pgd_alloc(struct mm_struct *mm) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/ia64/kernel/entry.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S Fri Feb 24 22:41:08 2006 @@ -569,7 +569,9 @@ .mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 .mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: br.cond.sptk .work_pending_syscall_end +.ret3: +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk + br.cond.sptk .work_pending_syscall_end strace_error: ld8 r3=[r2] // load pt_regs.r8 diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Feb 24 22:41:08 2006 @@ -446,6 +446,7 @@ if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); + 
parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -704,6 +705,9 @@ setup_per_cpu_areas (void) { /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif } /* diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Fri Feb 24 22:41:08 2006 @@ -124,6 +124,16 @@ select SWIOTLB help This option will compile a kernel compatible with Xen hypervisor + +config X86_NO_TSS + bool + depends on X86_64_XEN + default y + +config X86_NO_IDT + bool + depends on X86_64_XEN + default y # # Define implied options from the CPU selection here @@ -529,9 +539,7 @@ source "drivers/acpi/Kconfig" -if !X86_64_XEN source "arch/x86_64/kernel/cpufreq/Kconfig" -endif endmenu diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c Fri Feb 24 22:41:08 2006 @@ -119,7 +119,7 @@ /* Load these always in case some future AMD CPU supports SYSENTER from compat mode too. 
*/ - checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)(__KERNEL_CS | 3)); + checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Fri Feb 24 22:41:08 2006 @@ -57,13 +57,6 @@ if (!disable_apic) ack_APIC_irq(); } - -#ifdef CONFIG_XEN -void switch_APIC_timer_to_ipi(void *cpumask) { } -EXPORT_SYMBOL(switch_APIC_timer_to_ipi); -void switch_ipi_to_APIC_timer(void *cpumask) { } -EXPORT_SYMBOL(switch_ipi_to_APIC_timer); -#endif int setup_profiling_timer(unsigned int multiplier) { diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Feb 24 22:41:08 2006 @@ -61,6 +61,8 @@ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif + +NMI_MASK = 0x80000000 /* * C code is not supposed to know about undefined top of stack. Every time @@ -143,6 +145,18 @@ * #define VGCF_IN_SYSCALL (1<<8) */ .macro HYPERVISOR_IRET flag + testb $3,1*8(%rsp) + jnz 1f + testl $NMI_MASK,2*8(%rsp) + jnz 1f + + /* Direct iret to kernel space. Correct CS and SS. */ + orb $3,1*8(%rsp) + orb $3,4*8(%rsp) + iretq + +1: /* Slow iret via hypervisor. 
*/ + andl $~NMI_MASK, 16(%rsp) pushq $\flag jmp hypercall_page + (__HYPERVISOR_iret * 32) .endm @@ -510,13 +524,7 @@ jnz restore_all_enable_events # != 0 => reenable event delivery XEN_PUT_VCPU_INFO(%rsi) - RESTORE_ARGS 0,8,0 - testb $3,8(%rsp) # check CS - jnz user_mode -kernel_mode: - orb $3,1*8(%rsp) - iretq -user_mode: + RESTORE_ARGS 0,8,0 HYPERVISOR_IRET 0 /* edi: workmask, edx: work */ @@ -811,6 +819,7 @@ ENTRY(do_nmi_callback) addq $8, %rsp call do_nmi + orl $NMI_MASK,EFLAGS(%rsp) RESTORE_REST XEN_BLOCK_EVENTS(%rsi) GET_THREAD_INFO(%rcx) @@ -826,11 +835,6 @@ jnz 14f # process more events if necessary... XEN_PUT_VCPU_INFO(%rsi) RESTORE_ARGS 0,8,0 - testb $3,8(%rsp) # check CS - jnz crit_user_mode - orb $3,1*8(%rsp) - iretq -crit_user_mode: HYPERVISOR_IRET 0 14: XEN_LOCKED_BLOCK_EVENTS(%rsi) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Feb 24 22:41:08 2006 @@ -14,15 +14,6 @@ #include <linux/linkage.h> - -.section __xen_guest - .ascii "GUEST_OS=linux,GUEST_VER=2.6" - .ascii ",XEN_VER=xen-3.0" - .ascii ",VIRT_BASE=0xffffffff80000000" - .ascii ",HYPERCALL_PAGE=0x10a" /* __pa(hypercall_page) >> 12 */ - .ascii ",LOADER=generic" - .byte 0 - #include <linux/threads.h> #include <linux/init.h> #include <asm/desc.h> @@ -30,64 +21,21 @@ #include <asm/page.h> #include <asm/msr.h> #include <asm/cache.h> - -/* we are not able to switch in one step to the final KERNEL ADRESS SPACE - * because we need identity-mapped pages on setup so define __START_KERNEL to - * 0x100000 for this stage - * - */ .text .code64 .globl startup_64 startup_64: ENTRY(_start) - movq %rsi,xen_start_info(%rip) - -#ifdef CONFIG_SMP -ENTRY(startup_64_smp) -#endif /* CONFIG_SMP */ - - cld - - movq init_rsp(%rip),%rsp + movq $(init_thread_union+THREAD_SIZE-8),%rsp /* zero EFLAGS after setting rsp */ pushq $0 popfq - movq 
initial_code(%rip),%rax - jmp *%rax - /* SMP bootup changes these two */ - .globl initial_code -initial_code: - .quad x86_64_start_kernel - .globl init_rsp -init_rsp: - .quad init_thread_union+THREAD_SIZE-8 - -ENTRY(early_idt_handler) - xorl %eax,%eax - movq 8(%rsp),%rsi # get rip - movq (%rsp),%rdx - leaq early_idt_msg(%rip),%rdi -1: hlt # generate #GP - jmp 1b - -early_idt_msg: - .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" - -#if 0 -ENTRY(lgdt_finish) - movl $(__USER_DS),%eax # DS/ES contains default USER segment - movw %ax,%ds - movw %ax,%es - movl $(__KERNEL_DS),%eax - movw %ax,%ss # after changing gdt. - popq %rax # get the retrun address - pushq $(__KERNEL_CS) - pushq %rax - lretq -#endif + /* rsi is pointer to startup info structure. + pass it to C */ + movq %rsi,%rdi + jmp x86_64_start_kernel ENTRY(stext) ENTRY(_stext) @@ -113,39 +61,9 @@ NEXT_PAGE(init_level4_user_pgt) .fill 512,8,0 - /* - * In Xen the following pre-initialized pgt entries are re-initialized. - */ NEXT_PAGE(level3_kernel_pgt) - .fill 510,8,0 - /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ - .quad phys_level2_kernel_pgt | 0x007 - .fill 1,8,0 + .fill 512,8,0 -NEXT_PAGE(level2_ident_pgt) - /* 40MB for bootup. */ - i = 0 - .rept 20 - .quad i << 21 | 0x083 - i = i + 1 - .endr - /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ - .globl temp_boot_pmds -temp_boot_pmds: - .fill 492,8,0 - -NEXT_PAGE(level2_kernel_pgt) - /* 40MB kernel mapping. The kernel code cannot be bigger than that. - When you change this change KERNEL_TEXT_SIZE in page.h too. */ - /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ - i = 0 - .rept 20 - .quad i << 21 | 0x183 - i = i + 1 - .endr - /* Module mapping starts here */ - .fill 492,8,0 - /* * This is used for vsyscall area mapping as we have a different * level4 page table for user. 
@@ -153,78 +71,16 @@ NEXT_PAGE(level3_user_pgt) .fill 512,8,0 -NEXT_PAGE(cpu_gdt_table) -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0 /* unused */ - .quad 0x00affa000000ffff /* __KERNEL_CS */ - .quad 0x00cff2000000ffff /* __KERNEL_DS */ - .quad 0x00cffa000000ffff /* __USER32_CS */ - .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ - .quad 0x00affa000000ffff /* __USER_CS */ - .quad 0x00cffa000000ffff /* __KERNEL32_CS */ - .quad 0,0 /* TSS */ - .quad 0,0 /* LDT */ - .quad 0,0,0 /* three TLS descriptors */ - .quad 0 /* unused */ -gdt_end: -#if 0 - /* asm/segment.h:GDT_ENTRIES must match this */ - /* This should be a multiple of the cache line size */ - /* GDTs of other CPUs are now dynamically allocated */ - - /* zero the remaining page */ - .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 -#endif +NEXT_PAGE(level2_kernel_pgt) + .fill 512,8,0 NEXT_PAGE(empty_zero_page) - -NEXT_PAGE(level3_physmem_pgt) - .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ + .skip PAGE_SIZE NEXT_PAGE(hypercall_page) -.if (phys_hypercall_page - 0x10a000) - /* cause compiler error if the hypercall_page is at a - * different address than expected. */ - .quad __adjust_hypercall_page_in_header -.endif .fill 512,8,0 #undef NEXT_PAGE - - .data - -#ifndef CONFIG_XEN -#ifdef CONFIG_ACPI_SLEEP - .align PAGE_SIZE -ENTRY(wakeup_level4_pgt) - .quad phys_level3_ident_pgt | 0x007 - .fill 255,8,0 - .quad phys_level3_physmem_pgt | 0x007 - .fill 254,8,0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad phys_level3_kernel_pgt | 0x007 -#endif - -#ifndef CONFIG_HOTPLUG_CPU - __INITDATA -#endif - /* - * This default setting generates an ident mapping at address 0x100000 - * and a mapping for the kernel that precisely maps virtual address - * 0xffffffff80000000 to physical address 0x000000. 
(always using - * 2Mbyte large pages provided by PAE mode) - */ - .align PAGE_SIZE -ENTRY(boot_level4_pgt) - .quad phys_level3_ident_pgt | 0x007 - .fill 255,8,0 - .quad phys_level3_physmem_pgt | 0x007 - .fill 254,8,0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad phys_level3_kernel_pgt | 0x007 -#endif .data @@ -246,11 +102,55 @@ * Also sysret mandates a special GDT layout */ -.align PAGE_SIZE + .section .data.page_aligned, "aw" + .align PAGE_SIZE -ENTRY(idt_table) - .rept 256 - .quad 0 - .quad 0 - .endr +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0 /* unused */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x00cffa000000ffff /* __USER32_CS */ + .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ + .quad 0x00affa000000ffff /* __USER_CS */ + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ + .quad 0,0 /* TSS */ + .quad 0,0 /* LDT */ + .quad 0,0,0 /* three TLS descriptors */ + .quad 0 /* unused */ +gdt_end: + /* asm/segment.h:GDT_ENTRIES must match this */ + /* This should be a multiple of the cache line size */ + /* GDTs of other CPUs are now dynamically allocated */ + + /* zero the remaining page */ + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 + +/* + * __xen_guest information + */ +.macro utoh value + .if (\value) < 0 || (\value) >= 0x10 + utoh (((\value)>>4)&0x0fffffffffffffff) + .endif + .if ((\value) & 0xf) < 10 + .byte '0' + ((\value) & 0xf) + .else + .byte 'A' + ((\value) & 0xf) - 10 + .endif +.endm + +.section __xen_guest + .ascii "GUEST_OS=linux,GUEST_VER=2.6" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x"; utoh __START_KERNEL_map + .ascii ",HYPERCALL_PAGE=0x"; utoh (phys_hypercall_page >> PAGE_SHIFT) + .ascii ",FEATURES=writable_page_tables" + .ascii "|writable_descriptor_tables" + .ascii "|auto_translated_physmap" + .ascii 
"|supervisor_mode_kernel" + .ascii ",LOADER=generic" + .byte 0 diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Feb 24 22:41:08 2006 @@ -47,7 +47,7 @@ static void __init copy_bootdata(char *real_mode_data) { -#if 0 +#ifndef CONFIG_XEN int new_data; char * command_line; @@ -63,7 +63,6 @@ } command_line = (char *) ((u64)(new_data)); memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); - printk("Bootdata ok (command line is %s)\n", saved_command_line); #else int max_cmdline; @@ -71,8 +70,8 @@ max_cmdline = COMMAND_LINE_SIZE; memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); saved_command_line[max_cmdline-1] = '\0'; +#endif printk("Bootdata ok (command line is %s)\n", saved_command_line); -#endif } static void __init setup_boot_cpu_data(void) @@ -98,6 +97,7 @@ char *s; int i; + xen_start_info = (struct start_info *)real_mode_data; if (!xen_feature(XENFEAT_auto_translated_physmap)) { phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; @@ -105,9 +105,9 @@ xen_start_info->nr_pt_frames; } +#if 0 for (i = 0; i < 256; i++) set_intr_gate(i, early_idt_handler); -#if 0 asm volatile("lidt %0" :: "m" (idt_descr)); #endif diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Fri Feb 24 22:41:08 2006 @@ -30,6 +30,9 @@ #include <linux/mc146818rtc.h> #include <linux/acpi.h> #include <linux/sysdev.h> +#ifdef CONFIG_ACPI +#include <acpi/acpi_bus.h> +#endif #include <asm/io.h> #include <asm/smp.h> @@ -309,6 +312,8 @@ Can be overwritten with "apic" And another hack to disable the IOMMU on VIA chipsets. + + ... and others. Really should move this somewhere else. Kludge-O-Rama. 
*/ void __init check_ioapic(void) @@ -358,6 +363,17 @@ #ifndef CONFIG_XEN if (apic_runs_main_timer != 0) break; +#ifdef CONFIG_ACPI + /* Don't do this for laptops right + right now because their timer + doesn't necessarily tick in C2/3 */ + if (acpi_fadt.revision >= 3 && + (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) { + printk(KERN_INFO +"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n"); + break; + } +#endif printk(KERN_INFO "ATI board detected. Using APIC/PM timer.\n"); apic_runs_main_timer = 1; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c Fri Feb 24 22:41:08 2006 @@ -295,9 +295,9 @@ memcpy(str,mpc->mpc_productid,12); str[12]=0; - printk(KERN_INFO "Product ID: %s ",str); - - printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); + printk("Product ID: %s ",str); + + printk("APIC at: 0x%X\n",mpc->mpc_lapic); /* save the local APIC address, it might be non-default */ if (!acpi_lapic) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Fri Feb 24 22:41:08 2006 @@ -289,16 +289,28 @@ kprobe_flush_task(me); if (me->thread.io_bitmap_ptr) { +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); +#endif +#ifdef CONFIG_XEN + static physdev_op_t iobmp_op = { + .cmd = PHYSDEVOP_SET_IOBITMAP + }; +#endif kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ +#ifndef CONFIG_X86_NO_TSS memset(tss->io_bitmap, 0xff, t->io_bitmap_max); + put_cpu(); +#endif +#ifdef CONFIG_XEN + HYPERVISOR_physdev_op(&iobmp_op); +#endif t->io_bitmap_max = 0; - put_cpu(); } } @@ -463,7 +475,9 @@ struct thread_struct *prev = 
&prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss = &per_cpu(init_tss, cpu); +#endif physdev_op_t iopl_op, iobmp_op; multicall_entry_t _mcl[8], *mcl = _mcl; @@ -482,10 +496,9 @@ /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; mcl->op = __HYPERVISOR_stack_switch; mcl->args[0] = __KERNEL_DS; - mcl->args[1] = tss->rsp0; + mcl->args[1] = next->rsp0; mcl++; /* diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c Fri Feb 24 22:41:08 2006 @@ -40,7 +40,9 @@ struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; +#ifndef CONFIG_X86_NO_IDT struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; +#endif char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); @@ -155,13 +157,7 @@ void __init cpu_gdt_init(struct desc_ptr *gdt_descr) { -#ifdef CONFIG_SMP - int cpu = stack_smp_processor_id(); -#else - int cpu = smp_processor_id(); -#endif - - asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); + asm volatile("lgdt %0" :: "m" (*gdt_descr)); asm volatile("lidt %0" :: "m" (idt_descr)); } #endif @@ -203,8 +199,10 @@ pda->irqstackptr += IRQSTACKSIZE-64; } +#ifndef CONFIG_X86_NO_TSS char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ] __attribute__((section(".bss.page_aligned"))); +#endif /* May not be marked __init: used by software suspend */ void syscall_init(void) @@ -246,18 +244,23 @@ void __cpuinit cpu_init (void) { int cpu = stack_smp_processor_id(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct *t = &per_cpu(init_tss, cpu); unsigned long v; char *estacks = NULL; + unsigned i; +#endif struct task_struct *me; - int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) { 
pda_init(cpu); zap_low_mappings(cpu); - } else + } +#ifndef CONFIG_X86_NO_TSS + else estacks = boot_exception_stacks; +#endif me = current; @@ -278,12 +281,7 @@ #endif cpu_gdt_descr[cpu].size = GDT_SIZE; -#ifndef CONFIG_XEN - asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); - asm volatile("lidt %0" :: "m" (idt_descr)); -#else cpu_gdt_init(&cpu_gdt_descr[cpu]); -#endif memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); @@ -294,6 +292,7 @@ check_efer(); +#ifndef CONFIG_X86_NO_TSS /* * set up and load the per-CPU TSS */ @@ -330,6 +329,7 @@ */ for (i = 0; i <= IO_BITMAP_LONGS; i++) t->io_bitmap[i] = ~0UL; +#endif atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; @@ -337,8 +337,10 @@ BUG(); enter_lazy_tlb(&init_mm, me); +#ifndef CONFIG_X86_NO_TSS + set_tss_desc(cpu, t); +#endif #ifndef CONFIG_XEN - set_tss_desc(cpu, t); load_TR_desc(); #endif load_LDT(&init_mm.context); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Fri Feb 24 22:41:08 2006 @@ -47,7 +47,9 @@ #include <asm/proto.h> #include <asm/nmi.h> +#ifndef CONFIG_X86_NO_IDT extern struct gate_struct idt_table[256]; +#endif asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -134,6 +136,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, const char **idp) { +#ifndef CONFIG_X86_NO_TSS static char ids[][8] = { [DEBUG_STACK - 1] = "#DB", [NMI_STACK - 1] = "NMI", @@ -185,6 +188,7 @@ } #endif } +#endif return NULL; } @@ -948,28 +952,28 @@ * specify <dpl>|4 in the second field. 
*/ static trap_info_t trap_table[] = { - { 0, 0|4, (__KERNEL_CS|0x3), (unsigned long)divide_error }, - { 1, 0|4, (__KERNEL_CS|0x3), (unsigned long)debug }, - { 3, 3|4, (__KERNEL_CS|0x3), (unsigned long)int3 }, - { 4, 3|4, (__KERNEL_CS|0x3), (unsigned long)overflow }, - { 5, 0|4, (__KERNEL_CS|0x3), (unsigned long)bounds }, - { 6, 0|4, (__KERNEL_CS|0x3), (unsigned long)invalid_op }, - { 7, 0|4, (__KERNEL_CS|0x3), (unsigned long)device_not_available }, - { 9, 0|4, (__KERNEL_CS|0x3), (unsigned long)coprocessor_segment_overrun}, - { 10, 0|4, (__KERNEL_CS|0x3), (unsigned long)invalid_TSS }, - { 11, 0|4, (__KERNEL_CS|0x3), (unsigned long)segment_not_present }, - { 12, 0|4, (__KERNEL_CS|0x3), (unsigned long)stack_segment }, - { 13, 0|4, (__KERNEL_CS|0x3), (unsigned long)general_protection }, - { 14, 0|4, (__KERNEL_CS|0x3), (unsigned long)page_fault }, - { 15, 0|4, (__KERNEL_CS|0x3), (unsigned long)spurious_interrupt_bug }, - { 16, 0|4, (__KERNEL_CS|0x3), (unsigned long)coprocessor_error }, - { 17, 0|4, (__KERNEL_CS|0x3), (unsigned long)alignment_check }, + { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, + { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3|4, __KERNEL_CS, (unsigned long)overflow }, + { 5, 0|4, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0|4, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, + { 9, 0|4, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun}, + { 10, 0|4, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0|4, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0|4, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0|4, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0|4, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0|4, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0|4, __KERNEL_CS, (unsigned long)alignment_check }, #ifdef 
CONFIG_X86_MCE - { 18, 0|4, (__KERNEL_CS|0x3), (unsigned long)machine_check }, -#endif - { 19, 0|4, (__KERNEL_CS|0x3), (unsigned long)simd_coprocessor_error }, + { 18, 0|4, __KERNEL_CS, (unsigned long)machine_check }, +#endif + { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, #ifdef CONFIG_IA32_EMULATION - { IA32_SYSCALL_VECTOR, 3|4, (__KERNEL_CS|0x3), (unsigned long)ia32_syscall}, + { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall}, #endif { 0, 0, 0, 0 } }; @@ -984,10 +988,6 @@ printk("HYPERVISOR_set_trap_table faild: error %d\n", ret); -#ifdef CONFIG_IA32_EMULATION - set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); -#endif - /* * Should be a barrier for any external CPU state. */ @@ -997,12 +997,6 @@ void smp_trap_init(trap_info_t *trap_ctxt) { trap_info_t *t = trap_table; - int i; - - for (i = 0; i < 256; i++) { - trap_ctxt[i].vector = i; - trap_ctxt[i].cs = FLAT_KERNEL_CS; - } for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Feb 24 22:41:08 2006 @@ -152,10 +152,7 @@ pmd_t *pmd; pte_t *pte; - asm("movq %%cr3,%0" : "=r" (pgd)); - pgd = (pgd_t *)machine_to_phys((maddr_t)pgd); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); + pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); pgd += pgd_index(address); if (bad_address(pgd)) goto bad; printk("PGD %lx ", pgd_val(*pgd)); @@ -261,9 +258,7 @@ /* On Xen the line below does not always work. Needs investigating! 
*/ /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ - asm("movq %%cr3,%0" : "=r" (pgd)); - pgd = (pgd_t *)machine_to_phys((maddr_t)pgd); - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); + pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); pgd += pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Fri Feb 24 22:41:08 2006 @@ -53,7 +53,7 @@ down(&chip->buffer_mutex); atomic_set(&chip->data_pending, 0); - memset(chip->data_buffer, 0, chip->vendor->buffersize); + memset(chip->data_buffer, 0, get_chip_buffersize(chip)); up(&chip->buffer_mutex); } @@ -352,7 +352,7 @@ spin_unlock(&driver_lock); - chip->data_buffer = kmalloc(chip->vendor->buffersize * sizeof(u8), GFP_KERNEL); + chip->data_buffer = kmalloc(get_chip_buffersize(chip) * sizeof(u8), GFP_KERNEL); if (chip->data_buffer == NULL) { chip->num_opens--; put_device(chip->dev); @@ -400,8 +400,8 @@ down(&chip->buffer_mutex); - if (in_size > chip->vendor->buffersize) - in_size = chip->vendor->buffersize; + if (in_size > get_chip_buffersize(chip)) + in_size = get_chip_buffersize(chip); if (copy_from_user (chip->data_buffer, (void __user *) buf, in_size)) { @@ -411,7 +411,7 @@ /* atomic tpm command send and result receive */ out_size = tpm_transmit(chip, chip->data_buffer, - chip->vendor->buffersize); + get_chip_buffersize(chip)); atomic_set(&chip->data_pending, out_size); atomic_set(&chip->data_position, 0); @@ -432,8 +432,6 @@ int ret_size; int pos, pending = 0; - del_singleshot_timer_sync(&chip->user_read_timer); - flush_scheduled_work(); ret_size = atomic_read(&chip->data_pending); if (ret_size > 0) { /* relay data */ if (size < ret_size) @@ -457,6 +455,7 @@ if ( ret_size <= 0 || pending == 0 ) { atomic_set( &chip->data_pending, 0 ); del_singleshot_timer_sync(&chip->user_read_timer); + 
flush_scheduled_work(); } return ret_size; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Fri Feb 24 22:41:08 2006 @@ -101,6 +101,11 @@ outb(value & 0xFF, base+1); } +static inline u32 get_chip_buffersize(struct tpm_chip *chip) +{ + return chip->vendor->buffersize; +} + extern int tpm_register_hardware(struct device *, struct tpm_vendor_specific *); extern int tpm_open(struct inode *, struct file *); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri Feb 24 22:41:08 2006 @@ -25,6 +25,7 @@ #include <xen/tpmfe.h> #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/platform_device.h> #include "tpm.h" /* read status bits */ @@ -455,9 +456,7 @@ .buffersize = 64 * 1024, }; -static struct device tpm_device = { - .bus_id = "vtpm", -}; +static struct platform_device *pdev; static struct tpmfe_device tpmfe = { .receive = tpm_recv, @@ -477,23 +476,22 @@ * driver */ if ((rc = tpm_fe_register_receiver(&tpmfe)) < 0) { - return rc; + goto err_exit; } /* * Register our device with the system. 
*/ - if ((rc = device_register(&tpm_device)) < 0) { - tpm_fe_unregister_receiver(); - return rc; + pdev = platform_device_register_simple("tpm_vtpm", -1, NULL, 0); + if (IS_ERR(pdev)) { + rc = PTR_ERR(pdev); + goto err_unreg_fe; } tpm_xen.buffersize = tpmfe.max_tx_size; - if ((rc = tpm_register_hardware(&tpm_device, &tpm_xen)) < 0) { - device_unregister(&tpm_device); - tpm_fe_unregister_receiver(); - return rc; + if ((rc = tpm_register_hardware(&pdev->dev, &tpm_xen)) < 0) { + goto err_unreg_pdev; } dataex.current_request = NULL; @@ -508,13 +506,25 @@ disconnect_time = jiffies; return 0; + + +err_unreg_pdev: + platform_device_unregister(pdev); +err_unreg_fe: + tpm_fe_unregister_receiver(); + +err_exit: + return rc; } static void __exit cleanup_xen(void) { - tpm_remove_hardware(&tpm_device); - device_unregister(&tpm_device); - tpm_fe_unregister_receiver(); + struct tpm_chip *chip = dev_get_drvdata(&pdev->dev); + if (chip) { + tpm_remove_hardware(chip->dev); + platform_device_unregister(pdev); + tpm_fe_unregister_receiver(); + } } module_init(init_xen); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tty_io.c --- a/linux-2.6-xen-sparse/drivers/char/tty_io.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c Fri Feb 24 22:41:08 2006 @@ -1843,7 +1843,6 @@ tty_closing = tty->count <= 1; o_tty_closing = o_tty && (o_tty->count <= (pty_master ? 1 : 0)); - up(&tty_sem); do_sleep = 0; if (tty_closing) { @@ -1871,6 +1870,7 @@ printk(KERN_WARNING "release_dev: %s: read/write wait queue " "active!\n", tty_name(tty, buf)); + up(&tty_sem); schedule(); } @@ -1879,8 +1879,6 @@ * both sides, and we've completed the last operation that could * block, so it's safe to proceed with closing. 
*/ - - down(&tty_sem); if (pty_master) { if (--o_tty->count < 0) { printk(KERN_WARNING "release_dev: bad pty slave count " @@ -1894,7 +1892,6 @@ tty->count, tty_name(tty, buf)); tty->count = 0; } - up(&tty_sem); /* * We've decremented tty->count, so we need to remove this file @@ -1938,6 +1935,8 @@ } while_each_task_pid(o_tty->session, PIDTYPE_SID, p); read_unlock(&tasklist_lock); } + + up(&tty_sem); /* check whether both sides are closing ... */ if (!tty_closing || (o_tty && !o_tty_closing)) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/serial/Kconfig --- a/linux-2.6-xen-sparse/drivers/serial/Kconfig Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig Fri Feb 24 22:41:08 2006 @@ -903,8 +903,8 @@ something like this to connect more than two modems to your Linux box, for instance in order to become a dial-in server. This driver supports PCI boards only. - If you have a card like this, say Y here and read the file - <file:Documentation/jsm.txt>. + + If you have a card like this, say Y here, otherwise say N. To compile this driver as a module, choose M here: the module will be called jsm. diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Feb 24 22:41:08 2006 @@ -726,8 +726,7 @@ int j; /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); - BUG_ON(copy == NULL); + copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL); memcpy(copy, info->shadow, sizeof(info->shadow)); /* Stage 2: Set up free list. 
*/ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri Feb 24 22:41:08 2006 @@ -222,25 +222,22 @@ } int -gnttab_grant_foreign_transfer(domid_t domid) +gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) { int ref; if (unlikely((ref = get_free_entry()) == -1)) return -ENOSPC; - - shared[ref].frame = 0; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_accept_transfer; + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); return ref; } void -gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid) -{ - shared[ref].frame = 0; +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Fri Feb 24 22:41:08 2006 @@ -188,7 +188,7 @@ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; #else /* __x86_64__ */ - ctxt.user_regs.cs = __KERNEL_CS | 3; + ctxt.user_regs.cs = __KERNEL_CS; ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); ctxt.kernel_ss = __KERNEL_DS; @@ -237,7 +237,6 @@ #ifdef __x86_64__ cpu_pda(cpu)->pcurrent = idle; cpu_pda(cpu)->cpunumber = cpu; - per_cpu(init_tss,cpu).rsp0 = idle->thread.rsp; clear_ti_thread_flag(idle->thread_info, TIF_FORK); #endif diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Fri Feb 24 22:41:08 2006 @@ -137,10 +137,13 @@ sprintf(dev_name, "vif0.%d", i); dev1 = alloc_netdev(sizeof(struct 
net_private), dev_name, ether_setup); + if (!dev1) + return err; + sprintf(dev_name, "veth%d", i); dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup); - if ((dev1 == NULL) || (dev2 == NULL)) - goto fail; + if (!dev2) + goto fail_netdev2; loopback_construct(dev1, dev2); loopback_construct(dev2, dev1); @@ -169,8 +172,9 @@ return 0; fail: - kfree(dev1); - kfree(dev2); + free_netdev(dev2); + fail_netdev2: + free_netdev(dev1); return err; } diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Feb 24 22:41:08 2006 @@ -587,25 +587,23 @@ BUG_ON((signed short)ref < 0); np->grant_rx_ref[id] = ref; gnttab_grant_foreign_transfer_ref(ref, - np->xbdev->otherend_id); + np->xbdev->otherend_id, + __pa(skb->head) >> PAGE_SHIFT); RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref; rx_pfn_array[i] = virt_to_mfn(skb->head); - /* Remove this page from map before passing back to Xen. */ - set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, - INVALID_P2M_ENTRY); - - MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head, - __pte(0), 0); - } - - /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - - /* Give away a batch of pages. */ - rx_mcl[i].op = __HYPERVISOR_memory_op; - rx_mcl[i].args[0] = XENMEM_decrease_reservation; - rx_mcl[i].args[1] = (unsigned long)&reservation; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing back to Xen. */ + set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + MULTI_update_va_mapping(rx_mcl+i, + (unsigned long)skb->head, + __pte(0), 0); + } + } + + /* Tell the ballon driver what is going on. 
*/ + balloon_update_driver_allowance(i); reservation.extent_start = rx_pfn_array; reservation.nr_extents = i; @@ -613,15 +611,27 @@ reservation.address_bits = 0; reservation.domid = DOMID_SELF; - /* Tell the ballon driver what is going on. */ - balloon_update_driver_allowance(i); - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(rx_mcl, i+1); - - /* Check return status of HYPERVISOR_memory_op(). */ - if (unlikely(rx_mcl[i].result != i)) - panic("Unable to reduce memory reservation\n"); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* After all PTEs have been zapped, flush the TLB. */ + rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + rx_mcl[i].op = __HYPERVISOR_memory_op; + rx_mcl[i].args[0] = XENMEM_decrease_reservation; + rx_mcl[i].args[1] = (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big multicall. */ + (void)HYPERVISOR_multicall(rx_mcl, i+1); + + /* Check return status of HYPERVISOR_memory_op(). */ + if (unlikely(rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); + } /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; @@ -802,17 +812,19 @@ np->stats.rx_packets++; np->stats.rx_bytes += rx->status; - /* Remap the page. */ - MULTI_update_va_mapping(mcl, (unsigned long)skb->head, - pfn_pte_ma(mfn, PAGE_KERNEL), 0); - mcl++; if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. 
*/ + MULTI_update_va_mapping(mcl, (unsigned long)skb->head, + pfn_pte_ma(mfn, PAGE_KERNEL), + 0); + mcl++; mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = __pa(skb->head) >> PAGE_SHIFT; mmu++; - set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, mfn); + set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, + mfn); } __skb_queue_tail(&rxq, skb); @@ -1003,7 +1015,8 @@ if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET) continue; gnttab_grant_foreign_transfer_ref( - np->grant_rx_ref[i], np->xbdev->otherend_id); + np->grant_rx_ref[i], np->xbdev->otherend_id, + __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT); RING_GET_REQUEST(&np->rx, requeue_idx)->gref = np->grant_rx_ref[i]; RING_GET_REQUEST(&np->rx, requeue_idx)->id = i; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Fri Feb 24 22:41:08 2006 @@ -13,9 +13,6 @@ #include "common.h" #include <xen/balloon.h> - -#define TPMIF_HASHSZ (2 << 5) -#define TPMIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(TPMIF_HASHSZ-1)) static kmem_cache_t *tpmif_cachep; int num_frontends = 0; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Feb 24 22:41:08 2006 @@ -65,8 +65,6 @@ int isuserbuffer, u32 left); - -#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE #define MIN(x,y) (x) < (y) ? 
(x) : (y) @@ -973,8 +971,6 @@ static void tpm_tx_action(unsigned long unused); static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0); -#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE - static struct list_head tpm_schedule_list; static spinlock_t tpm_schedule_list_lock; diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Fri Feb 24 22:41:08 2006 @@ -61,6 +61,7 @@ "rorl $16,%1" \ : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) +#ifndef CONFIG_X86_NO_TSS static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) { _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr, @@ -68,6 +69,7 @@ } #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) +#endif static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Fri Feb 24 22:41:08 2006 @@ -20,7 +20,7 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. 
*/ -#define __FIXADDR_TOP (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE) +extern unsigned long __FIXADDR_TOP; #ifndef __ASSEMBLY__ #include <linux/kernel.h> @@ -53,7 +53,6 @@ */ enum fixed_addresses { FIX_HOLE, - FIX_VSYSCALL, #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif @@ -99,8 +98,10 @@ __end_of_fixed_addresses }; -extern void __set_fixmap( - enum fixed_addresses idx, maddr_t phys, pgprot_t flags); +extern void __set_fixmap(enum fixed_addresses idx, + maddr_t phys, pgprot_t flags); + +extern void set_fixaddr_top(unsigned long top); #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) @@ -122,14 +123,6 @@ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -/* - * This is the range that is readable by user mode, and things - * acting like user mode such as get_user_pages. - */ -#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL)) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) - extern void __this_fixmap_does_not_exist(void); diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Feb 24 22:41:08 2006 @@ -60,9 +60,6 @@ /* arch/xen/i386/kernel/hypervisor.c */ void do_hypervisor_callback(struct pt_regs *regs); - -/* arch/xen/i386/kernel/head.S */ -void lgdt_finish(void); /* arch/xen/i386/mm/hypervisor.c */ /* diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Feb 24 22:41:08 2006 @@ -294,7 +294,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned 
long)__VMALLOC_RESERVE) -#define MAXMEM (HYPERVISOR_VIRT_START-__PAGE_OFFSET-__VMALLOC_RESERVE) +#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) @@ -317,6 +317,8 @@ #define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT)) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* __KERNEL__ */ #include <asm-generic/page.h> diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h Fri Feb 24 22:41:08 2006 @@ -91,8 +91,10 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; +#ifndef CONFIG_X86_NO_TSS extern struct tss_struct doublefault_tss; DECLARE_PER_CPU(struct tss_struct, init_tss); +#endif #ifdef CONFIG_SMP extern struct cpuinfo_x86 cpu_data[]; @@ -343,7 +345,9 @@ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#ifndef CONFIG_X86_NO_TSS #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#endif #define INVALID_IO_BITMAP_OFFSET 0x8000 #define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 @@ -401,6 +405,7 @@ struct thread_struct; +#ifndef CONFIG_X86_NO_TSS struct tss_struct { unsigned short back_link,__blh; unsigned long esp0; @@ -446,6 +451,7 @@ */ unsigned long stack[64]; } __attribute__((packed)); +#endif #define ARCH_MIN_TASKALIGN 16 @@ -482,6 +488,7 @@ .io_bitmap_ptr = NULL, \ } +#ifndef CONFIG_X86_NO_TSS /* * Note that the .io_bitmap member must be extra-big. This is because * the CPU will access an additional byte beyond the end of the IO @@ -496,16 +503,23 @@ .io_bitmap = { [ 0 ... 
IO_BITMAP_LONGS] = ~0 }, \ } -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->esp0 = thread->esp0; +#ifdef CONFIG_X86_SYSENTER /* This can only happen when SEP is enabled, no need to test "SEP"arately */ if (unlikely(tss->ss1 != thread->sysenter_cs)) { tss->ss1 = thread->sysenter_cs; wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } - HYPERVISOR_stack_switch(tss->ss0, tss->esp0); -} +#endif +} +#define load_esp0(tss, thread) \ + __load_esp0(tss, thread) +#else +#define load_esp0(tss, thread) \ + HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0) +#endif #define start_thread(regs, new_eip, new_esp) do { \ __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Fri Feb 24 22:41:08 2006 @@ -60,10 +60,12 @@ #define GDT_ENTRY_KERNEL_BASE 12 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8 + 1) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8 + 1) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) +#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) @@ -85,9 +87,11 @@ /* Simple and small GDT entries for booting only */ -#define __BOOT_CS FLAT_KERNEL_CS +#define GDT_ENTRY_BOOT_CS 2 +#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) -#define __BOOT_DS FLAT_KERNEL_DS +#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) +#define 
__BOOT_DS (GDT_ENTRY_BOOT_DS * 8) /* The PnP BIOS entries in the GDT */ #define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Feb 24 22:41:08 2006 @@ -25,6 +25,7 @@ clear_bit(X86_FEATURE_SEP, c->x86_capability); if (!(xen_start_info->flags & SIF_PRIVILEGED)) clear_bit(X86_FEATURE_MTRR, c->x86_capability); + c->hlt_works_ok = 0; } extern void hypervisor_callback(void); @@ -33,6 +34,8 @@ static void __init machine_specific_arch_setup(void) { + struct xen_platform_parameters pp; + HYPERVISOR_set_callbacks( __KERNEL_CS, (unsigned long)hypervisor_callback, __KERNEL_CS, (unsigned long)failsafe_callback); @@ -40,4 +43,8 @@ HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi); machine_specific_modify_cpu_capabilities(&boot_cpu_data); + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, + &pp) == 0) + set_fixaddr_top(pp.virt_start - PAGE_SIZE); } diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h Fri Feb 24 22:41:08 2006 @@ -90,7 +90,9 @@ * something other than this. 
*/ extern struct desc_struct default_ldt[]; +#ifndef CONFIG_X86_NO_IDT extern struct gate_struct idt_table[]; +#endif extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ @@ -113,6 +115,7 @@ memcpy(adr, &s, 16); } +#ifndef CONFIG_X86_NO_IDT static inline void set_intr_gate(int nr, void *func) { BUG_ON((unsigned)nr > 0xFF); @@ -135,6 +138,7 @@ { _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } +#endif static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, unsigned size) @@ -152,6 +156,7 @@ memcpy(ptr, &d, 16); } +#ifndef CONFIG_X86_NO_TSS static inline void set_tss_desc(unsigned cpu, void *addr) { /* @@ -165,6 +170,7 @@ (unsigned long)addr, DESC_TSS, IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); } +#endif static inline void set_ldt_desc(unsigned cpu, void *addr, int size) { diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h Fri Feb 24 22:41:08 2006 @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> - /* * The DMA channel used by the floppy controller cannot access data at * addresses >= 16MB @@ -25,8 +24,6 @@ */ #define _CROSS_64KB(a,s,vdma) \ (!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) - -#include <linux/vmalloc.h> /* XEN: Hit DMA paths on the head. This trick from asm-m68k/floppy.h. */ #include <asm/dma.h> @@ -43,8 +40,12 @@ #define fd_disable_irq() disable_irq(FLOPPY_IRQ) #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) #define fd_get_dma_residue() vdma_get_dma_residue(FLOPPY_DMA) -#define fd_dma_mem_alloc(size) vdma_mem_alloc(size) -#define fd_dma_mem_free(addr, size) vdma_mem_free(addr, size) +/* + * Do not use vmalloc/vfree: floppy_release_irq_and_dma() gets called from + * softirq context via motor_off_callback. 
A generic bug we happen to trigger. + */ +#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL, get_order(size)) +#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) #define fd_dma_setup(addr, size, mode, io) vdma_dma_setup(addr, size, mode, io) static int virtual_dma_count; @@ -137,7 +138,7 @@ "floppy", NULL); } - +#if 0 static unsigned long vdma_mem_alloc(unsigned long size) { return (unsigned long) vmalloc(size); @@ -148,6 +149,7 @@ { vfree((void *)addr); } +#endif static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) { @@ -168,7 +170,7 @@ { use_virtual_dma = 1; can_use_virtual_dma = 1; - return 0x340; + return 0x3f0; } /* diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Fri Feb 24 22:41:08 2006 @@ -196,7 +196,9 @@ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#ifndef CONFIG_X86_NO_TSS #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#endif #define INVALID_IO_BITMAP_OFFSET 0x8000 struct i387_fxsave_struct { @@ -217,6 +219,7 @@ struct i387_fxsave_struct fxsave; }; +#ifndef CONFIG_X86_NO_TSS struct tss_struct { u32 reserved1; u64 rsp0; @@ -240,8 +243,10 @@ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; } __attribute__((packed)) ____cacheline_aligned; +DECLARE_PER_CPU(struct tss_struct,init_tss); +#endif + extern struct cpuinfo_x86 boot_cpu_data; -DECLARE_PER_CPU(struct tss_struct,init_tss); #ifdef CONFIG_X86_VSMP #define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) @@ -283,9 +288,11 @@ .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ } +#ifndef CONFIG_X86_NO_TSS #define INIT_TSS { \ .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ } +#endif #define INIT_MMAP \ { &init_mm, 0, 0, NULL, 
PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h Fri Feb 24 22:41:08 2006 @@ -182,7 +182,7 @@ #define read_cr3() ({ \ unsigned long __dummy; \ asm("movq %%cr3,%0" : "=r" (__dummy)); \ - return machine_to_phys(__dummy); \ + machine_to_phys(__dummy); \ }) static inline unsigned long read_cr4(void) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/linux/mm.h --- a/linux-2.6-xen-sparse/include/linux/mm.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/linux/mm.h Fri Feb 24 22:41:08 2006 @@ -1064,5 +1064,7 @@ void drop_pagecache(void); void drop_slab(void); +extern int randomize_va_space; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/xen/gnttab.h --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Fri Feb 24 22:41:08 2006 @@ -71,7 +71,7 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); -int gnttab_grant_foreign_transfer(domid_t domid); +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); @@ -98,7 +98,8 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); -void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid); +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); #ifdef __ia64__ #define gnttab_map_vaddr(map) __va(map.dev_bus_addr) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/mm/memory.c --- a/linux-2.6-xen-sparse/mm/memory.c Fri Feb 24 21:03:07 2006 +++ 
b/linux-2.6-xen-sparse/mm/memory.c Fri Feb 24 22:41:08 2006 @@ -81,6 +81,16 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); + +int randomize_va_space __read_mostly = 1; + +static int __init disable_randmaps(char *s) +{ + randomize_va_space = 0; + return 0; +} +__setup("norandmaps", disable_randmaps); + /* * If a p?d_bad entry is found while walking page tables, report diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/mm/page_alloc.c --- a/linux-2.6-xen-sparse/mm/page_alloc.c Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c Fri Feb 24 22:41:08 2006 @@ -56,6 +56,7 @@ int percpu_pagelist_fraction; static void fastcall free_hot_cold_page(struct page *page, int cold); +static void __free_pages_ok(struct page *page, unsigned int order); /* * results with 256, 32 in the lowmem_reserve sysctl: @@ -169,20 +170,23 @@ * All pages have PG_compound set. All pages have their ->private pointing at * the head page (even the head page has this). * - * The first tail page's ->mapping, if non-zero, holds the address of the - * compound page's put_page() function. - * - * The order of the allocation is stored in the first tail page's ->index - * This is only for debug at present. This usage means that zero-order pages - * may not be compound. - */ + * The first tail page's ->lru.next holds the address of the compound page's + * put_page() function. Its ->lru.prev holds the order of allocation. + * This usage means that zero-order pages may not be compound. 
+ */ + +static void free_compound_page(struct page *page) +{ + __free_pages_ok(page, (unsigned long)page[1].lru.prev); +} + static void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - page[1].mapping = NULL; - page[1].index = order; + page[1].lru.next = (void *)free_compound_page; /* set dtor */ + page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; @@ -196,7 +200,7 @@ int i; int nr_pages = 1 << order; - if (unlikely(page[1].index != order)) + if (unlikely((unsigned long)page[1].lru.prev != order)) bad_page(page); for (i = 0; i < nr_pages; i++) { @@ -1539,28 +1543,28 @@ */ static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { - int i, n, val; + int n, val; int min_val = INT_MAX; int best_node = -1; - for_each_online_node(i) { + /* Use the local node if we haven't already */ + if (!node_isset(node, *used_node_mask)) { + node_set(node, *used_node_mask); + return node; + } + + for_each_online_node(n) { cpumask_t tmp; - - /* Start from local node */ - n = (node+i) % num_online_nodes(); /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; - /* Use the local node if we haven't already */ - if (!node_isset(node, *used_node_mask)) { - best_node = node; - break; - } - /* Use the distance array to find the distance */ val = node_distance(node, n); + + /* Penalize nodes under us ("prefer the next node") */ + val += (n < node); /* Give preference to headless and unused nodes */ tmp = node_to_cpumask(n); diff -r d940ec92958d -r 6c43118bdba8 tools/examples/vif-common.sh --- a/tools/examples/vif-common.sh Fri Feb 24 21:03:07 2006 +++ b/tools/examples/vif-common.sh Fri Feb 24 22:41:08 2006 @@ -125,7 +125,7 @@ # function ip_of() { - ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed 's,/.*,,' + ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed 's,/.*,,' | head -1 } diff -r d940ec92958d -r 6c43118bdba8 
tools/examples/vtpm --- a/tools/examples/vtpm Fri Feb 24 21:03:07 2006 +++ b/tools/examples/vtpm Fri Feb 24 22:41:08 2006 @@ -3,6 +3,7 @@ dir=$(dirname "$0") . "$dir/vtpm-common.sh" +vtpm_fatal_error=0 case "$command" in add) @@ -19,5 +20,8 @@ ;; esac -log debug "Successful vTPM operation '$command'." -success +if [ $vtpm_fatal_error -eq 0 ]; then + log debug "Successful vTPM operation '$command'." + success +fi + diff -r d940ec92958d -r 6c43118bdba8 tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Fri Feb 24 21:03:07 2006 +++ b/tools/examples/vtpm-common.sh Fri Feb 24 22:41:08 2006 @@ -173,6 +173,7 @@ local vmname=$1 local inst=$2 local res + res=`cat $VTPMDB | \ gawk -vvmname=$vmname \ -vinst=$inst \ @@ -238,6 +239,9 @@ local res set +e get_create_reason + + claim_lock vtpmdb + find_instance $domname res=$? if [ $res -eq 0 ]; then @@ -262,6 +266,9 @@ vtpm_create $instance fi fi + + release_lock vtpmdb + if [ "$REASON" == "create" ]; then vtpm_reset $instance elif [ "$REASON" == "resume" ]; then @@ -292,3 +299,5 @@ fi set -e } + + diff -r d940ec92958d -r 6c43118bdba8 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Fri Feb 24 21:03:07 2006 +++ b/tools/examples/xmexample.hvm Fri Feb 24 22:41:08 2006 @@ -29,6 +29,9 @@ #----------------------------------------------------------------------------- # the number of cpus guest platform has, default=1 #vcpus=1 + +# enable/disable HVM guest PAE, default=0 (disabled) +#pae=0 # enable/disable HVM guest ACPI, default=0 (disabled) #acpi=0 diff -r d940ec92958d -r 6c43118bdba8 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Fri Feb 24 21:03:07 2006 +++ b/tools/ioemu/target-i386-dm/helper2.c Fri Feb 24 22:41:08 2006 @@ -125,9 +125,8 @@ //the evtchn fd for polling int evtchn_fd = -1; -//the evtchn port for polling the notification, -//should be inputed as bochs's parameter -evtchn_port_t ioreq_remote_port, ioreq_local_port; +//which vcpu we are serving +int send_vcpu 
= 0; //some functions to handle the io req packet void sp_info() @@ -135,52 +134,62 @@ ioreq_t *req; int i; - term_printf("event port: %d\n", shared_page->sp_global.eport); for ( i = 0; i < vcpus; i++ ) { req = &(shared_page->vcpu_iodata[i].vp_ioreq); - term_printf("vcpu %d:\n", i); + term_printf("vcpu %d: event port %d\n", + i, shared_page->vcpu_iodata[i].vp_eport); term_printf(" req state: %x, pvalid: %x, addr: %llx, " "data: %llx, count: %llx, size: %llx\n", req->state, req->pdata_valid, req->addr, req->u.data, req->count, req->size); + term_printf(" IO totally occurred on this vcpu: %llx\n", + req->io_count); } } //get the ioreq packets from share mem -ioreq_t* __cpu_get_ioreq(void) +static ioreq_t* __cpu_get_ioreq(int vcpu) { ioreq_t *req; - req = &(shared_page->vcpu_iodata[0].vp_ioreq); - if (req->state == STATE_IOREQ_READY) { - req->state = STATE_IOREQ_INPROCESS; - } else { - fprintf(logfile, "False I/O request ... in-service already: " - "%x, pvalid: %x, port: %llx, " - "data: %llx, count: %llx, size: %llx\n", - req->state, req->pdata_valid, req->addr, - req->u.data, req->count, req->size); - req = NULL; - } - - return req; + req = &(shared_page->vcpu_iodata[vcpu].vp_ioreq); + + if ( req->state == STATE_IOREQ_READY ) + return req; + + fprintf(logfile, "False I/O request ... 
in-service already: " + "%x, pvalid: %x, port: %llx, " + "data: %llx, count: %llx, size: %llx\n", + req->state, req->pdata_valid, req->addr, + req->u.data, req->count, req->size); + return NULL; } //use poll to get the port notification //ioreq_vec--out,the //retval--the number of ioreq packet -ioreq_t* cpu_get_ioreq(void) -{ - int rc; +static ioreq_t* cpu_get_ioreq(void) +{ + int i, rc; evtchn_port_t port; rc = read(evtchn_fd, &port, sizeof(port)); - if ((rc == sizeof(port)) && (port == ioreq_local_port)) { + if ( rc == sizeof(port) ) { + for ( i = 0; i < vcpus; i++ ) + if ( shared_page->vcpu_iodata[i].dm_eport == port ) + break; + + if ( i == vcpus ) { + fprintf(logfile, "Fatal error while trying to get io event!\n"); + exit(1); + } + // unmask the wanted port again - write(evtchn_fd, &ioreq_local_port, sizeof(port)); + write(evtchn_fd, &port, sizeof(port)); //get the io packet from shared memory - return __cpu_get_ioreq(); + send_vcpu = i; + return __cpu_get_ioreq(i); } //read error or read nothing @@ -361,6 +370,8 @@ ioreq_t *req = cpu_get_ioreq(); if (req) { + req->state = STATE_IOREQ_INPROCESS; + if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { if (req->size != 4) req->u.data &= (1UL << (8 * req->size))-1; @@ -465,7 +476,7 @@ struct ioctl_evtchn_notify notify; env->send_event = 0; - notify.port = ioreq_local_port; + notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport; (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); } } @@ -488,7 +499,7 @@ { CPUX86State *env; struct ioctl_evtchn_bind_interdomain bind; - int rc; + int i, rc; cpu_exec_init(); qemu_register_reset(qemu_hvm_reset, NULL); @@ -509,14 +520,17 @@ return NULL; } + /* FIXME: how about if we overflow the page here? 
*/ bind.remote_domain = domid; - bind.remote_port = ioreq_remote_port; - rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); - if (rc == -1) { - fprintf(logfile, "bind interdomain ioctl error %d\n", errno); - return NULL; - } - ioreq_local_port = rc; + for ( i = 0; i < vcpus; i++ ) { + bind.remote_port = shared_page->vcpu_iodata[i].vp_eport; + rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); + if ( rc == -1 ) { + fprintf(logfile, "bind interdomain ioctl error %d\n", errno); + return NULL; + } + shared_page->vcpu_iodata[i].dm_eport = rc; + } return env; } diff -r d940ec92958d -r 6c43118bdba8 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Feb 24 21:03:07 2006 +++ b/tools/ioemu/vl.c Fri Feb 24 22:41:08 2006 @@ -2337,7 +2337,6 @@ QEMU_OPTION_S, QEMU_OPTION_s, - QEMU_OPTION_p, QEMU_OPTION_d, QEMU_OPTION_l, QEMU_OPTION_hdachs, @@ -2414,7 +2413,6 @@ { "S", 0, QEMU_OPTION_S }, { "s", 0, QEMU_OPTION_s }, - { "p", HAS_ARG, QEMU_OPTION_p }, { "d", HAS_ARG, QEMU_OPTION_d }, { "l", HAS_ARG, QEMU_OPTION_l }, { "hdachs", HAS_ARG, QEMU_OPTION_hdachs }, @@ -2936,13 +2934,6 @@ { domid = atoi(optarg); fprintf(logfile, "domid: %d\n", domid); - } - break; - case QEMU_OPTION_p: - { - extern evtchn_port_t ioreq_remote_port; - ioreq_remote_port = atoi(optarg); - fprintf(logfile, "eport: %d\n", ioreq_remote_port); } break; case QEMU_OPTION_l: diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Fri Feb 24 21:03:07 2006 +++ b/tools/libxc/xc_hvm_build.c Fri Feb 24 22:41:08 2006 @@ -20,7 +20,7 @@ #define L3_PROT (_PAGE_PRESENT) #endif -#define E820MAX 128 +#define E820MAX 128 #define E820_RAM 1 #define E820_RESERVED 2 @@ -137,7 +137,7 @@ */ static int set_hvm_info(int xc_handle, uint32_t dom, unsigned long *pfn_list, unsigned int vcpus, - unsigned int acpi, unsigned int apic) + unsigned int pae, unsigned int acpi, unsigned int apic) { char *va_map; struct hvm_info_table *va_hvm; @@ -149,7 +149,7 @@ PAGE_SIZE, PROT_READ|PROT_WRITE, 
pfn_list[HVM_INFO_PFN]); - + if ( va_map == NULL ) return -1; @@ -159,6 +159,7 @@ va_hvm->length = sizeof(struct hvm_info_table); va_hvm->acpi_enabled = acpi; va_hvm->apic_enabled = apic; + va_hvm->pae_enabled = pae; va_hvm->nr_vcpus = vcpus; set_hvm_info_checksum(va_hvm); @@ -174,9 +175,9 @@ unsigned long nr_pages, vcpu_guest_context_t *ctxt, unsigned long shared_info_frame, - unsigned int control_evtchn, unsigned int vcpus, - unsigned int acpi, + unsigned int pae, + unsigned int acpi, unsigned int apic, unsigned int store_evtchn, unsigned long *store_mfn) @@ -190,11 +191,7 @@ xc_mmu_t *mmu = NULL; int rc; - unsigned long nr_pt_pages; - struct domain_setup_info dsi; - unsigned long vpt_start; - unsigned long vpt_end; unsigned long v_end; unsigned long shared_page_frame = 0; @@ -214,20 +211,10 @@ /* memsize is in megabytes */ v_end = (unsigned long)memsize << 20; -#ifdef __i386__ - nr_pt_pages = 1 + ((memsize + 3) >> 2); -#else - nr_pt_pages = 5 + ((memsize + 1) >> 1); -#endif - vpt_start = v_end; - vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); - printf("VIRTUAL MEMORY ARRANGEMENT:\n" " Loaded HVM loader: %08lx->%08lx\n" - " Page tables: %08lx->%08lx\n" " TOTAL: %08lx->%08lx\n", dsi.v_kernstart, dsi.v_kernend, - vpt_start, vpt_end, dsi.v_start, v_end); printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); @@ -265,7 +252,7 @@ goto error_out; } - if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) { + if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) { fprintf(stderr, "Couldn't set hvm info for HVM guest.\n"); goto error_out; } @@ -296,7 +283,19 @@ shared_page_frame)) == 0 ) goto error_out; memset(sp, 0, PAGE_SIZE); - sp->sp_global.eport = control_evtchn; + + /* FIXME: how about if we overflow the page here? 
*/ + for ( i = 0; i < vcpus; i++ ) { + unsigned int vp_eport; + + vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); + if ( vp_eport < 0 ) { + fprintf(stderr, "Couldn't get unbound port from VMX guest.\n"); + goto error_out; + } + sp->vcpu_iodata[i].vp_eport = vp_eport; + } + munmap(sp, PAGE_SIZE); *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2]; @@ -343,9 +342,9 @@ uint32_t domid, int memsize, const char *image_name, - unsigned int control_evtchn, unsigned int vcpus, - unsigned int acpi, + unsigned int pae, + unsigned int acpi, unsigned int apic, unsigned int store_evtchn, unsigned long *store_mfn) @@ -366,8 +365,8 @@ if ( !strstr(xen_caps, "hvm") ) { - PERROR("CPU doesn't support HVM extensions or " - "the extensions are not enabled"); + PERROR("CPU doesn't support HVM extensions or " + "the extensions are not enabled"); goto error_out; } @@ -399,8 +398,8 @@ ctxt->flags = VGCF_HVM_GUEST; if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages, - ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn, - vcpus, acpi, apic, store_evtchn, store_mfn) < 0) + ctxt, op.u.getdomaininfo.shared_info_frame, + vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0) { ERROR("Error constructing guest OS"); goto error_out; diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Fri Feb 24 21:03:07 2006 +++ b/tools/libxc/xc_ia64_stubs.c Fri Feb 24 22:41:08 2006 @@ -16,7 +16,7 @@ #undef __IA64_UL #define __IA64_UL(x) ((unsigned long)(x)) #undef __ASSEMBLY__ - + unsigned long xc_ia64_fpsr_default(void) { return FPSR_DEFAULT; @@ -569,12 +569,14 @@ static int setup_guest( int xc_handle, uint32_t dom, unsigned long memsize, char *image, unsigned long image_size, - unsigned int control_evtchn, + uint32_t vcpus, unsigned int store_evtchn, unsigned long *store_mfn) { unsigned long page_array[2]; shared_iopage_t *sp; + int i; + // FIXME: initialize pfn list for a temp hack if (xc_ia64_get_pfn_list(xc_handle, dom, NULL, 
-1, -1) == -1) { PERROR("Could not allocate continuous memory"); @@ -612,7 +614,18 @@ page_array[0])) == 0) goto error_out; memset(sp, 0, PAGE_SIZE); - sp->sp_global.eport = control_evtchn; + + for (i = 0; i < vcpus; i++) { + uint32_t vp_eport; + + vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); + if (vp_eport < 0) { + fprintf(stderr, "Couldn't get unbound port from VMX guest.\n"); + goto error_out; + } + sp->vcpu_iodata[i].vp_eport = vp_eport; + } + munmap(sp, PAGE_SIZE); return 0; @@ -625,10 +638,10 @@ uint32_t domid, int memsize, const char *image_name, - unsigned int control_evtchn, - unsigned int lapic, unsigned int vcpus, + unsigned int pae, unsigned int acpi, + unsigned int apic, unsigned int store_evtchn, unsigned long *store_mfn) { @@ -667,8 +680,8 @@ memset(ctxt, 0, sizeof(*ctxt)); - if ( setup_guest(xc_handle, domid, (unsigned long)memsize, image, image_size, - control_evtchn, store_evtchn, store_mfn ) < 0 ){ + if ( setup_guest(xc_handle, domid, (unsigned long)memsize, image, + image_size, vcpus, store_evtchn, store_mfn ) < 0 ){ ERROR("Error constructing guest OS"); goto error_out; } diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Fri Feb 24 21:03:07 2006 +++ b/tools/libxc/xc_load_bin.c Fri Feb 24 22:41:08 2006 @@ -231,6 +231,7 @@ dsi->v_kernstart = dsi->v_start; dsi->v_kernend = dsi->v_end; dsi->v_kernentry = image_info->entry_addr; + dsi->xen_guest_string = ""; return 0; } diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Fri Feb 24 21:03:07 2006 +++ b/tools/libxc/xenguest.h Fri Feb 24 22:41:08 2006 @@ -40,7 +40,7 @@ int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, - unsigned long *console_mfn); + unsigned long *console_mfn); int xc_linux_build(int xc_handle, uint32_t domid, @@ -57,8 +57,8 @@ uint32_t domid, int memsize, const char *image_name, - 
unsigned int control_evtchn, unsigned int vcpus, + unsigned int pae, unsigned int acpi, unsigned int apic, unsigned int store_evtchn, diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Fri Feb 24 21:03:07 2006 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Feb 24 22:41:08 2006 @@ -363,23 +363,24 @@ { uint32_t dom; char *image; - int control_evtchn, store_evtchn; + int store_evtchn; int memsize; int vcpus = 1; + int pae = 0; int acpi = 0; int apic = 0; unsigned long store_mfn = 0; - static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", - "memsize", "image", "vcpus", "acpi", "apic", + static char *kwd_list[] = { "dom", "store_evtchn", + "memsize", "image", "vcpus", "pae", "acpi", "apic", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list, - &dom, &control_evtchn, &store_evtchn, - &memsize, &image, &vcpus, &acpi, &apic) ) - return NULL; - - if ( xc_hvm_build(self->xc_handle, dom, memsize, image, control_evtchn, - vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisiiii", kwd_list, + &dom, &store_evtchn, &memsize, + &image, &vcpus, &pae, &acpi, &apic) ) + return NULL; + + if ( xc_hvm_build(self->xc_handle, dom, memsize, image, + vcpus, pae, acpi, apic, store_evtchn, &store_mfn) != 0 ) return PyErr_SetFromErrno(xc_error); return Py_BuildValue("{s:i}", "store_mfn", store_mfn); diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Feb 24 21:03:07 2006 +++ b/tools/python/xen/xend/image.py Fri Feb 24 22:41:08 2006 @@ -191,8 +191,8 @@ ImageHandler.configure(self, imageConfig, deviceConfig) info = xc.xeninfo() - if not 'hvm' in info['xen_caps']: - raise VmError("Not an HVM capable platform, we stop creating!") + if not 'hvm' in info['xen_caps']: + raise VmError("Not an HVM capable platform, we stop creating!") self.dmargs = self.parseDeviceModelArgs(imageConfig, 
deviceConfig) self.device_model = sxp.child_value(imageConfig, 'device_model') @@ -205,28 +205,24 @@ ("image/device-model", self.device_model), ("image/display", self.display)) - self.device_channel = None self.pid = 0 self.dmargs += self.configVNC(imageConfig) + + self.pae = int(sxp.child_value(imageConfig, 'pae', 0)) self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0)) self.apic = int(sxp.child_value(imageConfig, 'apic', 0)) def buildDomain(self): - # Create an event channel - self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(), - remote_dom=0) - log.info("HVM device model port: %d", self.device_channel) - store_evtchn = self.vm.getStorePort() log.debug("dom = %d", self.vm.getDomid()) log.debug("image = %s", self.kernel) - log.debug("control_evtchn = %d", self.device_channel) log.debug("store_evtchn = %d", store_evtchn) log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024) log.debug("vcpus = %d", self.vm.getVCpuCount()) + log.debug("pae = %d", self.pae) log.debug("acpi = %d", self.acpi) log.debug("apic = %d", self.apic) @@ -234,10 +230,10 @@ return xc.hvm_build(dom = self.vm.getDomid(), image = self.kernel, - control_evtchn = self.device_channel, store_evtchn = store_evtchn, memsize = self.vm.getMemoryTarget() / 1024, vcpus = self.vm.getVCpuCount(), + pae = self.pae, acpi = self.acpi, apic = self.apic) @@ -341,7 +337,6 @@ if len(vnc): args = args + vnc args = args + ([ "-d", "%d" % self.vm.getDomid(), - "-p", "%d" % self.device_channel, "-m", "%s" % (self.vm.getMemoryTarget() / 1024)]) args = args + self.dmargs env = dict(os.environ) @@ -379,28 +374,12 @@ def getDomainMemory(self, mem): """@see ImageHandler.getDomainMemory""" page_kb = 4 + extra_pages = 0 if os.uname()[4] == 'ia64': page_kb = 16 - # for ioreq_t and xenstore - static_pages = 2 - return mem + (self.getPageTableSize(mem / 1024) + static_pages) * page_kb - - def getPageTableSize(self, mem_mb): - """Return the pages of memory needed for 1:1 page tables for physical - mode. 
- - @param mem_mb: size in MB - @return size in KB - """ - # 1 page for the PGD + 1 pte page for 4MB of memory (rounded) - if os.uname()[4] == 'x86_64': - return 5 + ((mem_mb + 1) >> 1) - elif os.uname()[4] == 'ia64': - # 1:1 pgtable is allocated on demand ia64, so just return rom size - # for guest firmware - return 1024 - else: - return 1 + ((mem_mb + 3) >> 2) + # ROM size for guest firmware, ioreq page and xenstore page + extra_pages = 1024 + 2 + return mem + extra_pages * page_kb def register_shutdown_watch(self): """ add xen store watch on control/shutdown """ diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Feb 24 21:03:07 2006 +++ b/tools/python/xen/xm/create.py Fri Feb 24 22:41:08 2006 @@ -160,6 +160,10 @@ gopts.var('cpus', val='CPUS', fn=set_int, default=None, use="CPUS to run the domain on.") + +gopts.var('pae', val='PAE', + fn=set_int, default=0, + use="Disable or enable PAE of HVM domain.") gopts.var('acpi', val='ACPI', fn=set_int, default=0, @@ -545,7 +549,7 @@ def configure_hvm(config_image, vals): """Create the config for HVM devices. 
""" - args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb', + args = [ 'device_model', 'pae', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb', 'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio', 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic', 'xauthority' ] diff -r d940ec92958d -r 6c43118bdba8 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Fri Feb 24 21:03:07 2006 +++ b/tools/xenstore/xenstored_core.c Fri Feb 24 22:41:08 2006 @@ -573,14 +573,11 @@ { struct buffered_data *data; - data = talloc(ctx, struct buffered_data); + data = talloc_zero(ctx, struct buffered_data); if (data == NULL) return NULL; data->inhdr = true; - data->used = 0; - data->buffer = NULL; - return data; } @@ -1394,17 +1391,13 @@ struct node *node; struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE }; - node = talloc(NULL, struct node); + node = talloc_zero(NULL, struct node); node->name = name; node->perms = &perms; node->num_perms = 1; - node->data = NULL; - node->datalen = 0; node->children = (char *)child; if (child) node->childlen = strlen(child) + 1; - else - node->childlen = 0; if (!write_node(NULL, node)) barf_perror("Could not create initial node %s", name); diff -r d940ec92958d -r 6c43118bdba8 tools/xm-test/README --- a/tools/xm-test/README Fri Feb 24 21:03:07 2006 +++ b/tools/xm-test/README Fri Feb 24 22:41:08 2006 @@ -48,6 +48,15 @@ Simply copy the initrd-X.Y.img file into ramdisk/ and then run: # make existing + +Or, you can run: + # INITRD="http://url.of.initrd.repo/" make existing + +You do not need to include the name of the image itself in the url, +however, an initrd with the right name (initrd.X.Y.img) and version +number must exist at that location. The script will determine which +version of the initrd it needs and try to download the right file from +that location. This will set up the link so that xm-test will use the existing ramdisk. Next, just run "runtest.sh" normally. 
Note that in general, diff -r d940ec92958d -r 6c43118bdba8 tools/xm-test/ramdisk/Makefile.am --- a/tools/xm-test/ramdisk/Makefile.am Fri Feb 24 21:03:07 2006 +++ b/tools/xm-test/ramdisk/Makefile.am Fri Feb 24 22:41:08 2006 @@ -57,6 +57,9 @@ fi existing: + @if test -n "$(INITRD)"; then \ + wget $(INITRD)/$(XMTEST_VER_IMG); \ + fi @if [ -f $(XMTEST_VER_IMG) ] ; then \ ln -sf $(XMTEST_VER_IMG) initrd.img; \ else \ diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/mmio.c --- a/xen/arch/ia64/vmx/mmio.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/ia64/vmx/mmio.c Fri Feb 24 22:41:08 2006 @@ -154,7 +154,7 @@ set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); + evtchn_send(iopacket_port(v)); vmx_wait_io(); if(dir==IOREQ_READ){ //read *val=p->u.data; @@ -186,7 +186,7 @@ set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); + evtchn_send(iopacket_port(v)); vmx_wait_io(); if(dir==IOREQ_READ){ //read diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/vmx_init.c --- a/xen/arch/ia64/vmx/vmx_init.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/ia64/vmx/vmx_init.c Fri Feb 24 22:41:08 2006 @@ -49,6 +49,7 @@ #include <xen/mm.h> #include <public/arch-ia64.h> #include <asm/hvm/vioapic.h> +#include <public/event_channel.h> /* Global flag to identify whether Intel vmx feature is on */ u32 vmx_enabled = 0; @@ -250,9 +251,6 @@ { vpd_t *vpd; - /* Allocate resources for vcpu 0 */ - //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct)); - vpd = alloc_vpd(); ASSERT(vpd); @@ -371,20 +369,15 @@ void vmx_setup_platform(struct domain *d, struct vcpu_guest_context *c) { - shared_iopage_t *sp; - ASSERT(d != dom0); /* only for non-privileged vti domain */ d->arch.vmx_platform.shared_page_va = (unsigned long)__va(__gpa_to_mpa(d, IO_PAGE_START)); - sp = get_sp(d); - //memset((char *)sp,0,PAGE_SIZE); /* TEMP */ d->arch.vmx_platform.pib_base = 0xfee00000UL; 
/* Only open one port for I/O and interrupt emulation */ memset(&d->shared_info->evtchn_mask[0], 0xff, sizeof(d->shared_info->evtchn_mask)); - clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]); /* Initialize the virtual interrupt lines */ vmx_virq_line_init(d); @@ -393,4 +386,16 @@ hvm_vioapic_init(d); } - +void vmx_do_launch(struct vcpu *v) +{ + if (evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0) { + printk("VMX domain bind port %d to vcpu %d failed!\n", + iopacket_port(v), v->vcpu_id); + domain_crash_synchronous(); + } + + clear_bit(iopacket_port(v), + &v->domain->shared_info->evtchn_mask[0]); + + vmx_load_all_rr(v); +} diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/vmx_support.c --- a/xen/arch/ia64/vmx/vmx_support.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/ia64/vmx/vmx_support.c Fri Feb 24 22:41:08 2006 @@ -38,7 +38,7 @@ { struct vcpu *v = current; struct domain *d = v->domain; - int port = iopacket_port(d); + int port = iopacket_port(v); do { if (!test_bit(port, @@ -129,7 +129,7 @@ struct domain *d = v->domain; extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, unsigned long *pend_irr); - int port = iopacket_port(d); + int port = iopacket_port(v); /* I/O emulation is atomic, so it's impossible to see execution flow * out of vmx_wait_io, when guest is still waiting for response. 
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/ia64/xen/process.c Fri Feb 24 22:41:08 2006 @@ -71,7 +71,7 @@ context_saved(prev); if (VMX_DOMAIN(current)) { - vmx_load_all_rr(current); + vmx_do_launch(current); } else { load_region_regs(current); vcpu_load_kernel_regs(current); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/ia64/xen/xenmisc.c Fri Feb 24 22:41:08 2006 @@ -347,6 +347,10 @@ void continue_running(struct vcpu *same) { /* nothing to do */ +} + +void arch_dump_domain_info(struct domain *d) +{ } void panic_domain(struct pt_regs *regs, const char *fmt, ...) diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/domain.c Fri Feb 24 22:41:08 2006 @@ -346,19 +346,22 @@ struct vcpu *v, struct vcpu_guest_context *c) { struct domain *d = v->domain; - unsigned long phys_basetab; + unsigned long phys_basetab = INVALID_MFN; int i, rc; - /* - * This is sufficient! If the descriptor DPL differs from CS RPL then we'll - * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically. - * If SS RPL or DPL differs from CS RPL then we'll #GP. 
- */ if ( !(c->flags & VGCF_HVM_GUEST) ) { - if ( ((c->user_regs.cs & 3) == 0) || - ((c->user_regs.ss & 3) == 0) ) - return -EINVAL; + fixup_guest_selector(c->user_regs.ss); + fixup_guest_selector(c->kernel_ss); + fixup_guest_selector(c->user_regs.cs); + +#ifdef __i386__ + fixup_guest_selector(c->event_callback_cs); + fixup_guest_selector(c->failsafe_callback_cs); +#endif + + for ( i = 0; i < 256; i++ ) + fixup_guest_selector(c->trap_ctxt[i].cs); } else if ( !hvm_enabled ) return -EINVAL; @@ -372,6 +375,7 @@ v->arch.flags |= TF_kernel_mode; memcpy(&v->arch.guest_context, c, sizeof(*c)); + init_int80_direct_trap(v); if ( !(c->flags & VGCF_HVM_GUEST) ) { @@ -398,17 +402,27 @@ if ( v->vcpu_id == 0 ) d->vm_assist = c->vm_assist; - phys_basetab = c->ctrlreg[3]; - phys_basetab = - (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) | - (phys_basetab & ~PAGE_MASK); - - v->arch.guest_table = mk_pagetable(phys_basetab); + if ( !(c->flags & VGCF_HVM_GUEST) ) + { + phys_basetab = c->ctrlreg[3]; + phys_basetab = + (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) | + (phys_basetab & ~PAGE_MASK); + + v->arch.guest_table = mk_pagetable(phys_basetab); + } if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 ) return rc; - if ( shadow_mode_refcounts(d) ) + if ( c->flags & VGCF_HVM_GUEST ) + { + v->arch.guest_table = mk_pagetable(0); + + if ( !hvm_initialize_guest_resources(v) ) + return -EINVAL; + } + else if ( shadow_mode_refcounts(d) ) { if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) ) { @@ -416,7 +430,7 @@ return -EINVAL; } } - else if ( !(c->flags & VGCF_HVM_GUEST) ) + else { if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d, PGT_base_page_table) ) @@ -424,17 +438,6 @@ destroy_gdt(v); return -EINVAL; } - } - - if ( c->flags & VGCF_HVM_GUEST ) - { - /* HVM uses the initially provided page tables as the P2M map. 
*/ - if ( !pagetable_get_paddr(d->arch.phys_table) ) - d->arch.phys_table = v->arch.guest_table; - v->arch.guest_table = mk_pagetable(0); - - if ( !hvm_initialize_guest_resources(v) ) - return -EINVAL; } update_pagetables(v); @@ -610,9 +613,6 @@ struct cpu_user_regs *regs = &ctxt->user_regs; unsigned int dirty_segment_mask = 0; - if ( HVM_DOMAIN(v) ) - hvm_save_segments(v); - regs->ds = read_segment_register(ds); regs->es = read_segment_register(es); regs->fs = read_segment_register(fs); @@ -682,9 +682,15 @@ stack_regs, CTXT_SWITCH_STACK_BYTES); unlazy_fpu(p); - save_segments(p); - if ( HVM_DOMAIN(p) ) + if ( !HVM_DOMAIN(p) ) + { + save_segments(p); + } + else + { + hvm_save_segments(p); hvm_load_msrs(); + } } if ( !is_idle_vcpu(n) ) @@ -980,6 +986,26 @@ relinquish_memory(d, &d->page_list); } +void arch_dump_domain_info(struct domain *d) +{ + if ( shadow_mode_enabled(d) ) + { + printk(" shadow mode: "); + if ( shadow_mode_refcounts(d) ) + printk("refcounts "); + if ( shadow_mode_write_all(d) ) + printk("write_all "); + if ( shadow_mode_log_dirty(d) ) + printk("log_dirty "); + if ( shadow_mode_translate(d) ) + printk("translate "); + if ( shadow_mode_external(d) ) + printk("external "); + if ( shadow_mode_wr_pt_pte(d) ) + printk("wr_pt_pte "); + printk("\n"); + } +} /* * Local variables: diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/domain_build.c Fri Feb 24 22:41:08 2006 @@ -17,6 +17,7 @@ #include <xen/domain.h> #include <xen/compile.h> #include <xen/iocap.h> +#include <xen/bitops.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -24,6 +25,8 @@ #include <asm/desc.h> #include <asm/i387.h> #include <asm/shadow.h> + +#include <public/version.h> static long dom0_nrpages; @@ -56,9 +59,6 @@ static unsigned int opt_dom0_shadow; boolean_param("dom0_shadow", opt_dom0_shadow); -static unsigned int opt_dom0_translate; -boolean_param("dom0_translate", 
opt_dom0_translate); - static char opt_dom0_ioports_disable[200] = ""; string_param("dom0_ioports_disable", opt_dom0_ioports_disable); @@ -131,6 +131,62 @@ if ( ioports_deny_access(dom0, io_from, io_to) != 0 ) BUG(); + } +} + +static const char *feature_names[XENFEAT_NR_SUBMAPS*32] = { + [XENFEAT_writable_page_tables] = "writable_page_tables", + [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables", + [XENFEAT_auto_translated_physmap] = "auto_translated_physmap", + [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel", + [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" +}; + +static void parse_features( + const char *feats, + uint32_t supported[XENFEAT_NR_SUBMAPS], + uint32_t required[XENFEAT_NR_SUBMAPS]) +{ + const char *end, *p; + int i, req; + + if ( (end = strchr(feats, ',')) == NULL ) + end = feats + strlen(feats); + + while ( feats < end ) + { + p = strchr(feats, '|'); + if ( (p == NULL) || (p > end) ) + p = end; + + req = (*feats == '!'); + if ( req ) + feats++; + + for ( i = 0; i < XENFEAT_NR_SUBMAPS*32; i++ ) + { + if ( feature_names[i] == NULL ) + continue; + + if ( strncmp(feature_names[i], feats, p-feats) == 0 ) + { + set_bit(i, supported); + if ( req ) + set_bit(i, required); + break; + } + } + + if ( i == XENFEAT_NR_SUBMAPS*32 ) + { + printk("Unknown kernel feature \"%.*s\".\n", + (int)(p-feats), feats); + panic("Domain 0 requires an unknown hypervisor feature.\n"); + } + + feats = p; + if ( *feats == '|' ) + feats++; } } @@ -188,6 +244,10 @@ /* Machine address of next candidate page-table page. */ unsigned long mpt_alloc; + /* Features supported. 
*/ + uint32_t dom0_features_supported[XENFEAT_NR_SUBMAPS] = { 0 }; + uint32_t dom0_features_required[XENFEAT_NR_SUBMAPS] = { 0 }; + extern void translate_l2pgtable( struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn); @@ -245,8 +305,19 @@ return -EINVAL; } - if ( strstr(dsi.xen_section_string, "SHADOW=translate") ) - opt_dom0_translate = 1; + if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL ) + { + parse_features( + p + strlen("FEATURES="), + dom0_features_supported, + dom0_features_required); + printk("Domain 0 kernel supports features = { %08x }.\n", + dom0_features_supported[0]); + printk("Domain 0 kernel requires features = { %08x }.\n", + dom0_features_required[0]); + if ( dom0_features_required[0] ) + panic("Domain 0 requires an unsupported hypervisor feature.\n"); + } /* Align load address to 4MB boundary. */ dsi.v_start &= ~((1UL<<22)-1); @@ -650,11 +721,6 @@ si->nr_pages = nr_pages; si->shared_info = virt_to_maddr(d->shared_info); - if ( opt_dom0_translate ) - { - si->shared_info = max_page << PAGE_SHIFT; - set_gpfn_from_mfn(virt_to_maddr(d->shared_info) >> PAGE_SHIFT, max_page); - } si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; si->pt_base = vpt_start; @@ -669,7 +735,7 @@ mfn = pfn + alloc_spfn; #ifndef NDEBUG #define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT) - if ( !opt_dom0_translate && (pfn > REVERSE_START) ) + if ( pfn > REVERSE_START ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif ((unsigned long *)vphysmap_start)[pfn] = mfn; @@ -720,48 +786,10 @@ new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start); - if ( opt_dom0_shadow || opt_dom0_translate ) - { - printk("dom0: shadow enable\n"); - shadow_mode_enable(d, (opt_dom0_translate - ? 
SHM_enable | SHM_refcounts | SHM_translate - : SHM_enable)); - if ( opt_dom0_translate ) - { - printk("dom0: shadow translate\n"); -#if defined(__i386__) && defined(CONFIG_X86_PAE) - printk("FIXME: PAE code needed here: %s:%d (%s)\n", - __FILE__, __LINE__, __FUNCTION__); - for ( ; ; ) - __asm__ __volatile__ ( "hlt" ); -#else - /* Hmm, what does this? - Looks like isn't portable across 32/64 bit and pae/non-pae ... - -- kraxel */ - - /* mafetter: This code is mostly a hack in order to be able to - * test with dom0's which are running with shadow translate. - * I expect we'll rip this out once we have a stable set of - * domU clients which use the various shadow modes, but it's - * useful to leave this here for now... - */ - - // map this domain's p2m table into current page table, - // so that we can easily access it. - // - ASSERT( root_get_intpte(idle_pg_table[1]) == 0 ); - ASSERT( pagetable_get_paddr(d->arch.phys_table) ); - idle_pg_table[1] = root_from_paddr( - pagetable_get_paddr(d->arch.phys_table), __PAGE_HYPERVISOR); - translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT), - pagetable_get_pfn(v->arch.guest_table)); - idle_pg_table[1] = root_empty(); - local_flush_tlb(); -#endif - } - - update_pagetables(v); /* XXX SMP */ - printk("dom0: shadow setup done\n"); + if ( opt_dom0_shadow ) + { + shadow_mode_enable(d, SHM_enable); + update_pagetables(v); } rc = 0; diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/hvm.c Fri Feb 24 22:41:08 2006 @@ -124,11 +124,6 @@ domain_crash_synchronous(); } d->arch.hvm_domain.shared_page_va = (unsigned long)p; - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d)); - - clear_bit(iopacket_port(d), - &d->shared_info->evtchn_mask[0]); } static int validate_hvm_info(struct hvm_info_table *t) @@ -175,10 +170,12 @@ if ( validate_hvm_info(t) ) { d->arch.hvm_domain.nr_vcpus = t->nr_vcpus; d->arch.hvm_domain.apic_enabled = 
t->apic_enabled; + d->arch.hvm_domain.pae_enabled = t->pae_enabled; } else { printk("Bad hvm info table\n"); d->arch.hvm_domain.nr_vcpus = 1; d->arch.hvm_domain.apic_enabled = 0; + d->arch.hvm_domain.pae_enabled = 0; } unmap_domain_page(p); @@ -188,8 +185,10 @@ { struct hvm_domain *platform; - if (!(HVM_DOMAIN(current) && (current->vcpu_id == 0))) + if ( !HVM_DOMAIN(current) || (current->vcpu_id != 0) ) return; + + shadow_direct_map_init(d); hvm_map_io_shared_page(d); hvm_get_info(d); @@ -198,7 +197,8 @@ pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request); register_pic_io_hook(); - if ( hvm_apic_support(d) ) { + if ( hvm_apic_support(d) ) + { spin_lock_init(&d->arch.hvm_domain.round_robin_lock); hvm_vioapic_init(d); } diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/intercept.c Fri Feb 24 22:41:08 2006 @@ -332,8 +332,8 @@ void hlt_timer_fn(void *data) { struct vcpu *v = data; - - evtchn_set_pending(v, iopacket_port(v->domain)); + + evtchn_set_pending(v, iopacket_port(v)); } static __inline__ void missed_ticks(struct hvm_virpit*vpit) diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/io.c Fri Feb 24 22:41:08 2006 @@ -697,8 +697,8 @@ void hvm_wait_io(void) { struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(d); + struct domain *d = v->domain; + int port = iopacket_port(v); for ( ; ; ) { @@ -729,8 +729,8 @@ void hvm_safe_block(void) { struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(d); + struct domain *d = v->domain; + int port = iopacket_port(v); for ( ; ; ) { diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/platform.c Fri Feb 24 22:41:08 2006 @@ -41,8 +41,6 @@ #define DECODE_success 1 #define 
DECODE_failure 0 - -extern long evtchn_send(int lport); #if defined (__x86_64__) static inline long __get_reg_value(unsigned long reg, int size) @@ -648,6 +646,8 @@ p->count = count; p->df = regs->eflags & EF_DF ? 1 : 0; + p->io_count++; + if (pvalid) { if (hvm_paging_enabled(current)) p->u.pdata = (void *) gva_to_gpa(value); @@ -664,18 +664,18 @@ p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); + evtchn_send(iopacket_port(v)); hvm_wait_io(); } -void send_mmio_req(unsigned char type, unsigned long gpa, - unsigned long count, int size, long value, int dir, int pvalid) +void send_mmio_req( + unsigned char type, unsigned long gpa, + unsigned long count, int size, long value, int dir, int pvalid) { struct vcpu *v = current; vcpu_iodata_t *vio; ioreq_t *p; struct cpu_user_regs *regs; - extern long evtchn_send(int lport); regs = current->arch.hvm_vcpu.mmio_op.inst_decoder_regs; @@ -701,6 +701,8 @@ p->addr = gpa; p->count = count; p->df = regs->eflags & EF_DF ? 1 : 0; + + p->io_count++; if (pvalid) { if (hvm_paging_enabled(v)) @@ -718,7 +720,7 @@ p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); + evtchn_send(iopacket_port(v)); hvm_wait_io(); } @@ -760,12 +762,12 @@ void handle_mmio(unsigned long va, unsigned long gpa) { - unsigned long inst_len, inst_addr; + unsigned long inst_addr; struct mmio_op *mmio_opp; struct cpu_user_regs *regs; struct instruction mmio_inst; unsigned char inst[MAX_INST_LEN]; - int i, realmode, ret; + int i, realmode, ret, inst_len; struct vcpu *v = current; mmio_opp = &v->arch.hvm_vcpu.mmio_op; @@ -795,7 +797,7 @@ if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) { printf("handle_mmio: failed to decode instruction\n"); - printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:", + printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:", va, gpa, inst_len); for (i = 0; i < inst_len; i++) printf(" %02x", inst[i] & 0xFF); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/instrlen.c --- 
a/xen/arch/x86/hvm/svm/instrlen.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/svm/instrlen.c Fri Feb 24 22:41:08 2006 @@ -2,17 +2,19 @@ * instrlen.c - calculates the instruction length for all operating modes * * Travis Betak, travis.betak@xxxxxxx - * Copyright (c) 2005 AMD + * Copyright (c) 2005,2006 AMD + * Copyright (c) 2005 Keir Fraser * - * Essentially a very, very stripped version of Keir Fraser's work in - * x86_emulate.c. Used primarily for MMIO. + * Essentially a very, very stripped version of Keir Fraser's work in + * x86_emulate.c. Used for MMIO. */ /* - * TODO: the way in which we use svm_instrlen is very inefficient as is now - * stands. it will be worth while to return the actual instruction buffer - * along with the instruction length since we are getting the instruction length - * so we know how much of the buffer we need to fetch. + * TODO: the way in which we use svm_instrlen is very inefficient as is now + * stands. It will be worth while to return the actual instruction buffer + * along with the instruction length since one of the reasons we are getting + * the instruction length is to know how many instruction bytes we need to + * fetch. */ #include <xen/config.h> @@ -22,6 +24,11 @@ #include <asm/regs.h> #define DPRINTF DPRINTK #include <asm-x86/x86_emulate.h> + +/* read from guest memory */ +extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip, + int length); +extern void svm_dump_inst(unsigned long eip); /* * Opcode effective-address decode tables. @@ -33,98 +40,101 @@ */ /* Operand sizes: 8-bit operands or specified/overridden size. */ -#define BYTE_OP (1<<0) /* 8-bit operands. */ +#define ByteOp (1<<0) /* 8-bit operands. */ /* Destination operand type. */ -#define IMPLICIT_OPS (1<<1) /* Implicit in opcode. No generic decode. */ -#define DST_REG (2<<1) /* Register operand. */ -#define DST_MEM (3<<1) /* Memory operand. */ -#define DST_MASK (3<<1) +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. 
*/ +#define DstReg (2<<1) /* Register operand. */ +#define DstMem (3<<1) /* Memory operand. */ +#define DstMask (3<<1) /* Source operand type. */ -#define SRC_NONE (0<<3) /* No source operand. */ -#define SRC_IMPLICIT (0<<3) /* Source operand is implicit in the opcode. */ -#define SRC_REG (1<<3) /* Register operand. */ -#define SRC_MEM (2<<3) /* Memory operand. */ -#define SRC_IMM (3<<3) /* Immediate operand. */ -#define SRC_IMMBYTE (4<<3) /* 8-bit sign-extended immediate operand. */ -#define SRC_MASK (7<<3) -/* Generic MODRM decode. */ -#define MODRM (1<<6) +#define SrcNone (0<<3) /* No source operand. */ +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ +#define SrcReg (1<<3) /* Register operand. */ +#define SrcMem (2<<3) /* Memory operand. */ +#define SrcMem16 (3<<3) /* Memory operand (16-bit). */ +#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ +#define SrcImm (5<<3) /* Immediate operand. */ +#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ +#define SrcMask (7<<3) +/* Generic ModRM decode. */ +#define ModRM (1<<6) /* Destination is only written; never read. 
*/ #define Mov (1<<7) -static u8 opcode_table[256] = { +static uint8_t opcode_table[256] = { /* 0x00 - 0x07 */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x08 - 0x0F */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x10 - 0x17 */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x18 - 0x1F */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x20 - 0x27 */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x28 - 0x2F */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x30 - 0x37 */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x38 - 0x3F */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, 
DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, 0, 0, 0, 0, /* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x6F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x87 */ - BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM, - BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM, - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, /* 0x88 - 0x8F */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, - BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM, - 0, 0, 0, DST_MEM | SRC_NONE | MODRM | Mov, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + 0, 0, 0, DstMem|SrcNone|ModRM|Mov, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - BYTE_OP | DST_REG | SRC_MEM | Mov, DST_REG | SRC_MEM | Mov, - BYTE_OP | DST_MEM | SRC_REG | Mov, DST_MEM | SRC_REG | Mov, - BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov, - BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS, + ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov, + ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, /* 0xA8 - 0xAF */ - 0, 0, BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov, - BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov, 
- BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS, + 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|ImplicitOps, ImplicitOps, /* 0xB0 - 0xBF */ - SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, - SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xC7 */ - BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM, 0, 0, - 0, 0, BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, + 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, /* 0xC8 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xD7 */ - BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM, - BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM, + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, + ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 0, 0, 0, 0, /* 0xD8 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -132,31 +142,31 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, - 0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM, + 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, /* 0xF8 - 0xFF */ 0, 0, 0, 0, - 0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM + 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM }; -static u8 twobyte_table[256] = { +static uint8_t twobyte_table[256] = { /* 0x00 - 0x0F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, /* 0x10 - 0x1F */ - 0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 
0x40 - 0x47 */ - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, /* 0x48 - 0x4F */ - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, - DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, + DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, /* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 - 0x6F */ @@ -168,20 +178,17 @@ /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0, + 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0, + 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, /* 0xB0 - 0xB7 */ - BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, 0, - DST_MEM | SRC_REG | MODRM, - 0, 0, - DST_REG | SRC_MEM | MODRM, - DST_REG | SRC_REG | MODRM, - + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM, + 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, /* 0xB8 - 0xBF */ - 0, 0, DST_MEM | SRC_IMMBYTE | MODRM, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0, + 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, + 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, /* 0xC0 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xEF */ @@ -189,11 +196,6 @@ /* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -/* read from guest memory */ -extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip, - int length); -extern void svm_dump_inst(unsigned long eip); /* * insn_fetch - fetch the next 1 to 4 bytes from instruction stream @@ -219,206 +221,250 @@ (_type)_x; \ }) + /** - * get_instruction_length - returns the current instructions length + * svn_instrlen - returns the current instructions length * * @regs: guest register state - * @cr2: target address - * @ops: guest memory operations * @mode: guest operating mode * * EXTERNAL this routine calculates the length of the current instruction * pointed to by eip. The guest state is _not_ changed by this routine. */ -unsigned long svm_instrlen(struct cpu_user_regs *regs, int mode) +int svm_instrlen(struct cpu_user_regs *regs, int mode) { - u8 b, d, twobyte = 0; - u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; - unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode; - unsigned int i; + uint8_t b, d, twobyte = 0, rex_prefix = 0; + uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; int rc = 0; - u32 length = 0; - u8 tmp; - - /* Copy the registers so we don't alter the guest's present state */ - volatile struct cpu_user_regs _regs = *regs; - - /* Check for Real Mode */ - if (mode == 2) - _regs.eip += (_regs.cs << 4); - - /* Legacy prefix check */ - for (i = 0; i < 8; i++) { - switch (b = insn_fetch(u8, 1, _regs.eip, length)) { - case 0x66: /* operand-size override */ - op_bytes ^= 6; /* switch between 2/4 bytes */ - break; - case 0x67: /* address-size override */ - ad_bytes ^= (mode == 8) ? 
12 : 6; /* 2/4/8 bytes */ - break; - case 0x2e: /* CS override */ - case 0x3e: /* DS override */ - case 0x26: /* ES override */ - case 0x64: /* FS override */ - case 0x65: /* GS override */ - case 0x36: /* SS override */ - case 0xf0: /* LOCK */ - case 0xf3: /* REP/REPE/REPZ */ - case 0xf2: /* REPNE/REPNZ */ + int length = 0; + unsigned int tmp; + + /* Shadow copy of register state. Committed on successful emulation. */ + struct cpu_user_regs _regs = *regs; + + /* include CS for 16-bit modes */ + if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16) + _regs.eip += (_regs.cs << 4); + + switch ( mode ) + { + case X86EMUL_MODE_REAL: + case X86EMUL_MODE_PROT16: + op_bytes = ad_bytes = 2; + break; + case X86EMUL_MODE_PROT32: + op_bytes = ad_bytes = 4; + break; +#ifdef __x86_64__ + case X86EMUL_MODE_PROT64: + op_bytes = 4; + ad_bytes = 8; + break; +#endif + default: + return -1; + } + + /* Legacy prefixes. */ + for ( i = 0; i < 8; i++ ) + { + switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) ) + { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + if ( mode == X86EMUL_MODE_PROT64 ) + ad_bytes ^= 12; /* switch between 4/8 bytes */ + else + ad_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x2e: /* CS override */ + case 0x3e: /* DS override */ + case 0x26: /* ES override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0x36: /* SS override */ + break; + case 0xf0: /* LOCK */ + lock_prefix = 1; + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + case 0xf2: /* REPNE/REPNZ */ break; default: goto done_prefixes; } } - done_prefixes: - /* REX prefix check */ - if ((mode == 8) && ((b & 0xf0) == 0x40)) - { - if (b & 8) - op_bytes = 8; /* REX.W */ - modrm_reg = (b & 4) << 1; /* REX.R */ + /* Note quite the same as 80386 real mode, but hopefully good enough. 
*/ + if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) { + printf("sonofabitch!! we don't support 32-bit addresses in realmode\n"); + goto cannot_emulate; + } + + /* REX prefix. */ + if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) ) + { + rex_prefix = b; + if ( b & 8 ) + op_bytes = 8; /* REX.W */ + modrm_reg = (b & 4) << 1; /* REX.R */ /* REX.B and REX.X do not need to be decoded. */ - b = insn_fetch(u8, 1, _regs.eip, length); + b = insn_fetch(uint8_t, 1, _regs.eip, length); } /* Opcode byte(s). */ d = opcode_table[b]; - if (d == 0) + if ( d == 0 ) { /* Two-byte opcode? */ - if (b == 0x0f) { + if ( b == 0x0f ) + { twobyte = 1; - b = insn_fetch(u8, 1, _regs.eip, length); + b = insn_fetch(uint8_t, 1, _regs.eip, length); d = twobyte_table[b]; } /* Unrecognised? */ - if (d == 0) + if ( d == 0 ) goto cannot_emulate; } - /* MODRM and SIB bytes. */ - if (d & MODRM) - { - modrm = insn_fetch(u8, 1, _regs.eip, length); + /* ModRM and SIB bytes. */ + if ( d & ModRM ) + { + modrm = insn_fetch(uint8_t, 1, _regs.eip, length); modrm_mod |= (modrm & 0xc0) >> 6; modrm_reg |= (modrm & 0x38) >> 3; - modrm_rm |= (modrm & 0x07); - switch (modrm_mod) - { - case 0: - if ((modrm_rm == 4) && - (((insn_fetch(u8, 1, _regs.eip, - length)) & 7) == 5)) + modrm_rm |= (modrm & 0x07); + + if ( modrm_mod == 3 ) + { + DPRINTF("Cannot parse ModRM.mod == 3.\n"); + goto cannot_emulate; + } + + if ( ad_bytes == 2 ) + { + /* 16-bit ModR/M decode. */ + switch ( modrm_mod ) { - length += 4; - _regs.eip += 4; /* skip SIB.base disp32 */ - } - else if (modrm_rm == 5) + case 0: + if ( modrm_rm == 6 ) + { + length += 2; + _regs.eip += 2; /* skip disp16 */ + } + break; + case 1: + length += 1; + _regs.eip += 1; /* skip disp8 */ + break; + case 2: + length += 2; + _regs.eip += 2; /* skip disp16 */ + break; + } + } + else + { + /* 32/64-bit ModR/M decode. 
*/ + switch ( modrm_mod ) { + case 0: + if ( (modrm_rm == 4) && + (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7) + == 5) ) + { + length += 4; + _regs.eip += 4; /* skip disp32 specified by SIB.base */ + } + else if ( modrm_rm == 5 ) + { + length += 4; + _regs.eip += 4; /* skip disp32 */ + } + break; + case 1: + if ( modrm_rm == 4 ) + { + insn_fetch(uint8_t, 1, _regs.eip, length); + } + length += 1; + _regs.eip += 1; /* skip disp8 */ + break; + case 2: + if ( modrm_rm == 4 ) + { + insn_fetch(uint8_t, 1, _regs.eip, length); + } length += 4; _regs.eip += 4; /* skip disp32 */ + break; } - break; - case 1: - if (modrm_rm == 4) - { - insn_fetch(u8, 1, _regs.eip, length); - } - length += 1; - _regs.eip += 1; /* skip disp8 */ - break; - case 2: - if (modrm_rm == 4) - { - insn_fetch(u8, 1, _regs.eip, length); - } - length += 4; - _regs.eip += 4; /* skip disp32 */ - break; - case 3: - DPRINTF("Cannot parse ModRM.mod == 3.\n"); - goto cannot_emulate; } } /* Decode and fetch the destination operand: register or memory. */ - switch (d & DST_MASK) - { - case IMPLICIT_OPS: + switch ( d & DstMask ) + { + case ImplicitOps: /* Special instructions do their own operand decoding. */ goto done; } - /* Decode and fetch the source operand: register, memory or immediate */ - switch (d & SRC_MASK) - { - case SRC_IMM: - tmp = (d & BYTE_OP) ? 1 : op_bytes; - if (tmp == 8) - tmp = 4; + /* Decode and fetch the source operand: register, memory or immediate. */ + switch ( d & SrcMask ) + { + case SrcImm: + tmp = (d & ByteOp) ? 1 : op_bytes; + if ( tmp == 8 ) tmp = 4; /* NB. Immediates are sign-extended as necessary. 
*/ - switch (tmp) { - case 1: - insn_fetch(s8, 1, _regs.eip, length); - break; - case 2: - insn_fetch(s16, 2, _regs.eip, length); - break; - case 4: - insn_fetch(s32, 4, _regs.eip, length); - break; - } - break; - case SRC_IMMBYTE: - insn_fetch(s8, 1, _regs.eip, length); - break; - } - - if (twobyte) + switch ( tmp ) + { + case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; + case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; + case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; + } + break; + case SrcImmByte: + insn_fetch(int8_t, 1, _regs.eip, length); + break; + } + + if ( twobyte ) goto done; - switch (b) - { - case 0xa0: - case 0xa1: /* mov */ + switch ( b ) + { + case 0xa0 ... 0xa1: /* mov */ length += ad_bytes; - _regs.eip += ad_bytes; /* skip src displacement */ - break; - case 0xa2: - case 0xa3: /* mov */ + _regs.eip += ad_bytes; /* skip src displacement */ + break; + case 0xa2 ... 0xa3: /* mov */ length += ad_bytes; - _regs.eip += ad_bytes; /* skip dst displacement */ - break; - case 0xf6: - case 0xf7: /* Grp3 */ - switch (modrm_reg) - { - case 0: - case 1: /* test */ - /* - * Special case in Grp3: test has an - * immediate source operand. - */ - tmp = (d & BYTE_OP) ? 1 : op_bytes; - if (tmp == 8) - tmp = 4; - switch (tmp) + _regs.eip += ad_bytes; /* skip dst displacement */ + break; + case 0xf6 ... 0xf7: /* Grp3 */ + switch ( modrm_reg ) + { + case 0 ... 1: /* test */ + /* Special case in Grp3: test has an immediate source operand. */ + tmp = (d & ByteOp) ? 
1 : op_bytes; + if ( tmp == 8 ) tmp = 4; + switch ( tmp ) { - case 1: - insn_fetch(s8, 1, _regs.eip, length); - break; - case 2: - insn_fetch(s16, 2, _regs.eip, length); - break; - case 4: - insn_fetch(s32, 4, _regs.eip, length); - break; + case 1: insn_fetch(int8_t, 1, _regs.eip, length); break; + case 2: insn_fetch(int16_t, 2, _regs.eip, length); break; + case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; } goto done; - } + } break; } @@ -429,5 +475,5 @@ DPRINTF("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n", b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode); svm_dump_inst(_regs.eip); - return (unsigned long)-1; + return -1; } diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/svm/svm.c Fri Feb 24 22:41:08 2006 @@ -64,7 +64,6 @@ /* * External functions, etc. We should move these to some suitable header file(s) */ -extern long evtchn_send(int lport); extern void do_nmi(struct cpu_user_regs *, unsigned long); extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len); @@ -797,12 +796,13 @@ free_host_save_area(v->arch.hvm_svm.host_save_area); #endif - if (v->vcpu_id == 0) { + if ( v->vcpu_id == 0 ) + { /* unmap IO shared page */ struct domain *d = v->domain; - if (d->arch.hvm_domain.shared_page_va) + if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page((void *)d->arch.hvm_domain.shared_page_va); - shadow_direct_map_clean(v); + shadow_direct_map_clean(d); } destroy_vmcb(&v->arch.hvm_svm); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/svm/vmcb.c Fri Feb 24 22:41:08 2006 @@ -421,6 +421,18 @@ if (v->vcpu_id == 0) hvm_setup_platform(v->domain); + if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) + { + printk("HVM domain bind port %d to vcpu %d failed!\n", + iopacket_port(v), v->vcpu_id); + 
domain_crash_synchronous(); + } + + HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); + + clear_bit(iopacket_port(v), + &v->domain->shared_info->evtchn_mask[0]); + if (hvm_apic_support(v->domain)) vlapic_init(v); init_timer(&v->arch.hvm_svm.hlt_timer, @@ -443,8 +455,6 @@ pt = pagetable_get_paddr(v->domain->arch.phys_table); printk("%s: phys_table = %lx\n", __func__, pt); } - - shadow_direct_map_init(v); if ( svm_paging_enabled(v) ) vmcb->cr3 = pagetable_get_paddr(v->arch.guest_table); @@ -492,7 +502,7 @@ svm_stts(v); - if ( test_bit(iopacket_port(d), &d->shared_info->evtchn_pending[0]) || + if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) hvm_wait_io(); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/vlapic.c Fri Feb 24 22:41:08 2006 @@ -210,7 +210,7 @@ set_bit(vector, &vlapic->tmr[0]); } } - evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain)); + evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu)); result = 1; break; @@ -834,7 +834,7 @@ } else vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++; - evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain)); + evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu)); } vlapic->timer_current_update = NOW(); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/vmx/io.c Fri Feb 24 22:41:08 2006 @@ -178,7 +178,7 @@ vmx_stts(); - if ( test_bit(iopacket_port(d), &d->shared_info->evtchn_pending[0]) || + if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) hvm_wait_io(); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri Feb 
24 22:41:08 2006 @@ -200,6 +200,18 @@ if (v->vcpu_id == 0) hvm_setup_platform(v->domain); + if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) + { + printk("VMX domain bind port %d to vcpu %d failed!\n", + iopacket_port(v), v->vcpu_id); + domain_crash_synchronous(); + } + + HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); + + clear_bit(iopacket_port(v), + &v->domain->shared_info->evtchn_mask[0]); + __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : ); error |= __vmwrite(GUEST_CR0, cr0); @@ -230,7 +242,6 @@ error |= __vmwrite(GUEST_TR_BASE, 0); error |= __vmwrite(GUEST_TR_LIMIT, 0xff); - shadow_direct_map_init(v); __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table)); __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table)); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Feb 24 22:41:08 2006 @@ -81,14 +81,14 @@ void vmx_relinquish_resources(struct vcpu *v) { struct hvm_virpit *vpit; - + if (v->vcpu_id == 0) { /* unmap IO shared page */ struct domain *d = v->domain; if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); - shadow_direct_map_clean(v); + shadow_direct_map_clean(d); } vmx_request_clear_vmcs(v); @@ -448,7 +448,6 @@ return 0; /* dummy */ } -extern long evtchn_send(int lport); void do_nmi(struct cpu_user_regs *); static int check_vmx_controls(ctrls, msr) @@ -643,7 +642,7 @@ } /* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */ -#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46 +#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46 static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs) { @@ -662,19 +661,21 @@ cpuid(input, &eax, &ebx, &ecx, &edx); - if (input == 1) + if ( input == 1 ) { if ( hvm_apic_support(v->domain) && !vlapic_global_enabled((VLAPIC(v))) ) clear_bit(X86_FEATURE_APIC, &edx); #if CONFIG_PAGING_LEVELS < 3 + 
clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE, &edx); - clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); #else if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) { + if ( !v->domain->arch.hvm_domain.pae_enabled ) + clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); } @@ -1184,8 +1185,12 @@ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value); - if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) { + if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled ) + { + unsigned long cr4; + /* + * Trying to enable guest paging. * The guest CR3 must be pointing to the guest physical. */ if ( !VALID_MFN(mfn = get_mfn_from_gpfn( @@ -1197,52 +1202,51 @@ } #if defined(__x86_64__) - if (test_bit(VMX_CPU_STATE_LME_ENABLED, - &v->arch.hvm_vmx.cpu_state) && - !test_bit(VMX_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_vmx.cpu_state)){ - HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n"); + if ( test_bit(VMX_CPU_STATE_LME_ENABLED, + &v->arch.hvm_vmx.cpu_state) && + !test_bit(VMX_CPU_STATE_PAE_ENABLED, + &v->arch.hvm_vmx.cpu_state) ) + { + HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enabled\n"); vmx_inject_exception(v, TRAP_gp_fault, 0); } - if (test_bit(VMX_CPU_STATE_LME_ENABLED, - &v->arch.hvm_vmx.cpu_state)){ - /* Here the PAE is should to be opened */ - HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n"); + + if ( test_bit(VMX_CPU_STATE_LME_ENABLED, + &v->arch.hvm_vmx.cpu_state) ) + { + /* Here the PAE is should be opened */ + HVM_DBG_LOG(DBG_LEVEL_1, "Enable long mode\n"); set_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state); + __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); -#if CONFIG_PAGING_LEVELS >= 4 - if(!shadow_set_guest_paging_levels(v->domain, 4)) { + if ( !shadow_set_guest_paging_levels(v->domain, 4) ) { + 
printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } + } + else +#endif /* __x86_64__ */ + { +#if CONFIG_PAGING_LEVELS >= 3 + if ( !shadow_set_guest_paging_levels(v->domain, 2) ) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } #endif } - else -#endif /* __x86_64__ */ + + /* update CR4's PAE if needed */ + __vmread(GUEST_CR4, &cr4); + if ( (!(cr4 & X86_CR4_PAE)) && + test_bit(VMX_CPU_STATE_PAE_ENABLED, + &v->arch.hvm_vmx.cpu_state) ) { -#if CONFIG_PAGING_LEVELS >= 3 - if(!shadow_set_guest_paging_levels(v->domain, 2)) { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } -#endif - } - - { - unsigned long crn; - /* update CR4's PAE if needed */ - __vmread(GUEST_CR4, &crn); - if ( (!(crn & X86_CR4_PAE)) && - test_bit(VMX_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_vmx.cpu_state) ) - { - HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n"); - __vmwrite(GUEST_CR4, crn | X86_CR4_PAE); - } + HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE in cr4\n"); + __vmwrite(GUEST_CR4, cr4 | X86_CR4_PAE); } /* @@ -1262,8 +1266,8 @@ v->arch.hvm_vmx.cpu_cr3, mfn); } - if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled) - if(v->arch.hvm_vmx.cpu_cr3) { + if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) + if ( v->arch.hvm_vmx.cpu_cr3 ) { put_page(mfn_to_page(get_mfn_from_gpfn( v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT))); v->arch.guest_table = mk_pagetable(0); @@ -1274,7 +1278,8 @@ * real-mode by performing a world switch to VMXAssist whenever * a partition disables the CR0.PE bit. */ - if ((value & X86_CR0_PE) == 0) { + if ( (value & X86_CR0_PE) == 0 ) + { if ( value & X86_CR0_PG ) { /* inject GP here */ vmx_inject_exception(v, TRAP_gp_fault, 0); @@ -1284,8 +1289,9 @@ * Disable paging here. 
* Same to PE == 1 && PG == 0 */ - if (test_bit(VMX_CPU_STATE_LMA_ENABLED, - &v->arch.hvm_vmx.cpu_state)){ + if ( test_bit(VMX_CPU_STATE_LMA_ENABLED, + &v->arch.hvm_vmx.cpu_state) ) + { clear_bit(VMX_CPU_STATE_LMA_ENABLED, &v->arch.hvm_vmx.cpu_state); __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); @@ -1295,19 +1301,21 @@ } clear_all_shadow_status(v->domain); - if (vmx_assist(v, VMX_ASSIST_INVOKE)) { + if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) { set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state); __vmread(GUEST_RIP, &eip); HVM_DBG_LOG(DBG_LEVEL_1, "Transfering control to vmxassist %%eip 0x%lx\n", eip); return 0; /* do not update eip! */ } - } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED, - &v->arch.hvm_vmx.cpu_state)) { + } else if ( test_bit(VMX_CPU_STATE_ASSIST_ENABLED, + &v->arch.hvm_vmx.cpu_state) ) + { __vmread(GUEST_RIP, &eip); HVM_DBG_LOG(DBG_LEVEL_1, "Enabling CR0.PE at %%eip 0x%lx\n", eip); - if (vmx_assist(v, VMX_ASSIST_RESTORE)) { + if ( vmx_assist(v, VMX_ASSIST_RESTORE) ) + { clear_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state); __vmread(GUEST_RIP, &eip); @@ -1437,15 +1445,13 @@ } case 4: /* CR4 */ { - unsigned long old_cr4; - - __vmread(CR4_READ_SHADOW, &old_cr4); - - if ( value & X86_CR4_PAE && !(old_cr4 & X86_CR4_PAE) ) + __vmread(CR4_READ_SHADOW, &old_cr); + + if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) { set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state); - if ( vmx_pgbit_test(v) ) + if ( vmx_pgbit_test(v) ) { /* The guest is 32 bit. 
*/ #if CONFIG_PAGING_LEVELS >= 4 @@ -1459,7 +1465,7 @@ if ( !VALID_MFN(mfn = get_mfn_from_gpfn( v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) || - !get_page(mfn_to_page(mfn), v->domain) ) + !get_page(mfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ @@ -1488,12 +1494,12 @@ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); #endif - } + } else { /* The guest is 64 bit. */ #if CONFIG_PAGING_LEVELS >= 4 - if ( !shadow_set_guest_paging_levels(v->domain, 4) ) + if ( !shadow_set_guest_paging_levels(v->domain, 4) ) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ @@ -1511,7 +1517,6 @@ clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state); } - __vmread(CR4_READ_SHADOW, &old_cr); __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK); __vmwrite(CR4_READ_SHADOW, value); @@ -1751,6 +1756,9 @@ fastcall void smp_call_function_interrupt(void); fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs); fastcall void smp_error_interrupt(struct cpu_user_regs *regs); +#ifdef CONFIG_X86_MCE_P4THERMAL + fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs); +#endif if ((error = __vmread(VM_EXIT_INTR_INFO, &vector)) && !(vector & INTR_INFO_VALID_MASK)) @@ -1778,6 +1786,11 @@ case ERROR_APIC_VECTOR: smp_error_interrupt(regs); break; +#ifdef CONFIG_X86_MCE_P4THERMAL + case THERMAL_APIC_VECTOR: + smp_thermal_interrupt(regs); + break; +#endif default: regs->entry_vector = vector; do_IRQ(regs); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/mm.c Fri Feb 24 22:41:08 2006 @@ -1776,7 +1776,7 @@ pin_page: if ( shadow_mode_refcounts(FOREIGNDOM) ) - type = PGT_writable_page; + break; okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); if ( unlikely(!okay) ) @@ -1811,6 +1811,9 @@ goto pin_page; 
case MMUEXT_UNPIN_TABLE: + if ( shadow_mode_refcounts(d) ) + break; + if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) { MEM_LOG("Mfn %lx bad domain (dom=%p)", diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/shadow.c Fri Feb 24 22:41:08 2006 @@ -3609,7 +3609,7 @@ if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) { if ( SH_GUEST_32PAE ) - gpfn = hvm_get_guest_ctrl_reg(v, 3); + gpfn = (hvm_get_guest_ctrl_reg(v, 3)) >> PAGE_SHIFT; else gpfn = pagetable_get_pfn(v->arch.guest_table); } @@ -3942,19 +3942,17 @@ * on handling the #PF as such. */ if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN ) - { - goto fail; - } + return 0; shadow_lock(d); __direct_get_l3e(v, vpa, &sl3e); - if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) ) + if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) ) { page = alloc_domheap_page(NULL); if ( !page ) - goto nomem; + goto nomem; smfn = page_to_mfn(page); sl3e = l3e_from_pfn(smfn, _PAGE_PRESENT); @@ -3968,11 +3966,11 @@ __direct_get_l2e(v, vpa, &sl2e); - if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) + if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) { page = alloc_domheap_page(NULL); if ( !page ) - goto nomem; + goto nomem; smfn = page_to_mfn(page); sl2e = l2e_from_pfn(smfn, __PAGE_HYPERVISOR | _PAGE_USER); @@ -3985,20 +3983,17 @@ __direct_get_l1e(v, vpa, &sl1e); - if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) ) + if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) ) { sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER); __direct_set_l1e(v, vpa, &sl1e); - } + } shadow_unlock(d); return EXCRET_fault_fixed; -fail: - return 0; - nomem: - shadow_direct_map_clean(v); + shadow_direct_map_clean(d); domain_crash_synchronous(); } #endif diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/shadow32.c Fri Feb 24 22:41:08 2006 @@ -1039,12 +1039,10 @@ * on handling the #PF as such. 
*/ if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN ) - { - goto fail; - } + return 0; shadow_lock(d); - + __direct_get_l2e(v, vpa, &sl2e); if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) @@ -1059,7 +1057,7 @@ sple = (l1_pgentry_t *)map_domain_page(smfn); memset(sple, 0, PAGE_SIZE); __direct_set_l2e(v, vpa, sl2e); - } + } if ( !sple ) sple = (l1_pgentry_t *)map_domain_page(l2e_get_pfn(sl2e)); @@ -1078,54 +1076,55 @@ shadow_unlock(d); return EXCRET_fault_fixed; -fail: - return 0; - nomem: - shadow_direct_map_clean(v); + shadow_direct_map_clean(d); domain_crash_synchronous(); } -int shadow_direct_map_init(struct vcpu *v) +int shadow_direct_map_init(struct domain *d) { struct page_info *page; l2_pgentry_t *root; if ( !(page = alloc_domheap_page(NULL)) ) - goto fail; + return 0; root = map_domain_page(page_to_mfn(page)); memset(root, 0, PAGE_SIZE); unmap_domain_page(root); - v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page)); + d->arch.phys_table = mk_pagetable(page_to_maddr(page)); return 1; - -fail: - return 0; -} - -void shadow_direct_map_clean(struct vcpu *v) +} + +void shadow_direct_map_clean(struct domain *d) { int i; + unsigned long mfn; l2_pgentry_t *l2e; - l2e = map_domain_page( - pagetable_get_pfn(v->domain->arch.phys_table)); + mfn = pagetable_get_pfn(d->arch.phys_table); + + /* + * We may fail very early before direct map is built. 
+ */ + if ( !mfn ) + return; + + l2e = map_domain_page(mfn); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { if ( l2e_get_flags(l2e[i]) & _PAGE_PRESENT ) free_domheap_page(mfn_to_page(l2e_get_pfn(l2e[i]))); } - - free_domheap_page( - mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table))); + free_domheap_page(mfn_to_page(mfn)); unmap_domain_page(l2e); - v->domain->arch.phys_table = mk_pagetable(0); + + d->arch.phys_table = mk_pagetable(0); } int __shadow_mode_enable(struct domain *d, unsigned int mode) @@ -1135,7 +1134,7 @@ if(!new_modes) /* Nothing to do - return success */ return 0; - + // can't take anything away by calling this function. ASSERT(!(d->arch.shadow_mode & ~mode)); @@ -1630,27 +1629,58 @@ perfc_incrc(get_mfn_from_gpfn_foreign); - va = gpfn << PAGE_SHIFT; - tabpfn = pagetable_get_pfn(d->arch.phys_table); - l2 = map_domain_page(tabpfn); - l2e = l2[l2_table_offset(va)]; - unmap_domain_page(l2); - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - { - printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n", - __func__, d->domain_id, gpfn, l2e_get_intpte(l2e)); - return INVALID_MFN; - } - l1 = map_domain_page(l2e_get_pfn(l2e)); - l1e = l1[l1_table_offset(va)]; - unmap_domain_page(l1); - + if ( shadow_mode_external(d) ) + { + unsigned long mfn; + unsigned long *l0; + + va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn)); + + tabpfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table); + if ( !tabpfn ) + return INVALID_MFN; + + l2 = map_domain_page(tabpfn); + l2e = l2[l2_table_offset(va)]; + unmap_domain_page(l2); + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + return INVALID_MFN; + + l1 = map_domain_page(l2e_get_pfn(l2e)); + l1e = l1[l1_table_offset(va)]; + unmap_domain_page(l1); + if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) + return INVALID_MFN; + + l0 = map_domain_page(l1e_get_pfn(l1e)); + mfn = l0[gpfn & ((PAGE_SIZE / sizeof(mfn)) - 1)]; + unmap_domain_page(l0); + return mfn; + } + else + { + va = gpfn << PAGE_SHIFT; + tabpfn = 
pagetable_get_pfn(d->arch.phys_table); + l2 = map_domain_page(tabpfn); + l2e = l2[l2_table_offset(va)]; + unmap_domain_page(l2); + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + { + printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n", + __func__, d->domain_id, gpfn, l2e_get_intpte(l2e)); + return INVALID_MFN; + } + l1 = map_domain_page(l2e_get_pfn(l2e)); + l1e = l1[l1_table_offset(va)]; + unmap_domain_page(l1); #if 0 - printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n", - __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e); + printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n", + __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e); #endif - return l1e_get_intpte(l1e); + return l1e_get_intpte(l1e); + } + } static unsigned long diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/shadow_public.c Fri Feb 24 22:41:08 2006 @@ -36,35 +36,40 @@ #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16)) -int shadow_direct_map_init(struct vcpu *v) +int shadow_direct_map_init(struct domain *d) { struct page_info *page; l3_pgentry_t *root; if ( !(page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA)) ) - goto fail; + return 0; root = map_domain_page(page_to_mfn(page)); memset(root, 0, PAGE_SIZE); root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR); - v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page)); + d->arch.phys_table = mk_pagetable(page_to_maddr(page)); unmap_domain_page(root); return 1; - -fail: - return 0; -} - -void shadow_direct_map_clean(struct vcpu *v) -{ +} + +void shadow_direct_map_clean(struct domain *d) +{ + unsigned long mfn; l2_pgentry_t *l2e; l3_pgentry_t *l3e; int i, j; - l3e = (l3_pgentry_t *)map_domain_page( - pagetable_get_pfn(v->domain->arch.phys_table)); + mfn = 
pagetable_get_pfn(d->arch.phys_table); + + /* + * We may fail very early before direct map is built. + */ + if ( !mfn ) + return; + + l3e = (l3_pgentry_t *)map_domain_page(mfn); for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ ) { @@ -81,12 +86,11 @@ free_domheap_page(mfn_to_page(l3e_get_pfn(l3e[i]))); } } - - free_domheap_page( - mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table))); + free_domheap_page(mfn_to_page(mfn)); unmap_domain_page(l3e); - v->domain->arch.phys_table = mk_pagetable(0); + + d->arch.phys_table = mk_pagetable(0); } /****************************************************************************/ @@ -1790,39 +1794,56 @@ unsigned long va, tabpfn; l1_pgentry_t *l1, l1e; l2_pgentry_t *l2, l2e; +#if CONFIG_PAGING_LEVELS >= 4 + pgentry_64_t *l4 = NULL; + pgentry_64_t l4e = { 0 }; +#endif + pgentry_64_t *l3 = NULL; + pgentry_64_t l3e = { 0 }; + unsigned long *l0tab = NULL; + unsigned long mfn; ASSERT(shadow_mode_translate(d)); perfc_incrc(get_mfn_from_gpfn_foreign); - va = gpfn << PAGE_SHIFT; - tabpfn = pagetable_get_pfn(d->arch.phys_table); - l2 = map_domain_page(tabpfn); + va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn)); + + tabpfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table); + if ( !tabpfn ) + return INVALID_MFN; + +#if CONFIG_PAGING_LEVELS >= 4 + l4 = map_domain_page(tabpfn); + l4e = l4[l4_table_offset(va)]; + unmap_domain_page(l4); + if ( !(entry_get_flags(l4e) & _PAGE_PRESENT) ) + return INVALID_MFN; + + l3 = map_domain_page(entry_get_pfn(l4e)); +#else + l3 = map_domain_page(tabpfn); +#endif + l3e = l3[l3_table_offset(va)]; + unmap_domain_page(l3); + if ( !(entry_get_flags(l3e) & _PAGE_PRESENT) ) + return INVALID_MFN; + l2 = map_domain_page(entry_get_pfn(l3e)); l2e = l2[l2_table_offset(va)]; unmap_domain_page(l2); if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - { - printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n", - __func__, d->domain_id, gpfn, l2e_get_intpte(l2e)); return INVALID_MFN; - } + l1 = map_domain_page(l2e_get_pfn(l2e)); 
l1e = l1[l1_table_offset(va)]; unmap_domain_page(l1); - -#if 0 - printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n", - __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e); -#endif - if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) - { - printk("%s(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n", - __func__, d->domain_id, gpfn, l1e_get_intpte(l1e)); return INVALID_MFN; - } - - return l1e_get_pfn(l1e); + + l0tab = map_domain_page(l1e_get_pfn(l1e)); + mfn = l0tab[gpfn & ((PAGE_SIZE / sizeof (mfn)) - 1)]; + unmap_domain_page(l0tab); + return mfn; } static u32 remove_all_access_in_page( diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/traps.c Fri Feb 24 22:41:08 2006 @@ -1430,11 +1430,7 @@ if ( cur.address == 0 ) break; - if ( !VALID_CODESEL(cur.cs) ) - { - rc = -EPERM; - break; - } + fixup_guest_selector(cur.cs); memcpy(&dst[cur.vector], &cur, sizeof(cur)); diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/x86_32/mm.c Fri Feb 24 22:41:08 2006 @@ -223,8 +223,7 @@ int nr = smp_processor_id(); struct tss_struct *t = &init_tss[nr]; - if ( (ss & 3) != 1 ) - return -EPERM; + fixup_guest_selector(ss); current->arch.guest_context.kernel_ss = ss; current->arch.guest_context.kernel_sp = esp; @@ -239,6 +238,7 @@ { unsigned long base, limit; u32 a = d->a, b = d->b; + u16 cs; /* A not-present descriptor will always fault, so is safe. */ if ( !(b & _SEGMENT_P) ) @@ -250,8 +250,8 @@ * gates (consider a call gate pointing at another kernel descriptor with * DPL 0 -- this would get the OS ring-0 privileges). 
*/ - if ( (b & _SEGMENT_DPL) == 0 ) - goto bad; + if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) ) + d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13); if ( !(b & _SEGMENT_S) ) { @@ -271,9 +271,12 @@ if ( (b & _SEGMENT_TYPE) != 0xc00 ) goto bad; - /* Can't allow far jump to a Xen-private segment. */ - if ( !VALID_CODESEL(a>>16) ) + /* Validate and fix up the target code selector. */ + cs = a >> 16; + fixup_guest_selector(cs); + if ( !guest_gate_selector_okay(cs) ) goto bad; + a = d->a = (d->a & 0xffffU) | (cs << 16); /* Reserved bits must be zero. */ if ( (b & 0xe0) != 0 ) diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/x86_32/traps.c Fri Feb 24 22:41:08 2006 @@ -254,10 +254,14 @@ /* * We can't virtualise interrupt gates, as there's no way to get - * the CPU to automatically clear the events_mask variable. - */ - if ( TI_GET_IF(ti) ) + * the CPU to automatically clear the events_mask variable. Also we + * must ensure that the CS is safe to poke into an interrupt gate. 
+ */ + if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ) + { + v->arch.int80_desc.a = v->arch.int80_desc.b = 0; return; + } v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff); v->arch.int80_desc.b = @@ -274,8 +278,8 @@ { struct vcpu *d = current; - if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) ) - return -EPERM; + fixup_guest_selector(event_selector); + fixup_guest_selector(failsafe_selector); d->arch.guest_context.event_callback_cs = event_selector; d->arch.guest_context.event_callback_eip = event_address; diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/x86_64/entry.S Fri Feb 24 22:41:08 2006 @@ -206,7 +206,7 @@ sti leaq VCPU_trap_bounce(%rbx),%rdx movq %rax,TRAPBOUNCE_eip(%rdx) - movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx) + movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) call create_bounce_frame jmp test_all_events 1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx) @@ -229,9 +229,6 @@ 1: /* In kernel context already: push new frame at existing %rsp. */ movq UREGS_rsp+8(%rsp),%rsi andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest. - testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx) - jz 2f - orb $0x01,UREGS_cs+8(%rsp) 2: andq $~0xf,%rsi # Stack frames are 16-byte aligned. movq $HYPERVISOR_VIRT_START,%rax cmpq %rax,%rsi diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Fri Feb 24 21:03:07 2006 +++ b/xen/arch/x86/x86_64/mm.c Fri Feb 24 22:41:08 2006 @@ -228,8 +228,7 @@ long do_stack_switch(unsigned long ss, unsigned long esp) { - if ( (ss & 3) != 3 ) - return -EPERM; + fixup_guest_selector(ss); current->arch.guest_context.kernel_ss = ss; current->arch.guest_context.kernel_sp = esp; return 0; @@ -292,14 +291,15 @@ int check_descriptor(struct desc_struct *d) { u32 a = d->a, b = d->b; + u16 cs; /* A not-present descriptor will always fault, so is safe. 
*/ if ( !(b & _SEGMENT_P) ) goto good; - /* The guest can only safely be executed in ring 3. */ - if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL ) - goto bad; + /* Check and fix up the DPL. */ + if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) ) + d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13); /* All code and data segments are okay. No base/limit checking. */ if ( (b & _SEGMENT_S) ) @@ -313,9 +313,12 @@ if ( (b & _SEGMENT_TYPE) != 0xc00 ) goto bad; - /* Can't allow far jump to a Xen-private segment. */ - if ( !VALID_CODESEL(a>>16) ) + /* Validate and fix up the target code selector. */ + cs = a >> 16; + fixup_guest_selector(cs); + if ( !guest_gate_selector_okay(cs) ) goto bad; + a = d->a = (d->a & 0xffffU) | (cs << 16); /* Reserved bits must be zero. */ if ( (b & 0xe0) != 0 ) diff -r d940ec92958d -r 6c43118bdba8 xen/common/elf.c --- a/xen/common/elf.c Fri Feb 24 21:03:07 2006 +++ b/xen/common/elf.c Fri Feb 24 22:41:08 2006 @@ -61,7 +61,6 @@ continue; guestinfo = elfbase + shdr->sh_offset; - printk("Xen-ELF header found: '%s'\n", guestinfo); if ( (strstr(guestinfo, "LOADER=generic") == NULL) && (strstr(guestinfo, "GUEST_OS=linux") == NULL) ) diff -r d940ec92958d -r 6c43118bdba8 xen/common/event_channel.c --- a/xen/common/event_channel.c Fri Feb 24 21:03:07 2006 +++ b/xen/common/event_channel.c Fri Feb 24 22:41:08 2006 @@ -399,7 +399,7 @@ } -long evtchn_send(int lport) +long evtchn_send(unsigned int lport) { struct evtchn *lchn, *rchn; struct domain *ld = current->domain, *rd; @@ -508,15 +508,13 @@ return rc; } -static long evtchn_bind_vcpu(evtchn_bind_vcpu_t *bind) -{ - struct domain *d = current->domain; - int port = bind->port; - int vcpu = bind->vcpu; +long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id) +{ + struct domain *d = current->domain; struct evtchn *chn; long rc = 0; - if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu] == NULL) ) + if ( (vcpu_id >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu_id] == NULL) ) return -ENOENT; 
spin_lock(&d->evtchn_lock); @@ -533,7 +531,7 @@ case ECS_UNBOUND: case ECS_INTERDOMAIN: case ECS_PIRQ: - chn->notify_vcpu_id = vcpu; + chn->notify_vcpu_id = vcpu_id; break; default: rc = -EINVAL; @@ -638,7 +636,7 @@ break; case EVTCHNOP_bind_vcpu: - rc = evtchn_bind_vcpu(&op.u.bind_vcpu); + rc = evtchn_bind_vcpu(op.u.bind_vcpu.port, op.u.bind_vcpu.vcpu); break; case EVTCHNOP_unmask: diff -r d940ec92958d -r 6c43118bdba8 xen/common/kernel.c --- a/xen/common/kernel.c Fri Feb 24 21:03:07 2006 +++ b/xen/common/kernel.c Fri Feb 24 22:41:08 2006 @@ -193,7 +193,8 @@ if ( shadow_mode_translate(current->domain) ) fi.submap |= (1U << XENFEAT_writable_page_tables) | - (1U << XENFEAT_auto_translated_physmap); + (1U << XENFEAT_auto_translated_physmap) | + (1U << XENFEAT_pae_pgdir_above_4gb); break; default: return -EINVAL; diff -r d940ec92958d -r 6c43118bdba8 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Fri Feb 24 21:03:07 2006 +++ b/xen/common/keyhandler.c Fri Feb 24 22:41:08 2006 @@ -13,6 +13,7 @@ #include <xen/domain.h> #include <xen/rangeset.h> #include <asm/debugger.h> +#include <asm/shadow.h> #define KEY_MAX 256 #define STR_MAX 64 @@ -131,6 +132,8 @@ d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7], d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11], d->handle[12], d->handle[13], d->handle[14], d->handle[15]); + + arch_dump_domain_info(d); rangeset_domain_printk(d); diff -r d940ec92958d -r 6c43118bdba8 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Fri Feb 24 21:03:07 2006 +++ b/xen/common/sched_sedf.c Fri Feb 24 22:41:08 2006 @@ -1609,15 +1609,19 @@ else { /*time driven domains*/ for_each_vcpu(p, v) { - /* sanity checking! */ - if(cmd->u.sedf.slice > cmd->u.sedf.period ) + /* + * Sanity checking: note that disabling extra weight requires + * that we set a non-zero slice. 
+ */ + if ( (cmd->u.sedf.slice == 0) || + (cmd->u.sedf.slice > cmd->u.sedf.period) ) return -EINVAL; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->extraweight = 0; EDOM_INFO(v)->period_orig = - EDOM_INFO(v)->period = cmd->u.sedf.period; + EDOM_INFO(v)->period = cmd->u.sedf.period; EDOM_INFO(v)->slice_orig = - EDOM_INFO(v)->slice = cmd->u.sedf.slice; + EDOM_INFO(v)->slice = cmd->u.sedf.slice; } } if (sedf_adjust_weights(cmd)) diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-ia64/vmx.h --- a/xen/include/asm-ia64/vmx.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-ia64/vmx.h Fri Feb 24 22:41:08 2006 @@ -57,9 +57,9 @@ return &((shared_iopage_t *)d->arch.vmx_platform.shared_page_va)->vcpu_iodata[cpu]; } -static inline int iopacket_port(struct domain *d) +static inline int iopacket_port(struct vcpu *v) { - return ((shared_iopage_t *)d->arch.vmx_platform.shared_page_va)->sp_global.eport; + return get_vio(v->domain, v->vcpu_id)->vp_eport; } static inline shared_iopage_t *get_sp(struct domain *d) diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/desc.h --- a/xen/include/asm-x86/desc.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/desc.h Fri Feb 24 22:41:08 2006 @@ -26,16 +26,28 @@ #define GUEST_KERNEL_RPL 1 #endif +/* Fix up the RPL of a guest segment selector. */ +#define fixup_guest_selector(sel) \ + ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) : \ + (((sel) & ~3) | GUEST_KERNEL_RPL)) + /* - * Guest OS must provide its own code selectors, or use the one we provide. Any - * LDT selector value is okay. Note that checking only the RPL is insufficient: - * if the selector is poked into an interrupt, trap or call gate then the RPL - * is ignored when the gate is accessed. + * We need this function because enforcing the correct guest kernel RPL is + * insufficient if the selector is poked into an interrupt, trap or call gate. + * The selector RPL is ignored when a gate is accessed.
We must therefore make + * sure that the selector does not reference a Xen-private segment. + * + * Note that selectors used only by IRET do not need to be checked. If the + * descriptor DPL differs from CS RPL then we'll #GP. + * + * Stack and data selectors do not need to be checked. If DS, ES, FS, GS are + * DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs + * from CS RPL then we'll #GP. */ -#define VALID_SEL(_s) \ - (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || ((_s)&4)) && \ - (((_s)&3) == GUEST_KERNEL_RPL)) -#define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s)) +#define guest_gate_selector_okay(sel) \ + ((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */ \ + ((sel) == FLAT_KERNEL_CS) || /* Xen default seg? */ \ + ((sel) & 4)) /* LDT seg? */ /* These are bitmasks for the high 32 bits of a descriptor table entry. */ #define _SEGMENT_TYPE (15<< 8) diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/hvm/domain.h Fri Feb 24 22:41:08 2006 @@ -34,6 +34,7 @@ unsigned long shared_page_va; unsigned int nr_vcpus; unsigned int apic_enabled; + unsigned int pae_enabled; struct hvm_virpit vpit; struct hvm_virpic vpic; diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/hvm/io.h Fri Feb 24 22:41:08 2006 @@ -23,6 +23,7 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vioapic.h> #include <public/hvm/ioreq.h> +#include <public/event_channel.h> #define MAX_OPERAND_NUM 2 diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/hvm/support.h Fri Feb 24 22:41:08 2006 @@ -40,9 +40,9 @@ return &get_sp(d)->vcpu_iodata[cpu]; } -static inline int iopacket_port(struct domain *d) +static inline int iopacket_port(struct vcpu *v) { - return
get_sp(d)->sp_global.eport; + return get_vio(v->domain, v->vcpu_id)->vp_eport; } /* XXX these are really VMX specific */ diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/processor.h Fri Feb 24 22:41:08 2006 @@ -124,7 +124,6 @@ #define TBF_EXCEPTION_ERRCODE 2 #define TBF_INTERRUPT 8 #define TBF_FAILSAFE 16 -#define TBF_SLOW_IRET 32 /* 'arch_vcpu' flags values */ #define _TF_kernel_mode 0 diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/asm-x86/shadow.h Fri Feb 24 22:41:08 2006 @@ -115,8 +115,8 @@ #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1)) #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16)) -extern void shadow_direct_map_clean(struct vcpu *v); -extern int shadow_direct_map_init(struct vcpu *v); +extern void shadow_direct_map_clean(struct domain *d); +extern int shadow_direct_map_init(struct domain *d); extern int shadow_direct_map_fault( unsigned long vpa, struct cpu_user_regs *regs); extern void shadow_mode_init(void); diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/hvm/hvm_info_table.h --- a/xen/include/public/hvm/hvm_info_table.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/public/hvm/hvm_info_table.h Fri Feb 24 22:41:08 2006 @@ -17,7 +17,7 @@ uint8_t checksum; uint8_t acpi_enabled; uint8_t apic_enabled; - uint8_t pad[1]; + uint8_t pae_enabled; uint32_t nr_vcpus; }; diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/public/hvm/ioreq.h Fri Feb 24 22:41:08 2006 @@ -53,6 +53,7 @@ uint8_t dir:1; /* 1=read, 0=write */ uint8_t df:1; uint8_t type; /* I/O type */ + uint64_t io_count; /* How many IO done on a vcpu */ } 
ioreq_t; #define MAX_VECTOR 256 @@ -65,11 +66,13 @@ uint16_t pic_irr; uint16_t pic_last_irr; uint16_t pic_clear_irr; - int eport; /* Event channel port */ } global_iodata_t; typedef struct { - ioreq_t vp_ioreq; + ioreq_t vp_ioreq; + /* Event channel port */ + unsigned long vp_eport; /* VMX vcpu uses this to notify DM */ + unsigned long dm_eport; /* DM uses this to notify VMX vcpu */ } vcpu_iodata_t; typedef struct { diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/io/xs_wire.h --- a/xen/include/public/io/xs_wire.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/public/io/xs_wire.h Fri Feb 24 22:41:08 2006 @@ -54,7 +54,7 @@ XSD_ERROR(EROFS), XSD_ERROR(EBUSY), XSD_ERROR(EAGAIN), - XSD_ERROR(EISCONN), + XSD_ERROR(EISCONN) }; struct xsd_sockmsg @@ -70,7 +70,7 @@ enum xs_watch_type { XS_WATCH_PATH = 0, - XS_WATCH_TOKEN, + XS_WATCH_TOKEN }; /* Inter-domain shared memory communications. */ diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/version.h --- a/xen/include/public/version.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/public/version.h Fri Feb 24 22:41:08 2006 @@ -18,6 +18,7 @@ /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) /* arg == xen_compile_info_t. */ #define XENVER_compile_info 2 @@ -30,9 +31,11 @@ #define XENVER_capabilities 3 typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) #define XENVER_changeset 4 typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 typedef struct xen_platform_parameters { @@ -45,9 +48,34 @@ uint32_t submap; /* OUT: 32-bit submap */ } xen_feature_info_t; +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. 
+ */ #define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ #define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space are handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ #define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 #define XENFEAT_NR_SUBMAPS 1 diff -r d940ec92958d -r 6c43118bdba8 xen/include/xen/domain.h --- a/xen/include/xen/domain.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/xen/domain.h Fri Feb 24 22:41:08 2006 @@ -24,4 +24,6 @@ extern void dump_pageframe_info(struct domain *d); +extern void arch_dump_domain_info(struct domain *d); + #endif /* __XEN_DOMAIN_H__ */ diff -r d940ec92958d -r 6c43118bdba8 xen/include/xen/event.h --- a/xen/include/xen/event.h Fri Feb 24 21:03:07 2006 +++ b/xen/include/xen/event.h Fri Feb 24 22:41:08 2006 @@ -63,4 +63,10 @@ (!!(v)->vcpu_info->evtchn_upcall_pending & \ !(v)->vcpu_info->evtchn_upcall_mask) +/* Send a notification from a local event-channel port. */ +extern long evtchn_send(unsigned int lport); + +/* Bind a local event-channel port to the specified VCPU. 
*/ +extern long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id); + #endif /* __XEN_EVENT_H__ */ diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypercall-x86_32.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/extras/mini-os/include/hypercall-x86_32.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,326 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include <xen/xen.h> +#include <xen/sched.h> +#include <xen/nmi.h> +#include <mm.h> + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" 
(__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_dom0_op( + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) 
+{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + void *op) +{ + return _hypercall1(int, event_channel_op, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return 
_hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypercall-x86_64.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/extras/mini-os/include/hypercall-x86_64.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,326 @@ +/****************************************************************************** + * hypercall-x86_64.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu <benjamin.liu@xxxxxxxxx> + * Jun Nakajima <jun.nakajima@xxxxxxxxx> + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERCALL_X86_64_H__ +#define __HYPERCALL_X86_64_H__ + +#include <xen/xen.h> +#include <xen/sched.h> +#include <mm.h> + +#define __STR(x) #x +#define STR(x) __STR(x) + +extern char hypercall_page[PAGE_SIZE]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" 
* 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)) \ + : "memory", "r10" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %7,%%r10; movq %8,%%r8; " \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "g" ((long)(a4)), \ + "g" ((long)(a5)) \ + : "memory", "r10", "r8" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) +{ + 
return _hypercall1(long, set_timer_op, timeout); +} + +static inline int +HYPERVISOR_dom0_op( + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + void *op) +{ + return _hypercall1(int, event_channel_op, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); 
+} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_set_segment_base( + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +#endif /* __HYPERCALL_X86_64_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/Kconfig.cpu --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Fri Feb 24 22:41:08 2006 @@ -0,0 +1,315 @@ +# Put here option for CPU selection and depending optimization +if !X86_ELAN + +choice + prompt "Processor family" + default M686 + +config M386 + bool "386" + ---help--- + This is the processor type of your CPU. This information is used for + optimizing purposes. In order to compile a kernel that can run on + all x86 CPU types (albeit not optimally fast), you can specify + "386" here. + + The kernel will not necessarily run on earlier architectures than + the one you have chosen, e.g. a Pentium optimized kernel will run on + a PPro, but not necessarily on a i486. + + Here are the settings recommended for greatest speed: + - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI + 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels + will run on a 386 class machine. 
+ - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + - "586" for generic Pentium CPUs lacking the TSC + (time stamp counter) register. + - "Pentium-Classic" for the Intel Pentium. + - "Pentium-MMX" for the Intel Pentium MMX. + - "Pentium-Pro" for the Intel Pentium Pro. + - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. + - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. + - "Winchip-2" for IDT Winchip 2. + - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" for AMD Geode GX and LX processors. + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). + + If you don't know what to do, choose "386". + +config M486 + bool "486" + help + Select this for a 486 series processor, either Intel or one of the + compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, + DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or + U5S. + +config M586 + bool "586/K5/5x86/6x86/6x86MX" + help + Select this for a 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. + +config M586TSC + bool "Pentium-Classic" + help + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + +config M586MMX + bool "Pentium-MMX" + help + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. 
+ +config M686 + bool "Pentium-Pro" + help + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. + +config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + help + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags + tailored for the chip, and applies any applicable Pentium Pro + optimizations. + +config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + help + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. This option enables use of some + extended prefetch instructions in addition to the Pentium II + extensions. + +config MPENTIUMM + bool "Pentium M" + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + help + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M + (not Pentium M) chips. This option enables compile flags + optimized for the chip, uses the correct cache shift, and + applies any applicable Pentium III optimizations. + +config MK6 + bool "K6/K6-II/K6-III" + help + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK7 + bool "Athlon/Duron/K7" + help + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK8 + bool "Opteron/Athlon64/Hammer/K8" + help + Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables + use of some extended instructions, and passes appropriate optimization + flags to GCC. 
+ +config MCRUSOE + bool "Crusoe" + help + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). + +config MEFFICEON + bool "Efficeon" + help + Select this for a Transmeta Efficeon processor. + +config MWINCHIPC6 + bool "Winchip-C6" + help + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP2 + bool "Winchip-2" + help + Select this for an IDT Winchip-2. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP3D + bool "Winchip-2A/Winchip-3" + help + Select this for an IDT Winchip-2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. Also enable out of order memory + stores for this CPU, which can increase performance of some + operations. + +config MGEODEGX1 + bool "GeodeGX1" + help + Select this for a Geode GX1 (Cyrix MediaGX) chip. + +config MGEODE_LX + bool "Geode GX/LX" + help + Select this for AMD Geode GX and LX processors. + +config MCYRIXIII + bool "CyrixIII/VIA-C3" + help + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when + generating 686 code. + Note that Nehemiah (Model 9) and above will not boot with this + kernel due to them lacking the 3DNow! instructions used in earlier + incarnations of the CPU. + +config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + help + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. 
+ +endchoice + +config X86_GENERIC + bool "Generic x86 support" + help + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel + perform better on x86 CPUs other than that selected. + + This is really intended for distributors who need more + generic optimizations. + +endif + +# +# Define implied options from the CPU selection here +# +config X86_CMPXCHG + bool + depends on !M386 + default y + +config X86_XADD + bool + depends on !M386 + default y + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || X86_GENERIC + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + default "6" if MK7 || MK8 || MPENTIUMM + +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !M386 + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config X86_PPRO_FENCE + bool + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 + default y + +config X86_F00F_BUG + bool + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT + default y + +config X86_WP_WORKS_OK + bool + depends on !M386 + default y + +config X86_INVLPG + bool + depends on !M386 + default y + +config X86_BSWAP + bool + depends on !M386 + default y + +config X86_POPAD_OK + bool + depends on !M386 + default y + +config X86_CMPXCHG64 + bool + depends on !M386 && !M486 + default y + +config X86_ALIGNMENT_16 + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + default y + +config X86_GOOD_APIC + bool + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX 
|| MK8 || MEFFICEON + default y + +config X86_INTEL_USERCOPY + bool + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON + default y + +config X86_USE_PPRO_CHECKSUM + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX + default y + +config X86_USE_3DNOW + bool + depends on MCYRIXIII || MK7 || MGEODE_LX + default y + +config X86_OOSTORE + bool + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + default y + +config X86_TSC + bool + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ + default y diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c Fri Feb 24 22:41:08 2006 @@ -0,0 +1,75 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. 
+ */ + +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/suspend.h> +#include <asm/ucontext.h> +#include "sigframe.h" +#include <asm/fixmap.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/elf.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(SIGCONTEXT_eax, sigcontext, eax); + OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); + OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); + OFFSET(SIGCONTEXT_edx, sigcontext, edx); + OFFSET(SIGCONTEXT_esi, sigcontext, esi); + OFFSET(SIGCONTEXT_edi, sigcontext, edi); + OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); + OFFSET(SIGCONTEXT_esp, sigcontext, esp); + OFFSET(SIGCONTEXT_eip, sigcontext, eip); + BLANK(); + + OFFSET(CPUINFO_x86, cpuinfo_x86, x86); + OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); + OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); + OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); + OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); + OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); + OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); + BLANK(); + + OFFSET(TI_task, thread_info, task); + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_status, thread_info, status); + OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_addr_limit, thread_info, addr_limit); + OFFSET(TI_restart_block, thread_info, restart_block); + BLANK(); + + OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); + OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); + BLANK(); + + OFFSET(pbe_address, pbe, address); + OFFSET(pbe_orig_address, pbe, orig_address); + OFFSET(pbe_next, pbe, next); + +#ifdef 
CONFIG_X86_SYSENTER + /* Offset from the sysenter stack to tss.esp0 */ + DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - + sizeof(struct tss_struct)); +#endif + + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(VSYSCALL_BASE, VSYSCALL_BASE); +} diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Fri Feb 24 22:41:08 2006 @@ -0,0 +1,137 @@ +/* + * linux/arch/i386/kernel/sysenter.c + * + * (C) Copyright 2002 Linus Torvalds + * + * This file contains the needed initializations to support sysenter. + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/sched.h> +#include <linux/gfp.h> +#include <linux/string.h> +#include <linux/elf.h> +#include <linux/mm.h> + +#include <asm/cpufeature.h> +#include <asm/msr.h> +#include <asm/pgtable.h> +#include <asm/unistd.h> + +extern asmlinkage void sysenter_entry(void); + +void enable_sep_cpu(void) +{ +#ifdef CONFIG_X86_SYSENTER + int cpu = get_cpu(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); + return; + } + + tss->ss1 = __KERNEL_CS; + tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); + put_cpu(); +#endif +} + +/* + * These symbols are defined by vsyscall.o to mark the bounds + * of the ELF DSO images included therein. 
+ */ +extern const char vsyscall_int80_start, vsyscall_int80_end; +extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; +static void *syscall_page; + +int __init sysenter_setup(void) +{ + syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + +#ifdef CONFIG_X86_SYSENTER + if (boot_cpu_has(X86_FEATURE_SEP)) { + memcpy(syscall_page, + &vsyscall_sysenter_start, + &vsyscall_sysenter_end - &vsyscall_sysenter_start); + return 0; + } +#endif + + memcpy(syscall_page, + &vsyscall_int80_start, + &vsyscall_int80_end - &vsyscall_int80_start); + + return 0; +} + +static struct page* +syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) +{ + struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); + get_page(p); + return p; +} + +/* Prevent VMA merging */ +static void syscall_vma_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct syscall_vm_ops = { + .close = syscall_vma_close, + .nopage = syscall_nopage, +}; + +/* Setup a VMA at program startup for the vsyscall page */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + int ret; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) + return -ENOMEM; + + memset(vma, 0, sizeof(struct vm_area_struct)); + /* Could randomize here */ + vma->vm_start = VSYSCALL_BASE; + vma->vm_end = VSYSCALL_BASE + PAGE_SIZE; + /* MAYWRITE to allow gdb to COW and set breakpoints */ + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall_vm_ops; + vma->vm_mm = mm; + + down_write(&mm->mmap_sem); + if ((ret = insert_vm_struct(mm, vma))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return ret; + } + mm->total_vm++; + up_write(&mm->mmap_sem); + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + 
return NULL; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c Fri Feb 24 22:41:08 2006 @@ -0,0 +1,817 @@ +/* + * linux/kernel/vm86.c + * + * Copyright (C) 1994 Linus Torvalds + * + * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 + * stack - Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> + * + * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle + * them correctly. Now the emulation will be in a + * consistent state after stackfaults - Kasper Dupont + * <kasperd@xxxxxxxxxxx> + * + * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont + * <kasperd@xxxxxxxxxxx> + * + * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault + * caused by Kasper Dupont's changes - Stas Sergeev + * + * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. + * Kasper Dupont <kasperd@xxxxxxxxxxx> + * + * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. + * Kasper Dupont <kasperd@xxxxxxxxxxx> + * + * 9 apr 2002 - Changed stack access macros to jump to a label + * instead of returning to userspace. This simplifies + * do_int, and is needed by handle_vm6_fault. 
Kasper + * Dupont <kasperd@xxxxxxxxxxx> + * + */ + +#include <linux/capability.h> +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/highmem.h> +#include <linux/ptrace.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/tlbflush.h> +#include <asm/irq.h> + +/* + * Known problems: + * + * Interrupt handling is not guaranteed: + * - a real x86 will disable all interrupts for one instruction + * after a "mov ss,xx" to make stack handling atomic even without + * the 'lss' instruction. We can't guarantee this in v86 mode, + * as the next instruction might result in a page fault or similar. + * - a real x86 will have interrupts disabled for one instruction + * past the 'sti' that enables them. We don't bother with all the + * details yet. + * + * Let's hope these problems do not actually matter for anything. + */ + + +#define KVM86 ((struct kernel_vm86_struct *)regs) +#define VMPI KVM86->vm86plus + + +/* + * 8- and 16-bit register defines.. 
+ */ +#define AL(regs) (((unsigned char *)&((regs)->eax))[0]) +#define AH(regs) (((unsigned char *)&((regs)->eax))[1]) +#define IP(regs) (*(unsigned short *)&((regs)->eip)) +#define SP(regs) (*(unsigned short *)&((regs)->esp)) + +/* + * virtual flags (16 and 32-bit versions) + */ +#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) +#define VEFLAGS (current->thread.v86flags) + +#define set_flags(X,new,mask) \ +((X) = ((X) & ~(mask)) | ((new) & (mask))) + +#define SAFE_MASK (0xDD5) +#define RETURN_MASK (0xDFF) + +#define VM86_REGS_PART2 orig_eax +#define VM86_REGS_SIZE1 \ + ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) +#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) + +struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); +struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) +{ +#ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss; +#endif + struct pt_regs *ret; + unsigned long tmp; + + /* + * This gets called from entry.S with interrupts disabled, but + * from process context. Enable interrupts here, before trying + * to access user space. 
+ */ + local_irq_enable(); + + if (!current->thread.vm86_info) { + printk("no vm86_info: BAD\n"); + do_exit(SIGSEGV); + } + set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); + tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1); + tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2, + &regs->VM86_REGS_PART2, VM86_REGS_SIZE2); + tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap); + if (tmp) { + printk("vm86: could not access userspace vm86_info\n"); + do_exit(SIGSEGV); + } + +#ifndef CONFIG_X86_NO_TSS + tss = &per_cpu(init_tss, get_cpu()); +#endif + current->thread.esp0 = current->thread.saved_esp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_esp0(tss, &current->thread); + current->thread.saved_esp0 = 0; + put_cpu(); + + loadsegment(fs, current->thread.saved_fs); + loadsegment(gs, current->thread.saved_gs); + ret = KVM86->regs32; + return ret; +} + +static void mark_screen_rdonly(struct mm_struct *mm) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + int i; + + pgd = pgd_offset(mm, 0xA0000); + if (pgd_none_or_clear_bad(pgd)) + goto out; + pud = pud_offset(pgd, 0xA0000); + if (pud_none_or_clear_bad(pud)) + goto out; + pmd = pmd_offset(pud, 0xA0000); + if (pmd_none_or_clear_bad(pmd)) + goto out; + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); + for (i = 0; i < 32; i++) { + if (pte_present(*pte)) + set_pte(pte, pte_wrprotect(*pte)); + pte++; + } + pte_unmap_unlock(pte, ptl); +out: + flush_tlb(); +} + + + +static int do_vm86_irq_handling(int subfunction, int irqnumber); +static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); + +asmlinkage int sys_vm86old(struct pt_regs regs) +{ + struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. + * This remains on the stack until we + * return to 32 bit user space. 
+ */ + struct task_struct *tsk; + int tmp, ret = -EPERM; + + tsk = current; + if (tsk->thread.saved_esp0) + goto out; + tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); + tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, + (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2); + ret = -EFAULT; + if (tmp) + goto out; + memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); + info.regs32 = &regs; + tsk->thread.vm86_info = v86; + do_sys_vm86(&info, tsk); + ret = 0; /* we never return here */ +out: + return ret; +} + + +asmlinkage int sys_vm86(struct pt_regs regs) +{ + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. + * This remains on the stack until we + * return to 32 bit user space. + */ + struct task_struct *tsk; + int tmp, ret; + struct vm86plus_struct __user *v86; + + tsk = current; + switch (regs.ebx) { + case VM86_REQUEST_IRQ: + case VM86_FREE_IRQ: + case VM86_GET_IRQ_BITS: + case VM86_GET_AND_RESET_IRQ: + ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); + goto out; + case VM86_PLUS_INSTALL_CHECK: + /* NOTE: on old vm86 stuff this will return the error + from access_ok(), because the subfunction is + interpreted as (invalid) address to vm86_struct. + So the installation check works. 
+ */ + ret = 0; + goto out; + } + + /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ + ret = -EPERM; + if (tsk->thread.saved_esp0) + goto out; + v86 = (struct vm86plus_struct __user *)regs.ecx; + tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); + tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, + (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2); + ret = -EFAULT; + if (tmp) + goto out; + info.regs32 = &regs; + info.vm86plus.is_vm86pus = 1; + tsk->thread.vm86_info = (struct vm86_struct __user *)v86; + do_sys_vm86(&info, tsk); + ret = 0; /* we never return here */ +out: + return ret; +} + + +static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) +{ +#ifndef CONFIG_X86_NO_TSS + struct tss_struct *tss; +#endif +/* + * make sure the vm86() system call doesn't try to do anything silly + */ + info->regs.__null_ds = 0; + info->regs.__null_es = 0; + +/* we are clearing fs,gs later just before "jmp resume_userspace", + * because starting with Linux 2.1.x they aren't no longer saved/restored + */ + +/* + * The eflags register is also special: we cannot trust that the user + * has set it up safely, so this makes sure interrupt etc flags are + * inherited from protected mode. 
+ */ + VEFLAGS = info->regs.eflags; + info->regs.eflags &= SAFE_MASK; + info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK; + info->regs.eflags |= VM_MASK; + + switch (info->cpu_type) { + case CPU_286: + tsk->thread.v86mask = 0; + break; + case CPU_386: + tsk->thread.v86mask = NT_MASK | IOPL_MASK; + break; + case CPU_486: + tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; + break; + default: + tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; + break; + } + +/* + * Save old state, set default return value (%eax) to 0 + */ + info->regs32->eax = 0; + tsk->thread.saved_esp0 = tsk->thread.esp0; + savesegment(fs, tsk->thread.saved_fs); + savesegment(gs, tsk->thread.saved_gs); + +#ifndef CONFIG_X86_NO_TSS + tss = &per_cpu(init_tss, get_cpu()); +#endif + tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; + load_esp0(tss, &tsk->thread); + put_cpu(); + + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); + __asm__ __volatile__( + "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" + "movl %0,%%esp\n\t" + "movl %1,%%ebp\n\t" + "jmp resume_userspace" + : /* no outputs */ + :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); + /* we never return here */ +} + +static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) +{ + struct pt_regs * regs32; + + regs32 = save_v86_state(regs16); + regs32->eax = retval; + __asm__ __volatile__("movl %0,%%esp\n\t" + "movl %1,%%ebp\n\t" + "jmp resume_userspace" + : : "r" (regs32), "r" (current_thread_info())); +} + +static inline void set_IF(struct kernel_vm86_regs * regs) +{ + VEFLAGS |= VIF_MASK; + if (VEFLAGS & VIP_MASK) + return_to_32bit(regs, VM86_STI); +} + +static inline void clear_IF(struct kernel_vm86_regs * regs) +{ + VEFLAGS &= ~VIF_MASK; +} + +static inline void clear_TF(struct kernel_vm86_regs * regs) +{ + regs->eflags &= ~TF_MASK; +} + +static inline void 
clear_AC(struct kernel_vm86_regs * regs) +{ + regs->eflags &= ~AC_MASK; +} + +/* It is correct to call set_IF(regs) from the set_vflags_* + * functions. However someone forgot to call clear_IF(regs) + * in the opposite case. + * After the command sequence CLI PUSHF STI POPF you should + * end up with interrups disabled, but you ended up with + * interrupts enabled. + * ( I was testing my own changes, but the only bug I + * could find was in a function I had not changed. ) + * [KD] + */ + +static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) +{ + set_flags(VEFLAGS, eflags, current->thread.v86mask); + set_flags(regs->eflags, eflags, SAFE_MASK); + if (eflags & IF_MASK) + set_IF(regs); + else + clear_IF(regs); +} + +static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) +{ + set_flags(VFLAGS, flags, current->thread.v86mask); + set_flags(regs->eflags, flags, SAFE_MASK); + if (flags & IF_MASK) + set_IF(regs); + else + clear_IF(regs); +} + +static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) +{ + unsigned long flags = regs->eflags & RETURN_MASK; + + if (VEFLAGS & VIF_MASK) + flags |= IF_MASK; + flags |= IOPL_MASK; + return flags | (VEFLAGS & current->thread.v86mask); +} + +static inline int is_revectored(int nr, struct revectored_struct * bitmap) +{ + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (nr) + :"m" (*bitmap),"r" (nr)); + return nr; +} + +#define val_byte(val, n) (((__u8 *)&val)[n]) + +#define pushb(base, ptr, val, err_label) \ + do { \ + __u8 __val = val; \ + ptr--; \ + if (put_user(__val, base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define pushw(base, ptr, val, err_label) \ + do { \ + __u16 __val = val; \ + ptr--; \ + if (put_user(val_byte(__val, 1), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 0), base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define pushl(base, ptr, val, err_label) \ + do { \ + __u32 __val 
= val; \ + ptr--; \ + if (put_user(val_byte(__val, 3), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 2), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 1), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 0), base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define popb(base, ptr, err_label) \ + ({ \ + __u8 __res; \ + if (get_user(__res, base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +#define popw(base, ptr, err_label) \ + ({ \ + __u16 __res; \ + if (get_user(val_byte(__res, 0), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 1), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +#define popl(base, ptr, err_label) \ + ({ \ + __u32 __res; \ + if (get_user(val_byte(__res, 0), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 1), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 2), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 3), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +/* There are so many possible reasons for this function to return + * VM86_INTx, so adding another doesn't bother me. We can expect + * userspace programs to be able to handle it. (Getting a problem + * in userspace is always better than an Oops anyway.) 
[KD] + */ +static void do_int(struct kernel_vm86_regs *regs, int i, + unsigned char __user * ssp, unsigned short sp) +{ + unsigned long __user *intr_ptr; + unsigned long segoffs; + + if (regs->cs == BIOSSEG) + goto cannot_handle; + if (is_revectored(i, &KVM86->int_revectored)) + goto cannot_handle; + if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) + goto cannot_handle; + intr_ptr = (unsigned long __user *) (i << 2); + if (get_user(segoffs, intr_ptr)) + goto cannot_handle; + if ((segoffs >> 16) == BIOSSEG) + goto cannot_handle; + pushw(ssp, sp, get_vflags(regs), cannot_handle); + pushw(ssp, sp, regs->cs, cannot_handle); + pushw(ssp, sp, IP(regs), cannot_handle); + regs->cs = segoffs >> 16; + SP(regs) -= 6; + IP(regs) = segoffs & 0xffff; + clear_TF(regs); + clear_IF(regs); + clear_AC(regs); + return; + +cannot_handle: + return_to_32bit(regs, VM86_INTx + (i << 8)); +} + +int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) +{ + if (VMPI.is_vm86pus) { + if ( (trapno==3) || (trapno==1) ) + return_to_32bit(regs, VM86_TRAP + (trapno << 8)); + do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs)); + return 0; + } + if (trapno !=1) + return 1; /* we let this handle by the calling routine */ + if (current->ptrace & PT_PTRACED) { + unsigned long flags; + spin_lock_irqsave(&current->sighand->siglock, flags); + sigdelset(&current->blocked, SIGTRAP); + recalc_sigpending(); + spin_unlock_irqrestore(&current->sighand->siglock, flags); + } + send_sig(SIGTRAP, current, 1); + current->thread.trap_no = trapno; + current->thread.error_code = error_code; + return 0; +} + +void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) +{ + unsigned char opcode; + unsigned char __user *csp; + unsigned char __user *ssp; + unsigned short ip, sp, orig_flags; + int data32, pref_done; + +#define CHECK_IF_IN_TRAP \ + if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ + newflags |= TF_MASK +#define VM86_FAULT_RETURN do { \ + if 
(VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ + return_to_32bit(regs, VM86_PICRETURN); \ + if (orig_flags & TF_MASK) \ + handle_vm86_trap(regs, 0, 1); \ + return; } while (0) + + orig_flags = *(unsigned short *)&regs->eflags; + + csp = (unsigned char __user *) (regs->cs << 4); + ssp = (unsigned char __user *) (regs->ss << 4); + sp = SP(regs); + ip = IP(regs); + + data32 = 0; + pref_done = 0; + do { + switch (opcode = popb(csp, ip, simulate_sigsegv)) { + case 0x66: /* 32-bit data */ data32=1; break; + case 0x67: /* 32-bit address */ break; + case 0x2e: /* CS */ break; + case 0x3e: /* DS */ break; + case 0x26: /* ES */ break; + case 0x36: /* SS */ break; + case 0x65: /* GS */ break; + case 0x64: /* FS */ break; + case 0xf2: /* repnz */ break; + case 0xf3: /* rep */ break; + default: pref_done = 1; + } + } while (!pref_done); + + switch (opcode) { + + /* pushf */ + case 0x9c: + if (data32) { + pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); + SP(regs) -= 4; + } else { + pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); + SP(regs) -= 2; + } + IP(regs) = ip; + VM86_FAULT_RETURN; + + /* popf */ + case 0x9d: + { + unsigned long newflags; + if (data32) { + newflags=popl(ssp, sp, simulate_sigsegv); + SP(regs) += 4; + } else { + newflags = popw(ssp, sp, simulate_sigsegv); + SP(regs) += 2; + } + IP(regs) = ip; + CHECK_IF_IN_TRAP; + if (data32) { + set_vflags_long(newflags, regs); + } else { + set_vflags_short(newflags, regs); + } + VM86_FAULT_RETURN; + } + + /* int xx */ + case 0xcd: { + int intno=popb(csp, ip, simulate_sigsegv); + IP(regs) = ip; + if (VMPI.vm86dbg_active) { + if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) + return_to_32bit(regs, VM86_INTx + (intno << 8)); + } + do_int(regs, intno, ssp, sp); + return; + } + + /* iret */ + case 0xcf: + { + unsigned long newip; + unsigned long newcs; + unsigned long newflags; + if (data32) { + newip=popl(ssp, sp, simulate_sigsegv); + newcs=popl(ssp, sp, simulate_sigsegv); + 
newflags=popl(ssp, sp, simulate_sigsegv); + SP(regs) += 12; + } else { + newip = popw(ssp, sp, simulate_sigsegv); + newcs = popw(ssp, sp, simulate_sigsegv); + newflags = popw(ssp, sp, simulate_sigsegv); + SP(regs) += 6; + } + IP(regs) = newip; + regs->cs = newcs; + CHECK_IF_IN_TRAP; + if (data32) { + set_vflags_long(newflags, regs); + } else { + set_vflags_short(newflags, regs); + } + VM86_FAULT_RETURN; + } + + /* cli */ + case 0xfa: + IP(regs) = ip; + clear_IF(regs); + VM86_FAULT_RETURN; + + /* sti */ + /* + * Damn. This is incorrect: the 'sti' instruction should actually + * enable interrupts after the /next/ instruction. Not good. + * + * Probably needs some horsing around with the TF flag. Aiee.. + */ + case 0xfb: + IP(regs) = ip; + set_IF(regs); + VM86_FAULT_RETURN; + + default: + return_to_32bit(regs, VM86_UNKNOWN); + } + + return; + +simulate_sigsegv: + /* FIXME: After a long discussion with Stas we finally + * agreed, that this is wrong. Here we should + * really send a SIGSEGV to the user program. + * But how do we create the correct context? We + * are inside a general protection fault handler + * and has just returned from a page fault handler. + * The correct context for the signal handler + * should be a mixture of the two, but how do we + * get the information? [KD] + */ + return_to_32bit(regs, VM86_UNKNOWN); +} + +/* ---------------- vm86 special IRQ passing stuff ----------------- */ + +#define VM86_IRQNAME "vm86irq" + +static struct vm86_irqs { + struct task_struct *tsk; + int sig; +} vm86_irqs[16]; + +static DEFINE_SPINLOCK(irqbits_lock); +static int irqbits; + +#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \ + | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ + | (1 << SIGUNUSED) ) + +static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs) +{ + int irq_bit; + unsigned long flags; + + spin_lock_irqsave(&irqbits_lock, flags); + irq_bit = 1 << intno; + if ((irqbits & irq_bit) || ! 
vm86_irqs[intno].tsk) + goto out; + irqbits |= irq_bit; + if (vm86_irqs[intno].sig) + send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); + /* + * IRQ will be re-enabled when user asks for the irq (whether + * polling or as a result of the signal) + */ + disable_irq_nosync(intno); + spin_unlock_irqrestore(&irqbits_lock, flags); + return IRQ_HANDLED; + +out: + spin_unlock_irqrestore(&irqbits_lock, flags); + return IRQ_NONE; +} + +static inline void free_vm86_irq(int irqnumber) +{ + unsigned long flags; + + free_irq(irqnumber, NULL); + vm86_irqs[irqnumber].tsk = NULL; + + spin_lock_irqsave(&irqbits_lock, flags); + irqbits &= ~(1 << irqnumber); + spin_unlock_irqrestore(&irqbits_lock, flags); +} + +void release_vm86_irqs(struct task_struct *task) +{ + int i; + for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) + if (vm86_irqs[i].tsk == task) + free_vm86_irq(i); +} + +static inline int get_and_reset_irq(int irqnumber) +{ + int bit; + unsigned long flags; + int ret = 0; + + if (invalid_vm86_irq(irqnumber)) return 0; + if (vm86_irqs[irqnumber].tsk != current) return 0; + spin_lock_irqsave(&irqbits_lock, flags); + bit = irqbits & (1 << irqnumber); + irqbits &= ~bit; + if (bit) { + enable_irq(irqnumber); + ret = 1; + } + + spin_unlock_irqrestore(&irqbits_lock, flags); + return ret; +} + + +static int do_vm86_irq_handling(int subfunction, int irqnumber) +{ + int ret; + switch (subfunction) { + case VM86_GET_AND_RESET_IRQ: { + return get_and_reset_irq(irqnumber); + } + case VM86_GET_IRQ_BITS: { + return irqbits; + } + case VM86_REQUEST_IRQ: { + int sig = irqnumber >> 8; + int irq = irqnumber & 255; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; + if (invalid_vm86_irq(irq)) return -EPERM; + if (vm86_irqs[irq].tsk) return -EPERM; + ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); + if (ret) return ret; + vm86_irqs[irq].sig = sig; + vm86_irqs[irq].tsk = current; + return irq; + } + case VM86_FREE_IRQ: { + if 
(invalid_vm86_irq(irqnumber)) return -EPERM; + if (!vm86_irqs[irqnumber].tsk) return 0; + if (vm86_irqs[irqnumber].tsk != current) return -EPERM; + free_vm86_irq(irqnumber); + return 0; + } + } + return -EINVAL; +} + diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S Fri Feb 24 22:41:08 2006 @@ -0,0 +1,17 @@ +#include <linux/init.h> + +__INITDATA + + .globl vsyscall_int80_start, vsyscall_int80_end +vsyscall_int80_start: + .incbin "arch/i386/kernel/vsyscall-int80.so" +vsyscall_int80_end: + +#ifdef CONFIG_X86_SYSENTER + .globl vsyscall_sysenter_start, vsyscall_sysenter_end +vsyscall_sysenter_start: + .incbin "arch/i386/kernel/vsyscall-sysenter.so" +vsyscall_sysenter_end: +#endif + +__FINIT diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c Fri Feb 24 22:41:08 2006 @@ -0,0 +1,74 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. 
+ */ + +#include <linux/sched.h> +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/suspend.h> +#include <asm/pda.h> +#include <asm/processor.h> +#include <asm/segment.h> +#include <asm/thread_info.h> +#include <asm/ia32.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +int main(void) +{ +#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) + ENTRY(state); + ENTRY(flags); + ENTRY(thread); + ENTRY(pid); + BLANK(); +#undef ENTRY +#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry)) + ENTRY(flags); + ENTRY(addr_limit); + ENTRY(preempt_count); + ENTRY(status); + BLANK(); +#undef ENTRY +#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) + ENTRY(kernelstack); + ENTRY(oldrsp); + ENTRY(pcurrent); + ENTRY(irqcount); + ENTRY(cpunumber); + ENTRY(irqstackptr); + ENTRY(data_offset); + BLANK(); +#undef ENTRY +#ifdef CONFIG_IA32_EMULATION +#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) + ENTRY(eax); + ENTRY(ebx); + ENTRY(ecx); + ENTRY(edx); + ENTRY(esi); + ENTRY(edi); + ENTRY(ebp); + ENTRY(esp); + ENTRY(eip); + BLANK(); +#undef ENTRY + DEFINE(IA32_RT_SIGFRAME_sigcontext, + offsetof (struct rt_sigframe32, uc.uc_mcontext)); + BLANK(); +#endif + DEFINE(pbe_address, offsetof(struct pbe, address)); + DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); + DEFINE(pbe_next, offsetof(struct pbe, next)); +#ifndef CONFIG_X86_NO_TSS + BLANK(); + DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); +#endif + return 0; +} diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c Fri Feb 24 22:41:08 2006 @@ -0,0 +1,52 @@ +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> 
+#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/fs.h> +#include <linux/mqueue.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/desc.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is 8192-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); + +#ifndef CONFIG_X86_NO_TSS +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +#endif + +#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/video/Kconfig --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/drivers/video/Kconfig Fri Feb 24 22:41:08 2006 @@ -0,0 +1,1462 @@ +# +# Video configuration +# + +menu "Graphics support" + +config FB + tristate "Support for frame buffer devices" + ---help--- + The frame buffer device provides an abstraction for the graphics + hardware. 
It represents the frame buffer of some video hardware and + allows application software to access the graphics hardware through + a well-defined interface, so the software doesn't need to know + anything about the low-level (hardware register) stuff. + + Frame buffer devices work identically across the different + architectures supported by Linux and make the implementation of + application programs easier and more portable; at this point, an X + server exists which uses the frame buffer device exclusively. + On several non-X86 architectures, the frame buffer device is the + only way to use the graphics hardware. + + The device is accessed through special device nodes, usually located + in the /dev directory, i.e. /dev/fb*. + + You need an utility program called fbset to make full use of frame + buffer devices. Please read <file:Documentation/fb/framebuffer.txt> + and the Framebuffer-HOWTO at + <http://www.tahallah.demon.co.uk/programming/prog.html> for more + information. + + Say Y here and to the driver for your graphics board below if you + are compiling a kernel for a non-x86 architecture. + + If you are compiling for the x86 architecture, you can say Y if you + want to play with it, but it is not essential. Please note that + running graphical applications that directly touch the hardware + (e.g. an accelerated X server) and that are not frame buffer + device-aware may cause unexpected results. If unsure, say N. + +config FB_CFB_FILLRECT + tristate + depends on FB + default n + ---help--- + Include the cfb_fillrect function for generic software rectangle + filling. This is used by drivers that don't provide their own + (accelerated) version. + +config FB_CFB_COPYAREA + tristate + depends on FB + default n + ---help--- + Include the cfb_copyarea function for generic software area copying. + This is used by drivers that don't provide their own (accelerated) + version. 
+ +config FB_CFB_IMAGEBLIT + tristate + depends on FB + default n + ---help--- + Include the cfb_imageblit function for generic software image + blitting. This is used by drivers that don't provide their own + (accelerated) version. + +config FB_MACMODES + tristate + depends on FB + default n + +config FB_MODE_HELPERS + bool "Enable Video Mode Handling Helpers" + depends on FB + default n + ---help--- + This enables functions for handling video modes using the + Generalized Timing Formula and the EDID parser. A few drivers rely + on this feature such as the radeonfb, rivafb, and the i810fb. If + your driver does not take advantage of this feature, choosing Y will + just increase the kernel size by about 5K. + +config FB_TILEBLITTING + bool "Enable Tile Blitting Support" + depends on FB + default n + ---help--- + This enables tile blitting. Tile blitting is a drawing technique + where the screen is divided into rectangular sections (tiles), whereas + the standard blitting divides the screen into pixels. Because the + default drawing element is a tile, drawing functions will be passed + parameters in terms of number of tiles instead of number of pixels. + For example, to draw a single character, instead of using bitmaps, + an index to an array of bitmaps will be used. To clear or move a + rectangular section of a screen, the rectangle will be described in + terms of number of tiles in the x- and y-axis. + + This is particularly important to one driver, matroxfb. If + unsure, say N. + +config FB_CIRRUS + tristate "Cirrus Logic support" + depends on FB && (ZORRO || PCI) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + This enables support for Cirrus Logic GD542x/543x based boards on + Amiga: SD64, Piccolo, Picasso II/II+, Picasso IV, or EGS Spectrum. + + If you have a PCI-based system, this enables support for these + chips: GD-543x, GD-544x, GD-5480. + + Please read the file <file:Documentation/fb/cirrusfb.txt>. 
+ + Say N unless you have such a graphics board or plan to get one + before you next recompile the kernel. + +config FB_PM2 + tristate "Permedia2 support" + depends on FB && ((AMIGA && BROKEN) || PCI) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Permedia2 AGP frame + buffer card from ASK, aka `Graphic Blaster Exxtreme'. There is a + product page at + <http://www.ask.com.hk/product/Permedia%202/permedia2.htm>. + +config FB_PM2_FIFO_DISCONNECT + bool "enable FIFO disconnect feature" + depends on FB_PM2 && PCI + help + Support the Permedia2 FIFO disconnect feature (see CONFIG_FB_PM2). + +config FB_ARMCLCD + tristate "ARM PrimeCell PL110 support" + depends on FB && ARM && ARM_AMBA + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This framebuffer device driver is for the ARM PrimeCell PL110 + Colour LCD controller. ARM PrimeCells provide the building + blocks for System on a Chip devices. + + If you want to compile this as a module (=code which can be + inserted into and removed from the running kernel), say M + here and read <file:Documentation/modules.txt>. The module + will be called amba-clcd. + +config FB_ACORN + bool "Acorn VIDC support" + depends on (FB = y) && ARM && (ARCH_ACORN || ARCH_CLPS7500) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Acorn VIDC graphics + hardware found in Acorn RISC PCs and other ARM-based machines. If + unsure, say N. + +config FB_CLPS711X + bool "CLPS711X LCD support" + depends on (FB = y) && ARM && ARCH_CLPS711X + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Say Y to enable the Framebuffer driver for the CLPS7111 and + EP7212 processors. 
+ +config FB_SA1100 + bool "SA-1100 LCD support" + depends on (FB = y) && ARM && ARCH_SA1100 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is a framebuffer device for the SA-1100 LCD Controller. + See <http://www.linux-fbdev.org/> for information on framebuffer + devices. + + If you plan to use the LCD display with your SA-1100 system, say + Y here. + +config FB_IMX + tristate "Motorola i.MX LCD support" + depends on FB && ARM && ARCH_IMX + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + +config FB_CYBER2000 + tristate "CyberPro 2000/2010/5000 support" + depends on FB && PCI && (BROKEN || !SPARC64) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This enables support for the Integraphics CyberPro 20x0 and 5000 + VGA chips used in the Rebel.com Netwinder and other machines. + Say Y if you have a NetWinder or a graphics card containing this + device, otherwise say N. + +config FB_APOLLO + bool + depends on (FB = y) && APOLLO + default y + select FB_CFB_FILLRECT + select FB_CFB_IMAGEBLIT + +config FB_Q40 + bool + depends on (FB = y) && Q40 + default y + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + +config FB_AMIGA + tristate "Amiga native chipset support" + depends on FB && AMIGA + help + This is the frame buffer device driver for the builtin graphics + chipset found in Amigas. + + To compile this driver as a module, choose M here: the + module will be called amifb. + +config FB_AMIGA_OCS + bool "Amiga OCS chipset support" + depends on FB_AMIGA + help + This enables support for the original Agnus and Denise video chips, + found in the Amiga 1000 and most A500's and A2000's. If you intend + to run Linux on any of these systems, say Y; otherwise say N. 
+ +config FB_AMIGA_ECS + bool "Amiga ECS chipset support" + depends on FB_AMIGA + help + This enables support for the Enhanced Chip Set, found in later + A500's, later A2000's, the A600, the A3000, the A3000T and CDTV. If + you intend to run Linux on any of these systems, say Y; otherwise + say N. + +config FB_AMIGA_AGA + bool "Amiga AGA chipset support" + depends on FB_AMIGA + help + This enables support for the Advanced Graphics Architecture (also + known as the AGA or AA) Chip Set, found in the A1200, A4000, A4000T + and CD32. If you intend to run Linux on any of these systems, say Y; + otherwise say N. + +config FB_CYBER + tristate "Amiga CyberVision 64 support" + depends on FB && ZORRO && BROKEN + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This enables support for the Cybervision 64 graphics card from + Phase5. Please note that its use is not all that intuitive (i.e. if + you have any questions, be sure to ask!). Say N unless you have a + Cybervision 64 or plan to get one before you next recompile the + kernel. Please note that this driver DOES NOT support the + Cybervision 64/3D card, as they use incompatible video chips. + +config FB_VIRGE + bool "Amiga CyberVision 64/3D support " + depends on (FB = y) && ZORRO && BROKEN + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This enables support for the Cybervision 64/3D graphics card from + Phase5. Please note that its use is not all that intuitive (i.e. if + you have any questions, be sure to ask!). Say N unless you have a + Cybervision 64/3D or plan to get one before you next recompile the + kernel. Please note that this driver DOES NOT support the older + Cybervision 64 card, as they use incompatible video chips. + +config FB_RETINAZ3 + tristate "Amiga Retina Z3 support" + depends on (FB = y) && ZORRO && BROKEN + help + This enables support for the Retina Z3 graphics card. 
Say N unless + you have a Retina Z3 or plan to get one before you next recompile + the kernel. + +config FB_FM2 + bool "Amiga FrameMaster II/Rainbow II support" + depends on (FB = y) && ZORRO + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Amiga FrameMaster + card from BSC (exhibited 1992 but not shipped as a CBM product). + +config FB_ARC + tristate "Arc Monochrome LCD board support" + depends on FB && X86 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This enables support for the Arc Monochrome LCD board. The board + is based on the KS-108 lcd controller and is typically a matrix + of 2*n chips. This driver was tested with a 128x64 panel. This + driver supports it for use with x86 SBCs through a 16 bit GPIO + interface (8 bit data, 8 bit control). If you anticpate using + this driver, say Y or M; otherwise say N. You must specify the + GPIO IO address to be used for setting control and data. + +config FB_ATARI + bool "Atari native chipset support" + depends on (FB = y) && ATARI && BROKEN + help + This is the frame buffer device driver for the builtin graphics + chipset found in Ataris. + +config FB_OF + bool "Open Firmware frame buffer device support" + depends on (FB = y) && (PPC64 || PPC_OF) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES + help + Say Y if you want support with Open Firmware for your graphics + board. + +config FB_CONTROL + bool "Apple \"control\" display support" + depends on (FB = y) && PPC_PMAC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES + help + This driver supports a frame buffer for the graphics adapter in the + Power Macintosh 7300 and others. 
+ +config FB_PLATINUM + bool "Apple \"platinum\" display support" + depends on (FB = y) && PPC_PMAC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES + help + This driver supports a frame buffer for the "platinum" graphics + adapter in some Power Macintoshes. + +config FB_VALKYRIE + bool "Apple \"valkyrie\" display support" + depends on (FB = y) && (MAC || PPC_PMAC) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES + help + This driver supports a frame buffer for the "valkyrie" graphics + adapter in some Power Macintoshes. + +config FB_CT65550 + bool "Chips 65550 display support" + depends on (FB = y) && PPC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Chips & Technologies + 65550 graphics chip in PowerBooks. + +config FB_ASILIANT + bool "Asiliant (Chips) 69000 display support" + depends on (FB = y) && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + +config FB_IMSTT + bool "IMS Twin Turbo display support" + depends on (FB = y) && PCI + select FB_CFB_IMAGEBLIT + select FB_MACMODES if PPC + help + The IMS Twin Turbo is a PCI-based frame buffer card bundled with + many Macintosh and compatible computers. + +config FB_VGA16 + tristate "VGA 16-color graphics support" + depends on FB && (X86 || PPC) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for VGA 16 color graphic + cards. Say Y if you have such a card. + + To compile this driver as a module, choose M here: the + module will be called vga16fb. 
+ +config FB_STI + tristate "HP STI frame buffer device support" + depends on FB && PARISC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + default y + ---help--- + STI refers to the HP "Standard Text Interface" which is a set of + BIOS routines contained in a ROM chip in HP PA-RISC based machines. + Enabling this option will implement the linux framebuffer device + using calls to the STI BIOS routines for initialisation. + + If you enable this option, you will get a planar framebuffer device + /dev/fb which will work on the most common HP graphic cards of the + NGLE family, including the artist chips (in the 7xx and Bxxx series), + HCRX, HCRX24, CRX, CRX24 and VisEG series. + + It is safe to enable this option, so you should probably say "Y". + +config FB_MAC + bool "Generic Macintosh display support" + depends on (FB = y) && MAC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES + +# bool ' Apple DAFB display support' CONFIG_FB_DAFB +config FB_HP300 + bool + depends on (FB = y) && HP300 + select FB_CFB_FILLRECT + select FB_CFB_IMAGEBLIT + default y + +config FB_TGA + tristate "TGA framebuffer support" + depends on FB && ALPHA + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for generic TGA graphic + cards. Say Y if you have one of those. + +config FB_VESA + bool "VESA VGA graphics support" + depends on (FB = y) && X86 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for generic VESA 2.0 + compliant graphic cards. The older VESA 1.2 cards are not supported. + You will get a boot time penguin logo at no additional cost. Please + read <file:Documentation/fb/vesafb.txt>. If unsure, say Y. 
+ +config VIDEO_SELECT + bool + depends on FB_VESA + default y + +config FB_HGA + tristate "Hercules mono graphics support" + depends on FB && X86 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Say Y here if you have a Hercules mono graphics card. + + To compile this driver as a module, choose M here: the + module will be called hgafb. + + As this card technology is 15 years old, most people will answer N + here. + +config FB_HGA_ACCEL + bool "Hercules mono Acceleration functions (EXPERIMENTAL)" + depends on FB_HGA && EXPERIMENTAL + ---help--- + This will compile the Hercules mono graphics with + acceleration functions. + + +config VIDEO_SELECT + bool + depends on (FB = y) && X86 && !XEN + default y + +config FB_SGIVW + tristate "SGI Visual Workstation framebuffer support" + depends on FB && X86_VISWS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + SGI Visual Workstation support for framebuffer graphics. + +config FB_GBE + bool "SGI Graphics Backend frame buffer support" + depends on (FB = y) && (SGI_IP32 || X86_VISWS) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for SGI Graphics Backend. + This chip is used in SGI O2 and Visual Workstation 320/540. + +config FB_GBE_MEM + int "Video memory size in MB" + depends on FB_GBE + default 8 + help + This is the amount of memory reserved for the framebuffer, + which can be any value between 1MB and 8MB. + +config FB_SUN3 + bool "Sun3 framebuffer support" + depends on (FB = y) && (SUN3 || SUN3X) && BROKEN + +config FB_SBUS + bool "SBUS and UPA framebuffers" + depends on (FB = y) && SPARC + help + Say Y if you want support for SBUS or UPA based frame buffer device. 
+ +config FB_BW2 + bool "BWtwo support" + depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the BWtwo frame buffer. + +config FB_CG3 + bool "CGthree support" + depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the CGthree frame buffer. + +config FB_CG6 + bool "CGsix (GX,TurboGX) support" + depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3) + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the CGsix (GX, TurboGX) + frame buffer. + +config FB_PVR2 + tristate "NEC PowerVR 2 display support" + depends on FB && SH_DREAMCAST + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Say Y here if you have a PowerVR 2 card in your box. If you plan to + run linux on your Dreamcast, you will have to say Y here. + This driver may or may not work on other PowerVR 2 cards, but is + totally untested. Use at your own risk. If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called pvr2fb. + + You can pass several parameters to the driver at boot time or at + module load time. The parameters look like "video=pvr2:XXX", where + the meaning of XXX can be found at the end of the main source file + (<file:drivers/video/pvr2fb.c>). Please see the file + <file:Documentation/fb/pvr2fb.txt>. + +config FB_EPSON1355 + bool "Epson 1355 framebuffer support" + depends on (FB = y) && (SUPERH || ARCH_CEIVA) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Build in support for the SED1355 Epson Research Embedded RAMDAC + LCD/CRT Controller (since redesignated as the S1D13505) as a + framebuffer. 
Product specs at + <http://www.erd.epson.com/vdc/html/products.htm>. + +config FB_S1D13XXX + tristate "Epson S1D13XXX framebuffer support" + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for S1D13XXX framebuffer device family (currently only + working with S1D13806). Product specs at + <http://www.erd.epson.com/vdc/html/legacy_13xxx.htm> + +config FB_NVIDIA + tristate "nVidia Framebuffer Support" + depends on FB && PCI + select I2C_ALGOBIT if FB_NVIDIA_I2C + select I2C if FB_NVIDIA_I2C + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports graphics boards with the nVidia chips, TNT + and newer. For very old chipsets, such as the RIVA128, then use + the rivafb. + Say Y if you have such a graphics board. + + To compile this driver as a module, choose M here: the + module will be called nvidiafb. + +config FB_NVIDIA_I2C + bool "Enable DDC Support" + depends on FB_NVIDIA + help + This enables I2C support for nVidia Chipsets. This is used + only for getting EDID information from the attached display + allowing for robust video mode handling and switching. + + Because fbdev-2.6 requires that drivers must be able to + independently validate video mode parameters, you should say Y + here. + +config FB_RIVA + tristate "nVidia Riva support" + depends on FB && PCI + select I2C_ALGOBIT if FB_RIVA_I2C + select I2C if FB_RIVA_I2C + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports graphics boards with the nVidia Riva/Geforce + chips. + Say Y if you have such a graphics board. + + To compile this driver as a module, choose M here: the + module will be called rivafb. + +config FB_RIVA_I2C + bool "Enable DDC Support" + depends on FB_RIVA + help + This enables I2C support for nVidia Chipsets. 
This is used + only for getting EDID information from the attached display + allowing for robust video mode handling and switching. + + Because fbdev-2.6 requires that drivers must be able to + independently validate video mode parameters, you should say Y + here. + +config FB_RIVA_DEBUG + bool "Lots of debug output from Riva(nVidia) driver" + depends on FB_RIVA + default n + help + Say Y here if you want the Riva driver to output all sorts + of debugging information to provide to the maintainer when + something goes wrong. + +config FB_I810 + tristate "Intel 810/815 support (EXPERIMENTAL)" + depends on FB && EXPERIMENTAL && PCI && X86_32 + select AGP + select AGP_INTEL + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports the on-board graphics built in to the Intel 810 + and 815 chipsets. Say Y if you have and plan to use such a board. + + To compile this driver as a module, choose M here: the + module will be called i810fb. + + For more information, please read + <file:Documentation/fb/intel810.txt> + +config FB_I810_GTF + bool "use VESA Generalized Timing Formula" + depends on FB_I810 + help + If you say Y, then the VESA standard, Generalized Timing Formula + or GTF, will be used to calculate the required video timing values + per video mode. Since the GTF allows nondiscrete timings + (nondiscrete being a range of values as opposed to discrete being a + set of values), you'll be able to use any combination of horizontal + and vertical resolutions, and vertical refresh rates without having + to specify your own timing parameters. This is especially useful + to maximize the performance of an aging display, or if you just + have a display with nonstandard dimensions. A VESA compliant + monitor is recommended, but can still work with non-compliant ones. + If you need or want this, then select this option. The timings may + not be compliant with Intel's recommended values.
Use at your own + risk. + + If you say N, the driver will revert to discrete video timings + using a set recommended by Intel in their documentation. + + If unsure, say N. + +config FB_I810_I2C + bool "Enable DDC Support" + depends on FB_I810 && FB_I810_GTF + select I2C + select I2C_ALGOBIT + help + +config FB_INTEL + tristate "Intel 830M/845G/852GM/855GM/865G support (EXPERIMENTAL)" + depends on FB && EXPERIMENTAL && PCI && X86_32 + select AGP + select AGP_INTEL + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports the on-board graphics built in to the Intel + 830M/845G/852GM/855GM/865G chipsets. + Say Y if you have and plan to use such a board. + + To compile this driver as a module, choose M here: the + module will be called intelfb. + +config FB_INTEL_DEBUG + bool "Intel driver Debug Messages" + depends on FB_INTEL + ---help--- + Say Y here if you want the Intel driver to output all sorts + of debugging information to provide to the maintainer when + something goes wrong. + +config FB_MATROX + tristate "Matrox acceleration" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_TILEBLITTING + select FB_MACMODES if PPC_PMAC + ---help--- + Say Y here if you have a Matrox Millennium, Matrox Millennium II, + Matrox Mystique, Matrox Mystique 220, Matrox Productiva G100, Matrox + Mystique G200, Matrox Millennium G200, Matrox Marvel G200 video, + Matrox G400, G450 or G550 card in your box. + + To compile this driver as a module, choose M here: the + module will be called matroxfb. + + You can pass several parameters to the driver at boot time or at + module load time. The parameters look like "video=matrox:XXX", and + are described in <file:Documentation/fb/matroxfb.txt>.
+ +config FB_MATROX_MILLENIUM + bool "Millennium I/II support" + depends on FB_MATROX + help + Say Y here if you have a Matrox Millennium or Matrox Millennium II + video card. If you select "Advanced lowlevel driver options" below, + you should check 4 bpp packed pixel, 8 bpp packed pixel, 16 bpp + packed pixel, 24 bpp packed pixel and 32 bpp packed pixel. You can + also use font widths different from 8. + +config FB_MATROX_MYSTIQUE + bool "Mystique support" + depends on FB_MATROX + help + Say Y here if you have a Matrox Mystique or Matrox Mystique 220 + video card. If you select "Advanced lowlevel driver options" below, + you should check 8 bpp packed pixel, 16 bpp packed pixel, 24 bpp + packed pixel and 32 bpp packed pixel. You can also use font widths + different from 8. + +config FB_MATROX_G + bool "G100/G200/G400/G450/G550 support" + depends on FB_MATROX + ---help--- + Say Y here if you have a Matrox G100, G200, G400, G450 or G550 based + video card. If you select "Advanced lowlevel driver options", you + should check 8 bpp packed pixel, 16 bpp packed pixel, 24 bpp packed + pixel and 32 bpp packed pixel. You can also use font widths + different from 8. + + If you need support for G400 secondary head, you must first say Y to + "I2C support" in the character devices section, and then to + "Matrox I2C support" and "G400 second head support" here in the + framebuffer section. G450/G550 secondary head and digital output + are supported without additional modules. + + The driver starts in monitor mode. You must use the matroxset tool + (available at <ftp://platan.vc.cvut.cz/pub/linux/matrox-latest/>) to + swap primary and secondary head outputs, or to change output mode. + Secondary head driver always start in 640x480 resolution and you + must use fbset to change it. + + Do not forget that second head supports only 16 and 32 bpp + packed pixels, so it is a good idea to compile them into the kernel + too. 
You can use only some font widths, as the driver uses generic + painting procedures (the secondary head does not use acceleration + engine). + + G450/G550 hardware can display TV picture only from secondary CRTC, + and it performs no scaling, so picture must have 525 or 625 lines. + +config FB_MATROX_I2C + tristate "Matrox I2C support" + depends on FB_MATROX && I2C + select I2C_ALGOBIT + ---help--- + This driver creates I2C buses which are needed for accessing the + DDC (I2C) bus present on all Matroxes, an I2C bus which + interconnects Matrox optional devices, like MGA-TVO on G200 and + G400, and the secondary head DDC bus, present on G400 only. + + You can say Y or M here if you want to experiment with monitor + detection code. You must say Y or M here if you want to use either + second head of G400 or MGA-TVO on G200 or G400. + + If you compile it as module, it will create a module named + i2c-matroxfb. + +config FB_MATROX_MAVEN + tristate "G400 second head support" + depends on FB_MATROX_G && FB_MATROX_I2C + ---help--- + WARNING !!! This support does not work with G450 !!! + + Say Y or M here if you want to use a secondary head (meaning two + monitors in parallel) on G400 or MGA-TVO add-on on G200. Secondary + head is not compatible with accelerated XFree 3.3.x SVGA servers - + secondary head output is blanked while you are in X. With XFree + 3.9.17 preview you can use both heads if you use SVGA over fbdev or + the fbdev driver on first head and the fbdev driver on second head. + + If you compile it as module, two modules are created, + matroxfb_crtc2 and matroxfb_maven. Matroxfb_maven is needed for + both G200 and G400, matroxfb_crtc2 is needed only by G400. You must + also load i2c-matroxfb to get it to run. + + The driver starts in monitor mode and you must use the matroxset + tool (available at + <ftp://platan.vc.cvut.cz/pub/linux/matrox-latest/>) to switch it to + PAL or NTSC or to swap primary and secondary head outputs.
+ Secondary head driver also always start in 640x480 resolution, you + must use fbset to change it. + + Also do not forget that second head supports only 16 and 32 bpp + packed pixels, so it is a good idea to compile them into the kernel + too. You can use only some font widths, as the driver uses generic + painting procedures (the secondary head does not use acceleration + engine). + +config FB_MATROX_MULTIHEAD + bool "Multihead support" + depends on FB_MATROX + ---help--- + Say Y here if you have more than one (supported) Matrox device in + your computer and you want to use all of them for different monitors + ("multihead"). If you have only one device, you should say N because + the driver compiled with Y is larger and a bit slower, especially on + ia32 (ix86). + + If you said M to "Matrox unified accelerated driver" and N here, you + will still be able to use several Matrox devices simultaneously: + insert several instances of the module matroxfb into the kernel + with insmod, supplying the parameter "dev=N" where N is 0, 1, etc. + for the different Matrox devices. This method is slightly faster but + uses 40 KB of kernel memory per Matrox card. + + There is no need for enabling 'Matrox multihead support' if you have + only one Matrox card in the box. + +config FB_RADEON_OLD + tristate "ATI Radeon display support (Old driver)" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES if PPC + help + Choose this option if you want to use an ATI Radeon graphics card as + a framebuffer device. There are both PCI and AGP versions. You + don't need to choose this to run the Radeon in plain VGA mode. + There is a product page at + <http://www.ati.com/na/pages/products/pc/radeon32/index.html>. 
+ +config FB_RADEON + tristate "ATI Radeon display support" + depends on FB && PCI + select I2C_ALGOBIT if FB_RADEON_I2C + select I2C if FB_RADEON_I2C + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES if PPC_OF + help + Choose this option if you want to use an ATI Radeon graphics card as + a framebuffer device. There are both PCI and AGP versions. You + don't need to choose this to run the Radeon in plain VGA mode. + + If you say Y here and want DDC/I2C support you must first say Y to + "I2C support" and "I2C bit-banging support" in the character devices + section. + + If you say M here then "I2C support" and "I2C bit-banging support" + can be built either as modules or built-in. + + There is a product page at + http://apps.ati.com/ATIcompare/ +config FB_RADEON_I2C + bool "DDC/I2C for ATI Radeon support" + depends on FB_RADEON + default y + help + Say Y here if you want DDC/I2C support for your Radeon board. + +config FB_RADEON_DEBUG + bool "Lots of debug output from Radeon driver" + depends on FB_RADEON + default n + help + Say Y here if you want the Radeon driver to output all sorts + of debugging information to provide to the maintainer when + something goes wrong. + +config FB_ATY128 + tristate "ATI Rage128 display support" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES if PPC_PMAC + help + This driver supports graphics boards with the ATI Rage128 chips. + Say Y if you have such a graphics board and read + <file:Documentation/fb/aty128fb.txt>. + + To compile this driver as a module, choose M here: the + module will be called aty128fb. + +config FB_ATY + tristate "ATI Mach64 display support" if PCI || ATARI + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FB_MACMODES if PPC + help + This driver supports graphics boards with the ATI Mach64 chips.
+ Say Y if you have such a graphics board. + + To compile this driver as a module, choose M here: the + module will be called atyfb. + +config FB_ATY_CT + bool "Mach64 CT/VT/GT/LT (incl. 3D RAGE) support" + depends on PCI && FB_ATY + default y if SPARC64 && FB_PCI + help + Say Y here to support use of ATI's 64-bit Rage boards (or other + boards based on the Mach64 CT, VT, GT, and LT chipsets) as a + framebuffer device. The ATI product support page for these boards + is at <http://support.ati.com/products/pc/mach64/>. + +config FB_ATY_GENERIC_LCD + bool "Mach64 generic LCD support (EXPERIMENTAL)" + depends on FB_ATY_CT + help + Say Y if you have a laptop with an ATI Rage LT PRO, Rage Mobility, + Rage XC, or Rage XL chipset. + +config FB_ATY_GX + bool "Mach64 GX support" if PCI + depends on FB_ATY + default y if ATARI + help + Say Y here to support use of the ATI Mach64 Graphics Expression + board (or other boards based on the Mach64 GX chipset) as a + framebuffer device. The ATI product support page for these boards + is at + <http://support.ati.com/products/pc/mach64/graphics_xpression.html>. + +config FB_S3TRIO + bool "S3 Trio display support" + depends on (FB = y) && PPC && BROKEN + help + If you have a S3 Trio say Y. Say N for S3 Virge. + +config FB_SAVAGE + tristate "S3 Savage support" + depends on FB && PCI && EXPERIMENTAL + select I2C_ALGOBIT if FB_SAVAGE_I2C + select I2C if FB_SAVAGE_I2C + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports notebooks and computers with S3 Savage PCI/AGP + chips. + + Say Y if you have such a graphics card. + + To compile this driver as a module, choose M here; the module + will be called savagefb. + +config FB_SAVAGE_I2C + bool "Enable DDC2 Support" + depends on FB_SAVAGE + help + This enables I2C support for S3 Savage Chipsets. 
This is used + only for getting EDID information from the attached display + allowing for robust video mode handling and switching. + + Because fbdev-2.6 requires that drivers must be able to + independently validate video mode parameters, you should say Y + here. + +config FB_SAVAGE_ACCEL + bool "Enable Console Acceleration" + depends on FB_SAVAGE + default n + help + This option will compile in console acceleration support. If + the resulting framebuffer console has bothersome glitches, then + choose N here. + +config FB_SIS + tristate "SiS/XGI display support" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the SiS 300, 315, 330 + and 340 series as well as XGI V3XT, V5, V8, Z7 graphics chipsets. + Specs available at <http://www.sis.com> and <http://www.xgitech.com>. + + To compile this driver as a module, choose M here; the module + will be called sisfb. + +config FB_SIS_300 + bool "SiS 300 series support" + depends on FB_SIS + help + Say Y here to support use of the SiS 300/305, 540, 630 and 730. + +config FB_SIS_315 + bool "SiS 315/330/340 series and XGI support" + depends on FB_SIS + help + Say Y here to support use of the SiS 315, 330 and 340 series + (315/H/PRO, 55x, 650, 651, 740, 330, 661, 741, 760, 761) as well + as XGI V3XT, V5, V8 and Z7. + +config FB_NEOMAGIC + tristate "NeoMagic display support" + depends on FB && PCI + select FB_MODE_HELPERS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This driver supports notebooks with NeoMagic PCI chips. + Say Y if you have such a graphics card. + + To compile this driver as a module, choose M here: the + module will be called neofb. + +config FB_KYRO + tristate "IMG Kyro support" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Say Y here if you have a STG4000 / Kyro / PowerVR 3 based + graphics board. 
+ + To compile this driver as a module, choose M here: the + module will be called kyrofb. + +config FB_3DFX + tristate "3Dfx Banshee/Voodoo3 display support" + depends on FB && PCI + select FB_CFB_IMAGEBLIT + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + help + This driver supports graphics boards with the 3Dfx Banshee/Voodoo3 + chips. Say Y if you have such a graphics board. + + To compile this driver as a module, choose M here: the + module will be called tdfxfb. + +config FB_3DFX_ACCEL + bool "3Dfx Banshee/Voodoo3 Acceleration functions (EXPERIMENTAL)" + depends on FB_3DFX && EXPERIMENTAL + ---help--- + This will compile the 3Dfx Banshee/Voodoo3 frame buffer device + with acceleration functions. + + +config FB_VOODOO1 + tristate "3Dfx Voodoo Graphics (sst1) support" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Say Y here if you have a 3Dfx Voodoo Graphics (Voodoo1/sst1) or + Voodoo2 (cvg) based graphics card. + + To compile this driver as a module, choose M here: the + module will be called sstfb. + + WARNING: Do not use any application that uses the 3D engine + (namely glide) while using this driver. + Please read the <file:Documentation/fb/README-sstfb.txt> for supported + options and other important info support. + +config FB_CYBLA + tristate "Cyberblade/i1 support" + depends on FB && PCI && X86_32 && !64BIT + select FB_CFB_IMAGEBLIT + select VIDEO_SELECT + ---help--- + This driver is supposed to support the Trident Cyberblade/i1 + graphics core integrated in the VIA VT8601A North Bridge, + also known as VIA Apollo PLE133. + + Status: + - Developed, tested and working on EPIA 5000 and EPIA 800. + - Does work reliably on all systems with CRT/LCD connected to + normal VGA ports. + - Should work on systems that do use the internal LCD port, but + this is absolutely not tested. + + Character imageblit, copyarea and rectangle fill are hw accelerated, + ypan scrolling is used by default.
+ + Please do read <file:Documentation/fb/cyblafb/*>. + + To compile this driver as a module, choose M here: the + module will be called cyblafb. + +config FB_TRIDENT + tristate "Trident support" + depends on FB && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + This driver is supposed to support graphics boards with the + Trident CyberXXXX/Image/CyberBlade chips mostly found in laptops + but also on some motherboards. For more information, read + <file:Documentation/fb/tridentfb.txt> + + Cyberblade/i1 support will be removed soon, use the cyblafb driver + instead. + + Say Y if you have such a graphics board. + + + To compile this driver as a module, choose M here: the + module will be called tridentfb. + +config FB_TRIDENT_ACCEL + bool "Trident Acceleration functions (EXPERIMENTAL)" + depends on FB_TRIDENT && EXPERIMENTAL + ---help--- + This will compile the Trident frame buffer device with + acceleration functions. + +config FB_PM3 + tristate "Permedia3 support" + depends on FB && PCI && BROKEN + help + This is the frame buffer device driver for the 3DLabs Permedia3 + chipset, used in Formac ProFormance III, 3DLabs Oxygen VX1 & + similar boards, 3DLabs Permedia3 Create!, Appian Jeronimo 2000 + and maybe other boards. + +config FB_AU1100 + bool "Au1100 LCD Driver" + depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y + +source "drivers/video/geode/Kconfig" + +config FB_FFB + bool "Creator/Creator3D/Elite3D support" + depends on FB_SBUS && SPARC64 + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Creator, Creator3D, + and Elite3D graphics boards. + +config FB_TCX + bool "TCX (SS4/SS5 only) support" + depends on FB_SBUS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the TCX 24/8bit frame + buffer. 
+ +config FB_CG14 + bool "CGfourteen (SX) support" + depends on FB_SBUS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the CGfourteen frame + buffer on Desktop SPARCsystems with the SX graphics option. + +config FB_P9100 + bool "P9100 (Sparcbook 3 only) support" + depends on FB_SBUS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the P9100 card + supported on Sparcbook 3 machines. + +config FB_LEO + bool "Leo (ZX) support" + depends on FB_SBUS + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the SBUS-based Sun ZX + (leo) frame buffer cards. + +config FB_PCI + bool "PCI framebuffers" + depends on (FB = y) && PCI && SPARC + +config FB_IGA + bool "IGA 168x display support" + depends on SPARC32 && FB_PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the framebuffer device for the INTERGRAPHICS 1680 and + successor frame buffer cards. + +config FB_HIT + tristate "HD64461 Frame Buffer support" + depends on FB && HD64461 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This is the frame buffer device driver for the Hitachi HD64461 LCD + frame buffer card. + +config FB_PMAG_AA + bool "PMAG-AA TURBOchannel framebuffer support" + depends on (FB = y) && TC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for the PMAG-AA TURBOchannel framebuffer card (1280x1024x1) + used mainly in the MIPS-based DECstation series. + +config FB_PMAG_BA + bool "PMAG-BA TURBOchannel framebuffer support" + depends on (FB = y) && TC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for the PMAG-BA TURBOchannel framebuffer card (1024x864x8) + used mainly in the MIPS-based DECstation series. 
+ +config FB_PMAGB_B + bool "PMAGB-B TURBOchannel framebuffer support" + depends on (FB = y) && TC + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for the PMAGB-B TURBOchannel framebuffer card used mainly + in the MIPS-based DECstation series. The card is currently only + supported in 1280x1024x8 mode. + +config FB_MAXINE + bool "Maxine (Personal DECstation) onboard framebuffer support" + depends on (FB = y) && MACH_DECSTATION + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for the onboard framebuffer (1024x768x8) in the Personal + DECstation series (Personal DECstation 5000/20, /25, /33, /50, + Codename "Maxine"). + +config FB_TX3912 + bool "TMPTX3912/PR31700 frame buffer support" + depends on (FB = y) && NINO + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + The TX3912 is a Toshiba RISC processor based on the MIPS 3900 core + see <http://www.toshiba.com/taec/components/Generic/risc/tx3912.htm>. + + Say Y here to enable kernel support for the on-board framebuffer. + +config FB_G364 + bool "G364 frame buffer support" + depends on (FB = y) && (MIPS_MAGNUM_4000 || OLIVETTI_M700) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + The G364 driver is the framebuffer used in MIPS Magnum 4000 and + Olivetti M700-10 systems. + +config FB_68328 + bool "Motorola 68328 native frame buffer support" + depends on FB && (M68328 || M68EZ328 || M68VZ328) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Say Y here if you want to support the built-in frame buffer of + the Motorola 68328 CPU family. + +config FB_PXA + tristate "PXA LCD framebuffer support" + depends on FB && ARCH_PXA + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for the built-in LCD controller in the Intel + PXA2x0 processor. 
+ + This driver is also available as a module ( = code which can be + inserted and removed from the running kernel whenever you want). The + module will be called pxafb. If you want to compile it as a module, + say M here and read <file:Documentation/modules.txt>. + + If unsure, say N. + +config FB_PXA_PARAMETERS + bool "PXA LCD command line parameters" + default n + depends on FB_PXA + ---help--- + Enable the use of kernel command line or module parameters + to configure the physical properties of the LCD panel when + using the PXA LCD driver. + + This option allows you to override the panel parameters + supplied by the platform in order to support multiple + different models of flatpanel. If you will only be using a + single model of flatpanel then you can safely leave this + option disabled. + + <file:Documentation/fb/pxafb.txt> describes the available parameters. + +config FB_W100 + tristate "W100 frame buffer support" + depends on FB && PXA_SHARPSL + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for the w100 as found on the Sharp SL-Cxx series. + + This driver is also available as a module ( = code which can be + inserted and removed from the running kernel whenever you want). The + module will be called w100fb. If you want to compile it as a module, + say M here and read <file:Documentation/modules.txt>. + + If unsure, say N. + +config FB_S3C2410 + tristate "S3C2410 LCD framebuffer support" + depends on FB && ARCH_S3C2410 + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for the built-in LCD controller in the Samsung + S3C2410 processor. + + This driver is also available as a module ( = code which can be + inserted and removed from the running kernel whenever you want). The + module will be called s3c2410fb. If you want to compile it as a module, + say M here and read <file:Documentation/modules.txt>. + + If unsure, say N. 
+config FB_S3C2410_DEBUG + bool "S3C2410 lcd debug messages" + depends on FB_S3C2410 + help + Turn on debugging messages. Note that you can set/unset at run time + through sysfs + +config FB_VIRTUAL + tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + This is a `virtual' frame buffer device. It operates on a chunk of + unswappable kernel memory instead of on the memory of a graphics + board. This means you cannot see any output sent to this frame + buffer device, while it does consume precious memory. The main use + of this frame buffer device is testing and debugging the frame + buffer subsystem. Do NOT enable it for normal systems! To protect + the innocent, it has to be enabled explicitly at boot time using the + kernel option `video=vfb:'. + + To compile this driver as a module, choose M here: the + module will be called vfb. + + If unsure, say N. +if VT + source "drivers/video/console/Kconfig" +endif + +if FB || SGI_NEWPORT_CONSOLE + source "drivers/video/logo/Kconfig" +endif + +if FB && SYSFS + source "drivers/video/backlight/Kconfig" +endif + +endmenu + diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/a.out.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/a.out.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,26 @@ +#ifndef __I386_A_OUT_H__ +#define __I386_A_OUT_H__ + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define 
N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +#define STACK_TOP (TASK_SIZE - 3*PAGE_SIZE) + +#endif + +#endif /* __I386_A_OUT_H__ */ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/apic.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/apic.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,147 @@ +#ifndef __ASM_APIC_H +#define __ASM_APIC_H + +#include <linux/config.h> +#include <linux/pm.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <asm/processor.h> +#include <asm/system.h> + +#define Dprintk(x...) + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +extern int enable_local_apic; +extern int apic_verbosity; + +static inline void lapic_disable(void) +{ + enable_local_apic = -1; + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); +} + +static inline void lapic_enable(void) +{ + enable_local_apic = 1; +} + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +#ifdef CONFIG_X86_LOCAL_APIC + +/* + * Basic functions accessing APICs. 
+ */ + +static __inline void apic_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(APIC_BASE+reg)) = v; +} + +static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +{ + xchg((volatile unsigned long *)(APIC_BASE+reg), v); +} + +static __inline unsigned long apic_read(unsigned long reg) +{ + return *((volatile unsigned long *)(APIC_BASE+reg)); +} + +static __inline__ void apic_wait_icr_idle(void) +{ + while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) + cpu_relax(); +} + +int get_physical_broadcast(void); + +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_read_around(x) +# define apic_write_around(x,y) apic_write((x),(y)) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_read_around(x) apic_read(x) +# define apic_write_around(x,y) apic_write_atomic((x),(y)) +#endif + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. 
+ */ + + /* Docs say use 0 for future compatibility */ + apic_write_around(APIC_EOI, 0); +} + +extern void (*wait_timer_tick)(void); + +extern int get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC (void); +extern void disconnect_bsp_APIC (int virt_wire_setup); +extern void disable_local_APIC (void); +extern void lapic_shutdown (void); +extern int verify_local_APIC (void); +extern void cache_APIC_registers (void); +extern void sync_Arb_IDs (void); +extern void init_bsp_APIC (void); +extern void setup_local_APIC (void); +extern void init_apic_mappings (void); +extern void smp_local_timer_interrupt (struct pt_regs * regs); +extern void setup_boot_APIC_clock (void); +extern void setup_secondary_APIC_clock (void); +extern void setup_apic_nmi_watchdog (void); +extern int reserve_lapic_nmi(void); +extern void release_lapic_nmi(void); +extern void disable_timer_nmi_watchdog(void); +extern void enable_timer_nmi_watchdog(void); +extern void nmi_watchdog_tick (struct pt_regs * regs); +extern int APIC_init_uniprocessor (void); +extern void disable_APIC_timer(void); +extern void enable_APIC_timer(void); + +extern void enable_NMI_through_LVT0 (void * dummy); + +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +extern int disable_timer_pin_1; + +#ifndef CONFIG_XEN +void smp_send_timer_broadcast_ipi(struct pt_regs *regs); +void switch_APIC_timer_to_ipi(void *cpumask); +void switch_ipi_to_APIC_timer(void *cpumask); +#define ARCH_APICTIMER_STOPS_ON_C3 1 +#endif + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } + +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#endif /* __ASM_APIC_H */ diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/elf.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/elf.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,194 @@ +#ifndef __ASMi386_ELF_H +#define __ASMi386_ELF_H + 
+/* + * ELF register definitions.. + */ + +#include <asm/ptrace.h> +#include <asm/user.h> +#include <asm/processor.h> +#include <asm/system.h> /* for savesegment */ +#include <asm/auxvec.h> + +#include <linux/utsname.h> + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; +typedef struct user_fxsr_struct elf_fpxregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + contains a pointer to a function which might be registered using `atexit'. + This provides a means for the dynamic linker to call DT_FINI functions for + shared libraries that have been loaded before the code runs. + + A value of 0 tells us we have no such handler. + + We might as well make sure everything else is cleared too (except for %esp), + just to make things more deterministic. + */ +#define ELF_PLAT_INIT(_r, load_addr) do { \ + _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \ + _r->esi = 0; _r->edi = 0; _r->ebp = 0; \ + _r->eax = 0; \ +} while (0) + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. 
We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is + now struct_user_regs, they are different) */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->ebx; \ + pr_reg[1] = regs->ecx; \ + pr_reg[2] = regs->edx; \ + pr_reg[3] = regs->esi; \ + pr_reg[4] = regs->edi; \ + pr_reg[5] = regs->ebp; \ + pr_reg[6] = regs->eax; \ + pr_reg[7] = regs->xds; \ + pr_reg[8] = regs->xes; \ + savesegment(fs,pr_reg[9]); \ + savesegment(gs,pr_reg[10]); \ + pr_reg[11] = regs->orig_eax; \ + pr_reg[12] = regs->eip; \ + pr_reg[13] = regs->xcs; \ + pr_reg[14] = regs->eflags; \ + pr_reg[15] = regs->esp; \ + pr_reg[16] = regs->xss; + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM (system_utsname.machine) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) do { } while (0) + +/* + * An executable for which elf_read_implies_exec() returns TRUE will + * have the READ_IMPLIES_EXEC personality flag set automatically. 
+ */ +#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) + +struct task_struct; + +extern int dump_task_regs (struct task_struct *, elf_gregset_t *); +extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); +extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *); + +#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs) +#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) +#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) + +#define VSYSCALL_BASE (PAGE_OFFSET - 2*PAGE_SIZE) +#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) +#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) +extern void __kernel_vsyscall; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); + +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ +} while (0) + +/* + * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the vsyscall DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the vsyscall DSO was being used. 
+ */ +#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) +#define ELF_CORE_WRITE_EXTRA_PHDRS \ +do { \ + const struct elf_phdr *const vsyscall_phdrs = \ + (const struct elf_phdr *) (VSYSCALL_BASE \ + + VSYSCALL_EHDR->e_phoff); \ + int i; \ + Elf32_Off ofs = 0; \ + for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + struct elf_phdr phdr = vsyscall_phdrs[i]; \ + if (phdr.p_type == PT_LOAD) { \ + BUG_ON(ofs != 0); \ + ofs = phdr.p_offset = offset; \ + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \ + phdr.p_filesz = phdr.p_memsz; \ + offset += phdr.p_filesz; \ + } \ + else \ + phdr.p_offset += ofs; \ + phdr.p_paddr = 0; /* match other core phdrs */ \ + DUMP_WRITE(&phdr, sizeof(phdr)); \ + } \ +} while (0) +#define ELF_CORE_WRITE_EXTRA_DATA \ +do { \ + const struct elf_phdr *const vsyscall_phdrs = \ + (const struct elf_phdr *) (VSYSCALL_BASE \ + + VSYSCALL_EHDR->e_phoff); \ + int i; \ + for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ + if (vsyscall_phdrs[i].p_type == PT_LOAD) \ + DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ + PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ + } \ +} while (0) + +#endif + +#endif diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/apic.h --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/apic.h Fri Feb 24 22:41:08 2006 @@ -0,0 +1,120 @@ +#ifndef __ASM_APIC_H +#define __ASM_APIC_H + +#include <linux/config.h> +#include <linux/pm.h> +#include <asm/fixmap.h> +#include <asm/apicdef.h> +#include <asm/system.h> + +#define Dprintk(x...) + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +extern int apic_verbosity; +extern int apic_runs_main_timer; + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) 
do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + +#ifdef CONFIG_X86_LOCAL_APIC + +struct pt_regs; + +/* + * Basic functions accessing APICs. + */ + +static __inline void apic_write(unsigned long reg, unsigned int v) +{ + *((volatile unsigned int *)(APIC_BASE+reg)) = v; +} + +static __inline unsigned int apic_read(unsigned long reg) +{ + return *((volatile unsigned int *)(APIC_BASE+reg)); +} + +static __inline__ void apic_wait_icr_idle(void) +{ + while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); +} + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +extern int get_maxlvt (void); +extern void clear_local_APIC (void); +extern void connect_bsp_APIC (void); +extern void disconnect_bsp_APIC (int virt_wire_setup); +extern void disable_local_APIC (void); +extern int verify_local_APIC (void); +extern void cache_APIC_registers (void); +extern void sync_Arb_IDs (void); +extern void init_bsp_APIC (void); +extern void setup_local_APIC (void); +extern void init_apic_mappings (void); +extern void smp_local_timer_interrupt (struct pt_regs * regs); +extern void setup_boot_APIC_clock (void); +extern void setup_secondary_APIC_clock (void); +extern void setup_apic_nmi_watchdog (void); +extern int reserve_lapic_nmi(void); +extern void release_lapic_nmi(void); +extern void disable_timer_nmi_watchdog(void); +extern void enable_timer_nmi_watchdog(void); +extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); +extern int APIC_init_uniprocessor (void); +extern void disable_APIC_timer(void); +extern void enable_APIC_timer(void); +extern void clustered_apic_check(void); + +extern void nmi_watchdog_default(void); +extern int setup_nmi_watchdog(char *); + +extern unsigned int nmi_watchdog; 
+#define NMI_DEFAULT -1 +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +extern int disable_timer_pin_1; + +extern void setup_threshold_lvt(unsigned long lvt_off); + +#ifndef CONFIG_XEN +void smp_send_timer_broadcast_ipi(void); +void switch_APIC_timer_to_ipi(void *cpumask); +void switch_ipi_to_APIC_timer(void *cpumask); + +#define ARCH_APICTIMER_STOPS_ON_C3 1 +#endif + +#endif /* CONFIG_X86_LOCAL_APIC */ + +extern unsigned boot_cpu_id; + +#endif /* __ASM_APIC_H */ diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch Fri Feb 24 22:41:08 2006 @@ -0,0 +1,45 @@ +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c +--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c 2006-02-15 20:38:51.000000000 +0000 ++++ ./arch/i386/kernel/traps.c 2006-02-15 20:40:43.000000000 +0000 +@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch + + static void io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-15 20:40:43.000000000 +0000 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline 
void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/net-csum.patch --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/patches/linux-2.6.16-rc4/net-csum.patch Fri Feb 24 22:41:08 2006 @@ -0,0 +1,41 @@ +diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c +--- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:39:51.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:44:18.000000000 +0000 +@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); ++ } + return 1; + } + +diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c +--- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:39:51.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:44:18.000000000 +0000 +@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } +- if (hdr->check) /* 0 is a special case meaning no checksum */ +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if (hdr->check) { /* 0 is a special case meaning no checksum */ ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, 
newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); ++ } ++ } + *portptr = newport; + return 1; + } diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/pmd-shared.patch --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/patches/linux-2.6.16-rc4/pmd-shared.patch Fri Feb 24 22:41:08 2006 @@ -0,0 +1,111 @@ +diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c +--- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c 2006-02-02 17:39:29.000000000 +0000 ++++ ./arch/i386/mm/pageattr.c 2006-02-02 17:45:14.000000000 +0000 +@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) ++ if (HAVE_SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); +diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/mm/pgtable.c 2006-02-02 17:45:14.000000000 +0000 +@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + spin_lock_irqsave(&pgd_lock, flags); + } + +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); ++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); + if (PTRS_PER_PMD > 1) + return; + +@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } ++ ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ if (!pmd) ++ goto out_oom; ++ set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); ++ } ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < 
PTRS_PER_PGD; i++) { ++ unsigned long v = (unsigned long)i << PGDIR_SHIFT; ++ pgd_t *kpgd = pgd_offset_k(v); ++ pud_t *kpud = pud_offset(kpgd, v); ++ pmd_t *kpmd = pmd_offset(kpud, v); ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memcpy(pmd, kpmd, PAGE_SIZE); ++ } ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++ + return pgd; + + out_oom: +@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); ++ if (PTRS_PER_PMD > 1) { ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ spin_lock_irqsave(&pgd_lock, flags); ++ pgd_list_del(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ } ++ } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + kmem_cache_free(pgd_cache, pgd); + } +diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h +--- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-02 17:45:14.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_2LEVEL_DEFS_H + #define _I386_PGTABLE_2LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 0 ++ + /* + * traditional i386 two-level paging structure: + */ +diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h +--- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h 
2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-02 17:45:14.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define _I386_PGTABLE_3LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 1 ++ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/smp-alts.patch --- /dev/null Fri Feb 24 21:03:07 2006 +++ b/patches/linux-2.6.16-rc4/smp-alts.patch Fri Feb 24 22:41:08 2006 @@ -0,0 +1,591 @@ +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig ./arch/i386/Kconfig +--- ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig 2006-02-15 20:38:51.000000000 +0000 ++++ ./arch/i386/Kconfig 2006-02-15 20:45:57.000000000 +0000 +@@ -202,6 +202,19 @@ config SMP + + If you don't know what to do here, say N. + ++config SMP_ALTERNATIVES ++ bool "SMP alternatives support (EXPERIMENTAL)" ++ depends on SMP && EXPERIMENTAL ++ help ++ Try to reduce the overhead of running an SMP kernel on a uniprocessor ++ host slightly by replacing certain key instruction sequences ++ according to whether we currently have more than one CPU available. ++ This should provide a noticeable boost to performance when ++ running SMP kernels on UP machines, and have negligible impact ++ when running on a true SMP host. ++ ++ If unsure, say N. 
++ + config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile +--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile 2006-02-15 20:38:51.000000000 +0000 ++++ ./arch/i386/kernel/Makefile 2006-02-15 20:45:57.000000000 +0000 +@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o + obj-$(CONFIG_DOUBLEFAULT) += doublefault.o + obj-$(CONFIG_VM86) += vm86.o + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ++obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o + + EXTRA_AFLAGS := -traditional + +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c +--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./arch/i386/kernel/smpalts.c 2006-02-15 20:45:57.000000000 +0000 +@@ -0,0 +1,85 @@ ++#include <linux/kernel.h> ++#include <asm/system.h> ++#include <asm/smp_alt.h> ++#include <asm/processor.h> ++#include <asm/string.h> ++ ++struct smp_replacement_record { ++ unsigned char targ_size; ++ unsigned char smp1_size; ++ unsigned char smp2_size; ++ unsigned char up_size; ++ unsigned char feature; ++ unsigned char data[0]; ++}; ++ ++struct smp_alternative_record { ++ void *targ_start; ++ struct smp_replacement_record *repl; ++}; ++ ++extern struct smp_alternative_record __start_smp_alternatives_table, ++ __stop_smp_alternatives_table; ++extern unsigned long __init_begin, __init_end; ++ ++void prepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Enabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ if 
(r->repl->feature != (unsigned char)-1 && ++ boot_cpu_has(r->repl->feature)) { ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size, ++ r->repl->smp2_size); ++ memset(r->targ_start + r->repl->smp2_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp2_size); ++ } else { ++ memcpy(r->targ_start, ++ r->repl->data, ++ r->repl->smp1_size); ++ memset(r->targ_start + r->repl->smp1_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp1_size); ++ } ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} ++ ++void unprepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Disabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, ++ r->repl->up_size); ++ memset(r->targ_start + r->repl->up_size, ++ 0x90, ++ r->repl->targ_size - r->repl->up_size); ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c +--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c 2006-02-15 20:38:51.000000000 +0000 ++++ ./arch/i386/kernel/smpboot.c 2006-02-15 20:45:57.000000000 +0000 +@@ -1214,6 +1214,11 @@ static void __init smp_boot_cpus(unsigne + if (max_cpus <= cpucount+1) + continue; + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (kicked == 1) ++ prepare_for_smp(); ++#endif ++ + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) + printk("CPU #%d not responding - cannot use it.\n", + apicid); +@@ -1392,6 +1397,11 @@ int __devinit __cpu_up(unsigned int cpu) + return -EIO; + } + ++#ifdef 
CONFIG_SMP_ALTERNATIVES ++ if (num_online_cpus() == 1) ++ prepare_for_smp(); ++#endif ++ + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Unleash the CPU! */ +diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-15 20:45:57.000000000 +0000 +@@ -34,6 +34,13 @@ SECTIONS + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } + __stop___ex_table = .; + ++ . = ALIGN(16); ++ __start_smp_alternatives_table = .; ++ __smp_alternatives : { *(__smp_alternatives) } ++ __stop_smp_alternatives_table = .; ++ ++ __smp_replacements : { *(__smp_replacements) } ++ + RODATA + + /* writeable */ +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h ./include/asm-i386/atomic.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h 2006-02-15 20:38:57.000000000 +0000 ++++ ./include/asm-i386/atomic.h 2006-02-15 20:45:57.000000000 +0000 +@@ -4,18 +4,13 @@ + #include <linux/config.h> + #include <linux/compiler.h> + #include <asm/processor.h> ++#include <asm/smp_alt.h> + + /* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +-#ifdef CONFIG_SMP +-#define LOCK "lock ; " +-#else +-#define LOCK "" +-#endif +- + /* + * Make sure gcc doesn't try to be clever and move things around + * on us. 
We need to use _exactly_ the address the user gave us, +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h ./include/asm-i386/bitops.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h 2006-02-15 20:38:57.000000000 +0000 ++++ ./include/asm-i386/bitops.h 2006-02-15 20:45:57.000000000 +0000 +@@ -7,6 +7,7 @@ + + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * These have to be done with inline assembly: that way the bit-setting +@@ -16,12 +17,6 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +-#ifdef CONFIG_SMP +-#define LOCK_PREFIX "lock ; " +-#else +-#define LOCK_PREFIX "" +-#endif +- + #define ADDR (*(volatile long *) addr) + + /** +@@ -41,7 +36,7 @@ + */ + static inline void set_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol + */ + static inline void clear_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, + */ + static inline void change_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in + { + int oldbit; + +- 
__asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h ./include/asm-i386/futex.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h 2006-02-15 20:38:57.000000000 +0000 ++++ ./include/asm-i386/futex.h 2006-02-15 20:45:57.000000000 +0000 +@@ -28,7 +28,7 @@ + "1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ ++"2: " LOCK "cmpxchgl %3, %2\n\ + jnz 1b\n\ + 3: .section .fixup,\"ax\"\n\ + 4: mov %5, %1\n\ +@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, + #endif + switch (op) { + case FUTEX_OP_ADD: +- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, ++ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, + oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/rwsem.h 2006-02-15 20:45:57.000000000 +0000 +@@ -40,6 +40,7 @@ + + #include <linux/list.h> + #include <linux/spinlock.h> ++#include <asm/smp_alt.h> + + struct rwsem_waiter; + +@@ -99,7 +100,7 @@ static inline void __down_read(struct rw + { + __asm__ __volatile__( + "# beginning down_read\n\t" +-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ ++LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ + " js 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st + " movl %1,%2\n\t" + " addl %3,%2\n\t" + " jle 2f\n\t" +-LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++LOCK " cmpxchgl %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -150,7 +151,7 @@ static inline void __down_write(struct r + tmp = RWSEM_ACTIVE_WRITE_BIAS; + __asm__ __volatile__( + "# 
beginning down_write\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" /* was the count 0 before? */ + " jnz 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" +@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + __asm__ __volatile__( + "# beginning __up_read\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ + __asm__ __volatile__( + "# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ ++LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ + " jnz 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -239,7 +240,7 @@ static inline void __downgrade_write(str + { + __asm__ __volatile__( + "# beginning __downgrade_write\n\t" +-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { + __asm__ __volatile__( +-LOCK_PREFIX "addl %1,%0" ++LOCK "addl %1,%0" + : "=m"(sem->count) + : "ir"(delta), "m"(sem->count)); + } +@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in + int tmp = delta; + + __asm__ __volatile__( +-LOCK_PREFIX "xadd %0,(%2)" ++LOCK "xadd %0,(%2)" + : "+r"(tmp), "=m"(sem->count) + : "r"(sem), 
"m"(sem->count) + : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/asm-i386/smp_alt.h 2006-02-15 20:45:57.000000000 +0000 +@@ -0,0 +1,32 @@ ++#ifndef __ASM_SMP_ALT_H__ ++#define __ASM_SMP_ALT_H__ ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define LOCK \ ++ "6677: nop\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6677b\n" \ ++ ".long 6678f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6678: .byte 1\n" \ ++ ".byte 1\n" \ ++ ".byte 0\n" \ ++ ".byte 1\n" \ ++ ".byte -1\n" \ ++ "lock\n" \ ++ "nop\n" \ ++ ".previous\n" ++void prepare_for_smp(void); ++void unprepare_for_smp(void); ++#else ++#define LOCK "lock ; " ++#endif ++#else ++#define LOCK "" ++#endif ++ ++#endif /* __ASM_SMP_ALT_H__ */ +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/spinlock.h 2006-02-15 20:45:57.000000000 +0000 +@@ -6,6 +6,7 @@ + #include <asm/page.h> + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * Your basic SMP spinlocks, allowing only a single CPU anywhere +@@ -23,7 +24,8 @@ + + #define __raw_spin_lock_string \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ +@@ -34,7 +36,8 @@ + + #define __raw_spin_lock_string_flags \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ +@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags + static inline int __raw_spin_trylock(raw_spinlock_t *lock) + { + char oldval; ++#ifdef CONFIG_SMP_ALTERNATIVES + __asm__ __volatile__( +- 
"xchgb %b0,%1" ++ "1:movb %1,%b0\n" ++ "movb $0,%1\n" ++ "2:" ++ ".section __smp_alternatives,\"a\"\n" ++ ".long 1b\n" ++ ".long 3f\n" ++ ".previous\n" ++ ".section __smp_replacements,\"a\"\n" ++ "3: .byte 2b - 1b\n" ++ ".byte 5f-4f\n" ++ ".byte 0\n" ++ ".byte 6f-5f\n" ++ ".byte -1\n" ++ "4: xchgb %b0,%1\n" ++ "5: movb %1,%b0\n" ++ "movb $0,%1\n" ++ "6:\n" ++ ".previous\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); ++#else ++ __asm__ __volatile__( ++ "xchgb %b0,%1\n" ++ :"=q" (oldval), "=m" (lock->slock) ++ :"0" (0) : "memory"); ++#endif + return oldval > 0; + } + +@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); ++ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" ++ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" + : "=m" (rw->lock) : : "memory"); + } + +diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h ./include/asm-i386/system.h +--- ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h 2006-02-15 20:38:57.000000000 +0000 ++++ ./include/asm-i386/system.h 2006-02-15 20:45:57.000000000 +0000 +@@ -5,7 +5,7 @@ + #include <linux/kernel.h> + #include <asm/segment.h> + #include <asm/cpufeature.h> +-#include <linux/bitops.h> /* for LOCK_PREFIX */ ++#include <asm/smp_alt.h> + + #ifdef __KERNEL__ + +@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo + unsigned long prev; + switch (size) { + case 1: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : 
"memory"); + return prev; + case 4: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); +@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc + unsigned long long new) + { + unsigned long long prev; +- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" ++ __asm__ __volatile__(LOCK "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), +@@ -503,11 +503,55 @@ struct alt_instr { + #endif + + #ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define smp_alt_mb(instr) \ ++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673:.byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte 0\n" \ ++ ".byte %c0\n" \ ++ "6669:lock;addl $0,0(%%esp)\n" \ ++ "6670:" instr "\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : \ ++ : "i" (X86_FEATURE_XMM2) \ ++ : "memory") ++#define smp_rmb() smp_alt_mb("lfence") ++#define smp_mb() smp_alt_mb("mfence") ++#define set_mb(var, value) do { \ ++unsigned long __set_mb_temp; \ ++__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673: .byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 0\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte -1\n" \ ++ "6669: xchg %1, %0\n" \ ++ "6670:movl %1, %0\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : "=m" (var), "=r" (__set_mb_temp) \ ++ : "1" (value) \ ++ : "memory"); } while (0) ++#else + #define smp_mb() mb() + #define smp_rmb() rmb() ++#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) ++#endif + #define smp_wmb() wmb() + #define smp_read_barrier_depends() 
read_barrier_depends() +-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) + #else + #define smp_mb() barrier() + #define smp_rmb() barrier() diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/segment.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/segment.h Fri Feb 24 21:03:07 2006 +++ /dev/null Fri Feb 24 22:41:08 2006 @@ -1,44 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -#include <asm/cache.h> - -#define __KERNEL_CS 0x10 -#define __KERNEL_DS 0x1b - -#define __KERNEL32_CS 0x3b - -/* - * we cannot use the same code segment descriptor for user and kernel - * -- not even in the long flat mode, because of different DPL /kkeil - * The segment offset needs to contain a RPL. Grr. -AK - * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) - */ - -#define __USER32_CS 0x23 /* 4*8+3 */ -#define __USER_DS 0x2b /* 5*8+3 */ -#define __USER_CS 0x33 /* 6*8+3 */ -#define __USER32_DS __USER_DS - -#define GDT_ENTRY_TLS 1 -#define GDT_ENTRY_TSS 8 /* needs two entries */ -#define GDT_ENTRY_LDT 10 /* needs two entries */ -#define GDT_ENTRY_TLS_MIN 12 -#define GDT_ENTRY_TLS_MAX 14 -/* 15 free */ - -#define GDT_ENTRY_TLS_ENTRIES 3 - -/* TLS indexes for 64bit - hardcoded in arch_prctl */ -#define FS_TLS 0 -#define GS_TLS 1 - -#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) -#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) - -#define IDT_ENTRIES 256 -#define GDT_ENTRIES 16 -#define GDT_SIZE (GDT_ENTRIES * 8) -#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) - -#endif diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/i386-mach-io-check-nmi.patch --- a/patches/linux-2.6.16-rc3/i386-mach-io-check-nmi.patch Fri Feb 24 21:03:07 2006 +++ /dev/null Fri Feb 24 22:41:08 2006 @@ -1,45 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c 2006-02-15 20:38:51.000000000 +0000 
-+++ ./arch/i386/kernel/traps.c 2006-02-15 20:40:43.000000000 +0000 -@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch - - static void io_check_error(unsigned char reason, struct pt_regs * regs) - { -- unsigned long i; -- - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ -- reason = (reason & 0xf) | 8; -- outb(reason, 0x61); -- i = 2000; -- while (--i) udelay(1000); -- reason &= ~8; -- outb(reason, 0x61); -+ clear_io_check_error(reason); - } - - static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-15 20:40:43.000000000 +0000 -@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig - outb(reason, 0x61); - } - -+static inline void clear_io_check_error(unsigned char reason) -+{ -+ unsigned long i; -+ -+ reason = (reason & 0xf) | 8; -+ outb(reason, 0x61); -+ i = 2000; -+ while (--i) udelay(1000); -+ reason &= ~8; -+ outb(reason, 0x61); -+} -+ - static inline unsigned char get_nmi_reason(void) - { - return inb(0x61); diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/net-csum.patch --- a/patches/linux-2.6.16-rc3/net-csum.patch Fri Feb 24 21:03:07 2006 +++ /dev/null Fri Feb 24 22:41:08 2006 @@ -1,41 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c ---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:39:51.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:44:18.000000000 +0000 -@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, - if (hdrsize < sizeof(*hdr)) - return 1; - -- hdr->check = 
ip_nat_cheat_check(~oldip, newip, -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); -+ } - return 1; - } - -diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c ---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:39:51.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:44:18.000000000 +0000 -@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } -- if (hdr->check) /* 0 is a special case meaning no checksum */ -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if (hdr->check) { /* 0 is a special case meaning no checksum */ -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); -+ } -+ } - *portptr = newport; - return 1; - } diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/pmd-shared.patch --- a/patches/linux-2.6.16-rc3/pmd-shared.patch Fri Feb 24 21:03:07 2006 +++ /dev/null Fri Feb 24 22:41:08 2006 @@ -1,111 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c ---- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c 2006-02-02 17:39:29.000000000 +0000 -+++ ./arch/i386/mm/pageattr.c 2006-02-02 17:45:14.000000000 +0000 -@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns - unsigned long flags; - - set_pte_atomic(kpte, pte); /* change init_mm */ -- if (PTRS_PER_PMD > 1) -+ if (HAVE_SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); -diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c ---- 
../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/mm/pgtable.c 2006-02-02 17:45:14.000000000 +0000 -@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c - spin_lock_irqsave(&pgd_lock, flags); - } - -- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- KERNEL_PGD_PTRS); -+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) -+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); - if (PTRS_PER_PMD > 1) - return; - -@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } -+ -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ if (!pmd) -+ goto out_oom; -+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); -+ } -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ unsigned long v = (unsigned long)i << PGDIR_SHIFT; -+ pgd_t *kpgd = pgd_offset_k(v); -+ pud_t *kpud = pud_offset(kpgd, v); -+ pmd_t *kpmd = pmd_offset(kpud, v); -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memcpy(pmd, kpmd, PAGE_SIZE); -+ } -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ } -+ - return pgd; - - out_oom: -@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) - int i; - - /* in the PAE case user pgd entries are overwritten before usage */ -- if (PTRS_PER_PMD > 1) -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) -- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); -+ if (PTRS_PER_PMD > 1) { -+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ spin_lock_irqsave(&pgd_lock, flags); -+ pgd_list_del(pgd); -+ 
spin_unlock_irqrestore(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ } -+ } - /* in the non-PAE case, free_pgtables() clears user pgd entries */ - kmem_cache_free(pgd_cache, pgd); - } -diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h ---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-02 17:45:14.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_2LEVEL_DEFS_H - #define _I386_PGTABLE_2LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 0 -+ - /* - * traditional i386 two-level paging structure: - */ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h ---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-02 17:45:14.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_3LEVEL_DEFS_H - #define _I386_PGTABLE_3LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 1 -+ - /* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/smp-alts.patch --- a/patches/linux-2.6.16-rc3/smp-alts.patch Fri Feb 24 21:03:07 2006 +++ /dev/null Fri Feb 24 22:41:08 2006 @@ -1,591 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig ./arch/i386/Kconfig ---- ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/Kconfig 2006-02-15 20:45:57.000000000 +0000 -@@ -202,6 +202,19 @@ config SMP - - If you don't know what to do here, say N. 
- -+config SMP_ALTERNATIVES -+ bool "SMP alternatives support (EXPERIMENTAL)" -+ depends on SMP && EXPERIMENTAL -+ help -+ Try to reduce the overhead of running an SMP kernel on a uniprocessor -+ host slightly by replacing certain key instruction sequences -+ according to whether we currently have more than one CPU available. -+ This should provide a noticeable boost to performance when -+ running SMP kernels on UP machines, and have negligible impact -+ when running on an true SMP host. -+ -+ If unsure, say N. -+ - config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/kernel/Makefile 2006-02-15 20:45:57.000000000 +0000 -@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o - obj-$(CONFIG_DOUBLEFAULT) += doublefault.o - obj-$(CONFIG_VM86) += vm86.o - obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o - - EXTRA_AFLAGS := -traditional - -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 -+++ ./arch/i386/kernel/smpalts.c 2006-02-15 20:45:57.000000000 +0000 -@@ -0,0 +1,85 @@ -+#include <linux/kernel.h> -+#include <asm/system.h> -+#include <asm/smp_alt.h> -+#include <asm/processor.h> -+#include <asm/string.h> -+ -+struct smp_replacement_record { -+ unsigned char targ_size; -+ unsigned char smp1_size; -+ unsigned char smp2_size; -+ unsigned char up_size; -+ unsigned char feature; -+ unsigned char data[0]; -+}; -+ -+struct smp_alternative_record { -+ void *targ_start; -+ struct smp_replacement_record *repl; -+}; -+ -+extern struct smp_alternative_record __start_smp_alternatives_table, -+ __stop_smp_alternatives_table; -+extern unsigned long __init_begin, __init_end; -+ 
-+void prepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Enabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ if (r->repl->feature != (unsigned char)-1 && -+ boot_cpu_has(r->repl->feature)) { -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size, -+ r->repl->smp2_size); -+ memset(r->targ_start + r->repl->smp2_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp2_size); -+ } else { -+ memcpy(r->targ_start, -+ r->repl->data, -+ r->repl->smp1_size); -+ memset(r->targ_start + r->repl->smp1_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp1_size); -+ } -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -+ -+void unprepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Disabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, -+ r->repl->up_size); -+ memset(r->targ_start + r->repl->up_size, -+ 0x90, -+ r->repl->targ_size - r->repl->up_size); -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c 2006-02-15 20:38:51.000000000 +0000 
-+++ ./arch/i386/kernel/smpboot.c 2006-02-15 20:45:57.000000000 +0000 -@@ -1214,6 +1214,11 @@ static void __init smp_boot_cpus(unsigne - if (max_cpus <= cpucount+1) - continue; - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (kicked == 1) -+ prepare_for_smp(); -+#endif -+ - if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) - printk("CPU #%d not responding - cannot use it.\n", - apicid); -@@ -1392,6 +1397,11 @@ int __devinit __cpu_up(unsigned int cpu) - return -EIO; - } - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (num_online_cpus() == 1) -+ prepare_for_smp(); -+#endif -+ - local_irq_enable(); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - /* Unleash the CPU! */ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-15 20:45:57.000000000 +0000 -@@ -34,6 +34,13 @@ SECTIONS - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; - -+ . = ALIGN(16); -+ __start_smp_alternatives_table = .; -+ __smp_alternatives : { *(__smp_alternatives) } -+ __stop_smp_alternatives_table = .; -+ -+ __smp_replacements : { *(__smp_replacements) } -+ - RODATA - - /* writeable */ -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h ./include/asm-i386/atomic.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/atomic.h 2006-02-15 20:45:57.000000000 +0000 -@@ -4,18 +4,13 @@ - #include <linux/config.h> - #include <linux/compiler.h> - #include <asm/processor.h> -+#include <asm/smp_alt.h> - - /* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - --#ifdef CONFIG_SMP --#define LOCK "lock ; " --#else --#define LOCK "" --#endif -- - /* - * Make sure gcc doesn't try to be clever and move things around - * on us. 
We need to use _exactly_ the address the user gave us, -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h ./include/asm-i386/bitops.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/bitops.h 2006-02-15 20:45:57.000000000 +0000 -@@ -7,6 +7,7 @@ - - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * These have to be done with inline assembly: that way the bit-setting -@@ -16,12 +17,6 @@ - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). - */ - --#ifdef CONFIG_SMP --#define LOCK_PREFIX "lock ; " --#else --#define LOCK_PREFIX "" --#endif -- - #define ADDR (*(volatile long *) addr) - - /** -@@ -41,7 +36,7 @@ - */ - static inline void set_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol - */ - static inline void clear_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, - */ - static inline void change_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in - { - int oldbit; - -- 
__asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h ./include/asm-i386/futex.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/futex.h 2006-02-15 20:45:57.000000000 +0000 -@@ -28,7 +28,7 @@ - "1: movl %2, %0\n\ - movl %0, %3\n" \ - insn "\n" \ --"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ -+"2: " LOCK "cmpxchgl %3, %2\n\ - jnz 1b\n\ - 3: .section .fixup,\"ax\"\n\ - 4: mov %5, %1\n\ -@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, - #endif - switch (op) { - case FUTEX_OP_ADD: -- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, -+ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, - oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/rwsem.h 2006-02-15 20:45:57.000000000 +0000 -@@ -40,6 +40,7 @@ - - #include <linux/list.h> - #include <linux/spinlock.h> -+#include <asm/smp_alt.h> - - struct rwsem_waiter; - -@@ -99,7 +100,7 @@ static inline void __down_read(struct rw - { - __asm__ __volatile__( - "# beginning down_read\n\t" --LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ -+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ - " js 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st - " movl %1,%2\n\t" - " addl %3,%2\n\t" - " jle 2f\n\t" --LOCK_PREFIX " cmpxchgl %2,%0\n\t" -+LOCK " cmpxchgl %2,%0\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" -@@ -150,7 +151,7 @@ static inline void __down_write(struct r - tmp = RWSEM_ACTIVE_WRITE_BIAS; - __asm__ __volatile__( - "# 
beginning down_write\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ -+LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %%edx,%%edx\n\t" /* was the count 0 before? */ - " jnz 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" -@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; - __asm__ __volatile__( - "# beginning __up_read\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ -+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ - __asm__ __volatile__( - "# beginning __up_write\n\t" - " movl %2,%%edx\n\t" --LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ -+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ - " jnz 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -239,7 +240,7 @@ static inline void __downgrade_write(str - { - __asm__ __volatile__( - "# beginning __downgrade_write\n\t" --LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ -+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" - static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) - { - __asm__ __volatile__( --LOCK_PREFIX "addl %1,%0" -+LOCK "addl %1,%0" - : "=m"(sem->count) - : "ir"(delta), "m"(sem->count)); - } -@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in - int tmp = delta; - - __asm__ __volatile__( --LOCK_PREFIX "xadd %0,(%2)" -+LOCK "xadd %0,(%2)" - : "+r"(tmp), "=m"(sem->count) - : "r"(sem), 
"m"(sem->count) - : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 -+++ ./include/asm-i386/smp_alt.h 2006-02-15 20:45:57.000000000 +0000 -@@ -0,0 +1,32 @@ -+#ifndef __ASM_SMP_ALT_H__ -+#define __ASM_SMP_ALT_H__ -+ -+#include <linux/config.h> -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define LOCK \ -+ "6677: nop\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6677b\n" \ -+ ".long 6678f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6678: .byte 1\n" \ -+ ".byte 1\n" \ -+ ".byte 0\n" \ -+ ".byte 1\n" \ -+ ".byte -1\n" \ -+ "lock\n" \ -+ "nop\n" \ -+ ".previous\n" -+void prepare_for_smp(void); -+void unprepare_for_smp(void); -+#else -+#define LOCK "lock ; " -+#endif -+#else -+#define LOCK "" -+#endif -+ -+#endif /* __ASM_SMP_ALT_H__ */ -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/spinlock.h 2006-02-15 20:45:57.000000000 +0000 -@@ -6,6 +6,7 @@ - #include <asm/page.h> - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * Your basic SMP spinlocks, allowing only a single CPU anywhere -@@ -23,7 +24,8 @@ - - #define __raw_spin_lock_string \ - "\n1:\t" \ -- "lock ; decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 3f\n" \ - "2:\t" \ - "rep;nop\n\t" \ -@@ -34,7 +36,8 @@ - - #define __raw_spin_lock_string_flags \ - "\n1:\t" \ -- "lock ; decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 4f\n\t" \ - "2:\t" \ - "testl $0x200, %1\n\t" \ -@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags - static inline int __raw_spin_trylock(raw_spinlock_t *lock) - { - char oldval; -+#ifdef CONFIG_SMP_ALTERNATIVES - __asm__ __volatile__( -- 
"xchgb %b0,%1" -+ "1:movb %1,%b0\n" -+ "movb $0,%1\n" -+ "2:" -+ ".section __smp_alternatives,\"a\"\n" -+ ".long 1b\n" -+ ".long 3f\n" -+ ".previous\n" -+ ".section __smp_replacements,\"a\"\n" -+ "3: .byte 2b - 1b\n" -+ ".byte 5f-4f\n" -+ ".byte 0\n" -+ ".byte 6f-5f\n" -+ ".byte -1\n" -+ "4: xchgb %b0,%1\n" -+ "5: movb %1,%b0\n" -+ "movb $0,%1\n" -+ "6:\n" -+ ".previous\n" - :"=q" (oldval), "=m" (lock->slock) - :"0" (0) : "memory"); -+#else -+ __asm__ __volatile__( -+ "xchgb %b0,%1\n" -+ :"=q" (oldval), "=m" (lock->slock) -+ :"0" (0) : "memory"); -+#endif - return oldval > 0; - } - -@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra - - static inline void __raw_read_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); -+ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); - } - - static inline void __raw_write_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" -+ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" - : "=m" (rw->lock) : : "memory"); - } - -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h ./include/asm-i386/system.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/system.h 2006-02-15 20:45:57.000000000 +0000 -@@ -5,7 +5,7 @@ - #include <linux/kernel.h> - #include <asm/segment.h> - #include <asm/cpufeature.h> --#include <linux/bitops.h> /* for LOCK_PREFIX */ -+#include <asm/smp_alt.h> - - #ifdef __KERNEL__ - -@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo - unsigned long prev; - switch (size) { - case 1: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : 
"memory"); - return prev; - case 4: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); -@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc - unsigned long long new) - { - unsigned long long prev; -- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" -+ __asm__ __volatile__(LOCK "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), -@@ -503,11 +503,55 @@ struct alt_instr { - #endif - - #ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define smp_alt_mb(instr) \ -+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673:.byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte 0\n" \ -+ ".byte %c0\n" \ -+ "6669:lock;addl $0,0(%%esp)\n" \ -+ "6670:" instr "\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : \ -+ : "i" (X86_FEATURE_XMM2) \ -+ : "memory") -+#define smp_rmb() smp_alt_mb("lfence") -+#define smp_mb() smp_alt_mb("mfence") -+#define set_mb(var, value) do { \ -+unsigned long __set_mb_temp; \ -+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673: .byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 0\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte -1\n" \ -+ "6669: xchg %1, %0\n" \ -+ "6670:movl %1, %0\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : "=m" (var), "=r" (__set_mb_temp) \ -+ : "1" (value) \ -+ : "memory"); } while (0) -+#else - #define smp_mb() mb() - #define smp_rmb() rmb() -+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -+#endif - #define smp_wmb() wmb() - #define smp_read_barrier_depends() 
read_barrier_depends() --#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) - #else - #define smp_mb() barrier() - #define smp_rmb() barrier() _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |