[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] The attached patch now allows x86_64 xenlinux to run 32-bit x86 binaries



ChangeSet 1.1732, 2005/06/23 10:24:21+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        The attached patch now allows x86_64 xenlinux to run 32-bit x86 binaries
        as the native x86_64 Linux does. I checked LTP using 32-bit binaries,
        and got the same results on the native x86_64 Linux. At this point, 'int
        0x80' is used for system calls, as it's unlikely that the fast systems
        (sysenter or syscall) are significantly faster under the current
        trampoline mechanism. However, it should be easy to replace
        vsyscall-int80 with the fast system calls if one wants to do so (look at
        USE_INT80 in the patch).
        
        I found bugs with LDT handling in x86_64 Xen/XenLinux (exposed by 32-bit
        LTP testcases fork05 and modify_ldt02), and the bugs have been fixed by
        a separate patch in the next email:=20
        [PATCH] [x86_64] Fixing LDT handling with x86_64 Xen
        
        Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>



 configs/xen0_defconfig_x86_64 |    4 
 configs/xenU_defconfig_x86_64 |    4 
 x86_64/ia32/Makefile          |   58 +++
 x86_64/ia32/ia32entry.S       |  629 ++++++++++++++++++++++++++++++++++++++++++
 x86_64/ia32/syscall32.c       |  143 +++++++++
 x86_64/ia32/vsyscall-int80.S  |   57 +++
 x86_64/kernel/entry.S         |   41 --
 x86_64/kernel/traps.c         |    3 
 x86_64/kernel/xen_entry.S     |   38 ++
 x86_64/mm/fault.c             |    1 
 10 files changed, 935 insertions(+), 43 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 
b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64    
2005-06-23 07:05:39 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64    
2005-06-23 07:05:39 -04:00
@@ -133,8 +133,8 @@
 #
 # Executable file formats / Emulations
 #
-# CONFIG_IA32_EMULATION is not set
-
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
 #
 # Executable file formats
 #
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 
b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64    
2005-06-23 07:05:39 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64    
2005-06-23 07:05:39 -04:00
@@ -128,8 +128,8 @@
 #
 # Executable file formats / Emulations
 #
-# CONFIG_IA32_EMULATION is not set
-
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
 #
 # Executable file formats
 #
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile
--- /dev/null   Wed Dec 31 16:00:00 196900
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/Makefile     2005-06-23 
07:05:39 -04:00
@@ -0,0 +1,58 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/kernel
+
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o syscall32.o
+
+c-obj-$(CONFIG_IA32_EMULATION) := sys_ia32.o ia32_ioctl.o \
+       ia32_signal.o tls32.o \
+       ia32_binfmt.o fpu32.o ptrace32.o 
+
+s-obj-y :=
+
+sysv-$(CONFIG_SYSVIPC) := ipc32.o
+c-obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
+
+c-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
+
+$(obj)/syscall32.o: $(src)/syscall32.c \
+       $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so)
+
+# Teach kbuild about targets
+targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+
+# The DSO images are built using a special linker script
+quiet_cmd_syscall = SYSCALL $@
+      cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
+                          -Wl,-soname=linux-gate.so.1 -o $@ \
+                          -Wl,-T,$(filter-out FORCE,$^)
+
+
+$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so 
$(obj)/vsyscall-syscall.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(src)/vsyscall-%.o FORCE
+       $(call if_changed,syscall)
+
+AFLAGS_vsyscall-int80.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32
+AFLAGS_vsyscall-syscall.o = -m32
+CFLAGS_ia32_ioctl.o += -Ifs/
+
+s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o
+
+$(src)/vsyscall.lds:
+       @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst 
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
+       @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
+
+$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
+$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
+
+obj-y  += $(c-obj-y) $(s-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S
--- /dev/null   Wed Dec 31 16:00:00 196900
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S  2005-06-23 
07:05:39 -04:00
@@ -0,0 +1,629 @@
+/*
+ * Compatibility mode system call entry point for x86-64. 
+ *             
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */             
+
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/current.h>
+#include <asm/errno.h>
+#include <asm/ia32_unistd.h>   
+#include <asm/thread_info.h>   
+#include <asm/segment.h>
+#include <asm/vsyscall32.h>
+#include <linux/linkage.h>
+
+#define __XEN_X86_64 1
+       
+       .macro IA32_ARG_FIXUP noebp=0
+       movl    %edi,%r8d
+       .if \noebp
+       .else
+       movl    %ebp,%r9d
+       .endif
+       xchg    %ecx,%esi
+       movl    %ebx,%edi
+       movl    %edx,%edx       /* zero extension */
+       .endm 
+
+       /* clobbers %eax */     
+       .macro  CLEAR_RREGS
+       xorl    %eax,%eax
+       movq    %rax,R11(%rsp)
+       movq    %rax,R10(%rsp)
+       movq    %rax,R9(%rsp)
+       movq    %rax,R8(%rsp)
+       .endm
+
+#if defined (__XEN_X86_64)
+#include "../kernel/xen_entry.S"
+               
+#define        __swapgs
+#define __cli
+#define __sti  
+#else
+/*
+ * Use the native instructions
+ */    
+#define        __swapgs        swapgs
+#define __cli          cli
+#define __sti          sti     
+#endif                 
+
+/*
+ * 32bit SYSENTER instruction entry.
+ *
+ * Arguments:
+ * %eax        System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp user stack
+ * 0(%ebp) Arg6        
+ *     
+ * Interrupts off.
+ *     
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */    
+ENTRY(ia32_sysenter_target)
+       CFI_STARTPROC
+       __swapgs 
+       movq    %gs:pda_kernelstack, %rsp
+       addq    $(PDA_STACKOFFSET),%rsp
+       XEN_UNBLOCK_EVENTS(%r11)        
+       __sti
+       movl    %ebp,%ebp               /* zero extension */
+       pushq   $__USER32_DS
+       pushq   %rbp
+       pushfq
+       movl    $VSYSCALL32_SYSEXIT, %r10d
+       pushq   $__USER32_CS
+       movl    %eax, %eax
+       pushq   %r10
+       pushq   %rax
+       cld
+       SAVE_ARGS 0,0,1
+       /* no need to do an access_ok check here because rbp has been
+          32bit zero extended */ 
+1:     movl    (%rbp),%r9d
+       .section __ex_table,"a"
+       .quad 1b,ia32_badarg
+       .previous       
+       GET_THREAD_INFO(%r10)
+       testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+       jnz  sysenter_tracesys
+sysenter_do_call:      
+       cmpl    $(IA32_NR_syscalls),%eax
+       jae     ia32_badsys
+       IA32_ARG_FIXUP 1
+       call    *ia32_sys_call_table(,%rax,8)
+       movq    %rax,RAX-ARGOFFSET(%rsp)
+       GET_THREAD_INFO(%r10)
+       XEN_BLOCK_EVENTS(%r11)  
+       __cli
+       testl   $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+       jnz     int_ret_from_sys_call
+       /* clear IF, that popfq doesn't enable interrupts early */
+       andl  $~0x200,EFLAGS-R11(%rsp) 
+       RESTORE_ARGS 1,24,1,1,1,1
+       popfq
+       popq    %rcx                            /* User %esp */
+       movl    $VSYSCALL32_SYSEXIT,%edx        /* User %eip */
+       __swapgs
+       XEN_UNBLOCK_EVENTS(%r11)                
+       __sti           /* sti only takes effect after the next instruction */
+       /* sysexit */
+       .byte   0xf, 0x35  /* TBD */
+
+sysenter_tracesys:
+       SAVE_REST
+       CLEAR_RREGS
+       movq    $-ENOSYS,RAX(%rsp)      /* really needed? */
+       movq    %rsp,%rdi        /* &pt_regs -> arg1 */
+       call    syscall_trace_enter
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed 
it */
+       RESTORE_REST
+       movl    %ebp, %ebp
+       /* no need to do an access_ok check here because rbp has been
+          32bit zero extended */ 
+1:     movl    (%rbp),%r9d
+       .section __ex_table,"a"
+       .quad 1b,ia32_badarg
+       .previous
+       jmp     sysenter_do_call
+       CFI_ENDPROC
+
+/*
+ * 32bit SYSCALL instruction entry.
+ *
+ * Arguments:
+ * %eax        System call number.
+ * %ebx Arg1
+ * %ecx return EIP 
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
+ * %esp user stack 
+ * 0(%esp) Arg6
+ *     
+ * Interrupts off.
+ *     
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.     
+ */    
+ENTRY(ia32_cstar_target)
+       CFI_STARTPROC
+       __swapgs
+       movl    %esp,%r8d
+       movq    %gs:pda_kernelstack,%rsp
+       XEN_UNBLOCK_EVENTS(%r11)        
+       __sti
+       SAVE_ARGS 8,1,1
+       movl    %eax,%eax       /* zero extension */
+       movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
+       movq    %rcx,RIP-ARGOFFSET(%rsp)
+       movq    %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+       movl    %ebp,%ecx
+       movq    $__USER32_CS,CS-ARGOFFSET(%rsp)
+       movq    $__USER32_DS,SS-ARGOFFSET(%rsp)
+       movq    %r11,EFLAGS-ARGOFFSET(%rsp)
+       movq    %r8,RSP-ARGOFFSET(%rsp) 
+       /* no need to do an access_ok check here because r8 has been
+          32bit zero extended */ 
+       /* hardware stack frame is complete now */      
+1:     movl    (%r8),%r9d
+       .section __ex_table,"a"
+       .quad 1b,ia32_badarg
+       .previous       
+       GET_THREAD_INFO(%r10)
+       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+       jnz   cstar_tracesys
+cstar_do_call: 
+       cmpl $IA32_NR_syscalls,%eax
+       jae  ia32_badsys
+       IA32_ARG_FIXUP 1
+       call *ia32_sys_call_table(,%rax,8)
+       movq %rax,RAX-ARGOFFSET(%rsp)
+       GET_THREAD_INFO(%r10)
+       XEN_BLOCK_EVENTS(%r11)          
+       __cli
+       testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+       jnz  int_ret_from_sys_call
+       RESTORE_ARGS 1,-ARG_SKIP,1,1,1
+       movl RIP-ARGOFFSET(%rsp),%ecx
+       movl EFLAGS-ARGOFFSET(%rsp),%r11d       
+       movl RSP-ARGOFFSET(%rsp),%esp
+       __swapgs
+       sysretl  /* TBD */
+       
+cstar_tracesys:        
+       SAVE_REST
+       CLEAR_RREGS
+       movq $-ENOSYS,RAX(%rsp) /* really needed? */
+       movq %rsp,%rdi        /* &pt_regs -> arg1 */
+       call syscall_trace_enter
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed 
it */
+       RESTORE_REST
+       movl RSP-ARGOFFSET(%rsp), %r8d
+       /* no need to do an access_ok check here because r8 has been
+          32bit zero extended */ 
+1:     movl    (%r8),%r9d
+       .section __ex_table,"a"
+       .quad 1b,ia32_badarg
+       .previous
+       jmp cstar_do_call
+                               
+ia32_badarg:
+       movq $-EFAULT,%rax
+       jmp ia32_sysret
+       CFI_ENDPROC
+
+/* 
+ * Emulated IA32 system calls via int 0x80. 
+ *
+ * Arguments:   
+ * %eax        System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ *
+ * Notes:
+ * Uses the same stack frame as the x86-64 version.    
+ * All registers except %eax must be saved (but ptrace may violate that)
+ * Arguments are zero extended. For system calls that want sign extension and
+ * take long arguments a wrapper is needed. Most calls can just be called
+ * directly.
+ * Assumes it is only called from user space and entered with interrupts off.  
+ */                            
+
+ENTRY(ia32_syscall)
+       CFI_STARTPROC
+       __swapgs
+       XEN_UNBLOCK_EVENTS(%r11)
+       __sti
+       movq (%rsp),%rcx
+       movq 8(%rsp),%r11
+        addq $0x10,%rsp /* skip rcx and r11 */
+       movl %eax,%eax
+       pushq %rax
+       cld
+/* 1:  jmp 1b   */
+       /* note the registers are not zero extended to the sf.
+          this could be a problem. */
+       SAVE_ARGS 0,0,1
+       GET_THREAD_INFO(%r10)
+       testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
+       jnz ia32_tracesys
+ia32_do_syscall:       
+       cmpl $(IA32_NR_syscalls),%eax
+       jae  ia32_badsys
+       IA32_ARG_FIXUP
+       call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
+ia32_sysret:
+       movq %rax,RAX-ARGOFFSET(%rsp)
+       jmp int_ret_from_sys_call 
+
+ia32_tracesys:                  
+       SAVE_REST
+       movq $-ENOSYS,RAX(%rsp) /* really needed? */
+       movq %rsp,%rdi        /* &pt_regs -> arg1 */
+       call syscall_trace_enter
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed 
it */
+       RESTORE_REST
+       jmp ia32_do_syscall
+
+ia32_badsys:
+       movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+       movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+       jmp int_ret_from_sys_call
+
+ni_syscall:
+       movq %rax,%rdi
+       jmp  sys32_ni_syscall                   
+
+quiet_ni_syscall:
+       movq $-ENOSYS,%rax
+       ret
+       CFI_ENDPROC
+       
+       .macro PTREGSCALL label, func, arg
+       .globl \label
+\label:
+       leaq \func(%rip),%rax
+       leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
+       jmp  ia32_ptregs_common 
+       .endm
+
+       PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
+       PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
+       PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
+       PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
+       PTREGSCALL stub32_execve, sys32_execve, %rcx
+       PTREGSCALL stub32_fork, sys_fork, %rdi
+       PTREGSCALL stub32_clone, sys32_clone, %rdx
+       PTREGSCALL stub32_vfork, sys_vfork, %rdi
+       PTREGSCALL stub32_iopl, sys_iopl, %rsi
+       PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
+
+ENTRY(ia32_ptregs_common)
+       CFI_STARTPROC
+       popq %r11
+       SAVE_REST
+       call *%rax
+       RESTORE_REST
+       jmp  ia32_sysret        /* misbalances the return cache */
+       CFI_ENDPROC
+
+       .data
+       .align 8
+       .globl ia32_sys_call_table
+ia32_sys_call_table:
+       .quad sys_restart_syscall
+       .quad sys_exit
+       .quad stub32_fork
+       .quad sys_read
+       .quad sys_write
+       .quad sys32_open                /* 5 */
+       .quad sys_close
+       .quad sys32_waitpid
+       .quad sys_creat
+       .quad sys_link
+       .quad sys_unlink                /* 10 */
+       .quad stub32_execve
+       .quad sys_chdir
+       .quad compat_sys_time
+       .quad sys_mknod
+       .quad sys_chmod         /* 15 */
+       .quad sys_lchown16
+       .quad quiet_ni_syscall                  /* old break syscall holder */
+       .quad sys_stat
+       .quad sys32_lseek
+       .quad sys_getpid                /* 20 */
+       .quad compat_sys_mount  /* mount  */
+       .quad sys_oldumount     /* old_umount  */
+       .quad sys_setuid16
+       .quad sys_getuid16
+       .quad compat_sys_stime  /* stime */             /* 25 */
+       .quad sys32_ptrace      /* ptrace */
+       .quad sys_alarm
+       .quad sys_fstat /* (old)fstat */
+       .quad sys_pause
+       .quad compat_sys_utime  /* 30 */
+       .quad quiet_ni_syscall  /* old stty syscall holder */
+       .quad quiet_ni_syscall  /* old gtty syscall holder */
+       .quad sys_access
+       .quad sys_nice  
+       .quad quiet_ni_syscall  /* 35 */        /* old ftime syscall holder */
+       .quad sys_sync
+       .quad sys32_kill
+       .quad sys_rename
+       .quad sys_mkdir
+       .quad sys_rmdir         /* 40 */
+       .quad sys_dup
+       .quad sys32_pipe
+       .quad compat_sys_times
+       .quad quiet_ni_syscall                  /* old prof syscall holder */
+       .quad sys_brk           /* 45 */
+       .quad sys_setgid16
+       .quad sys_getgid16
+       .quad sys_signal
+       .quad sys_geteuid16
+       .quad sys_getegid16     /* 50 */
+       .quad sys_acct
+       .quad sys_umount                        /* new_umount */
+       .quad quiet_ni_syscall                  /* old lock syscall holder */
+       .quad compat_sys_ioctl
+       .quad compat_sys_fcntl64                /* 55 */
+       .quad quiet_ni_syscall                  /* old mpx syscall holder */
+       .quad sys_setpgid
+       .quad quiet_ni_syscall                  /* old ulimit syscall holder */
+       .quad sys32_olduname
+       .quad sys_umask         /* 60 */
+       .quad sys_chroot
+       .quad sys32_ustat
+       .quad sys_dup2
+       .quad sys_getppid
+       .quad sys_getpgrp               /* 65 */
+       .quad sys_setsid
+       .quad sys32_sigaction
+       .quad sys_sgetmask
+       .quad sys_ssetmask
+       .quad sys_setreuid16    /* 70 */
+       .quad sys_setregid16
+       .quad stub32_sigsuspend
+       .quad compat_sys_sigpending
+       .quad sys_sethostname
+       .quad compat_sys_setrlimit      /* 75 */
+       .quad compat_sys_old_getrlimit  /* old_getrlimit */
+       .quad compat_sys_getrusage
+       .quad sys32_gettimeofday
+       .quad sys32_settimeofday
+       .quad sys_getgroups16   /* 80 */
+       .quad sys_setgroups16
+       .quad sys32_old_select
+       .quad sys_symlink
+       .quad sys_lstat
+       .quad sys_readlink              /* 85 */
+#ifdef CONFIG_IA32_AOUT
+       .quad sys_uselib
+#else
+       .quad quiet_ni_syscall
+#endif
+       .quad sys_swapon
+       .quad sys_reboot
+       .quad compat_sys_old_readdir
+       .quad sys32_mmap                /* 90 */
+       .quad sys_munmap
+       .quad sys_truncate
+       .quad sys_ftruncate
+       .quad sys_fchmod
+       .quad sys_fchown16              /* 95 */
+       .quad sys_getpriority
+       .quad sys_setpriority
+       .quad quiet_ni_syscall                  /* old profil syscall holder */
+       .quad compat_sys_statfs
+       .quad compat_sys_fstatfs                /* 100 */
+       .quad sys_ioperm
+       .quad compat_sys_socketcall
+       .quad sys_syslog
+       .quad compat_sys_setitimer
+       .quad compat_sys_getitimer      /* 105 */
+       .quad compat_sys_newstat
+       .quad compat_sys_newlstat
+       .quad compat_sys_newfstat
+       .quad sys32_uname
+       .quad stub32_iopl               /* 110 */
+       .quad sys_vhangup
+       .quad quiet_ni_syscall  /* old "idle" system call */
+       .quad sys32_vm86_warning        /* vm86old */ 
+       .quad compat_sys_wait4
+       .quad sys_swapoff               /* 115 */
+       .quad sys32_sysinfo
+       .quad sys32_ipc
+       .quad sys_fsync
+       .quad stub32_sigreturn
+       .quad stub32_clone              /* 120 */
+       .quad sys_setdomainname
+       .quad sys_uname
+       .quad sys_modify_ldt
+       .quad sys32_adjtimex
+       .quad sys32_mprotect            /* 125 */
+       .quad compat_sys_sigprocmask
+       .quad quiet_ni_syscall          /* create_module */
+       .quad sys_init_module
+       .quad sys_delete_module
+       .quad quiet_ni_syscall          /* 130  get_kernel_syms */
+       .quad sys_quotactl
+       .quad sys_getpgid
+       .quad sys_fchdir
+       .quad quiet_ni_syscall  /* bdflush */
+       .quad sys_sysfs         /* 135 */
+       .quad sys_personality
+       .quad quiet_ni_syscall  /* for afs_syscall */
+       .quad sys_setfsuid16
+       .quad sys_setfsgid16
+       .quad sys_llseek                /* 140 */
+       .quad compat_sys_getdents
+       .quad compat_sys_select
+       .quad sys_flock
+       .quad sys_msync
+       .quad compat_sys_readv          /* 145 */
+       .quad compat_sys_writev
+       .quad sys_getsid
+       .quad sys_fdatasync
+       .quad sys32_sysctl      /* sysctl */
+       .quad sys_mlock         /* 150 */
+       .quad sys_munlock
+       .quad sys_mlockall
+       .quad sys_munlockall
+       .quad sys_sched_setparam
+       .quad sys_sched_getparam   /* 155 */
+       .quad sys_sched_setscheduler
+       .quad sys_sched_getscheduler
+       .quad sys_sched_yield
+       .quad sys_sched_get_priority_max
+       .quad sys_sched_get_priority_min  /* 160 */
+       .quad sys_sched_rr_get_interval
+       .quad compat_sys_nanosleep
+       .quad sys_mremap
+       .quad sys_setresuid16
+       .quad sys_getresuid16   /* 165 */
+       .quad sys32_vm86_warning        /* vm86 */ 
+       .quad quiet_ni_syscall  /* query_module */
+       .quad sys_poll
+       .quad compat_sys_nfsservctl
+       .quad sys_setresgid16   /* 170 */
+       .quad sys_getresgid16
+       .quad sys_prctl
+       .quad stub32_rt_sigreturn
+       .quad sys32_rt_sigaction
+       .quad sys32_rt_sigprocmask      /* 175 */
+       .quad sys32_rt_sigpending
+       .quad compat_sys_rt_sigtimedwait
+       .quad sys32_rt_sigqueueinfo
+       .quad stub32_rt_sigsuspend
+       .quad sys32_pread               /* 180 */
+       .quad sys32_pwrite
+       .quad sys_chown16
+       .quad sys_getcwd
+       .quad sys_capget
+       .quad sys_capset
+       .quad stub32_sigaltstack
+       .quad sys32_sendfile
+       .quad quiet_ni_syscall          /* streams1 */
+       .quad quiet_ni_syscall          /* streams2 */
+       .quad stub32_vfork            /* 190 */
+       .quad compat_sys_getrlimit
+       .quad sys32_mmap2
+       .quad sys32_truncate64
+       .quad sys32_ftruncate64
+       .quad sys32_stat64              /* 195 */
+       .quad sys32_lstat64
+       .quad sys32_fstat64
+       .quad sys_lchown
+       .quad sys_getuid
+       .quad sys_getgid                /* 200 */
+       .quad sys_geteuid
+       .quad sys_getegid
+       .quad sys_setreuid
+       .quad sys_setregid
+       .quad sys_getgroups     /* 205 */
+       .quad sys_setgroups
+       .quad sys_fchown
+       .quad sys_setresuid
+       .quad sys_getresuid
+       .quad sys_setresgid     /* 210 */
+       .quad sys_getresgid
+       .quad sys_chown
+       .quad sys_setuid
+       .quad sys_setgid
+       .quad sys_setfsuid              /* 215 */
+       .quad sys_setfsgid
+       .quad sys_pivot_root
+       .quad sys_mincore
+       .quad sys_madvise
+       .quad compat_sys_getdents64     /* 220 getdents64 */
+       .quad compat_sys_fcntl64        
+       .quad quiet_ni_syscall          /* tux */
+       .quad quiet_ni_syscall          /* security */
+       .quad sys_gettid        
+       .quad sys_readahead     /* 225 */
+       .quad sys_setxattr
+       .quad sys_lsetxattr
+       .quad sys_fsetxattr
+       .quad sys_getxattr
+       .quad sys_lgetxattr     /* 230 */
+       .quad sys_fgetxattr
+       .quad sys_listxattr
+       .quad sys_llistxattr
+       .quad sys_flistxattr
+       .quad sys_removexattr   /* 235 */
+       .quad sys_lremovexattr
+       .quad sys_fremovexattr
+       .quad sys_tkill
+       .quad sys_sendfile64 
+       .quad compat_sys_futex          /* 240 */
+       .quad compat_sys_sched_setaffinity
+       .quad compat_sys_sched_getaffinity
+       .quad sys32_set_thread_area
+       .quad sys32_get_thread_area
+       .quad compat_sys_io_setup       /* 245 */
+       .quad sys_io_destroy
+       .quad compat_sys_io_getevents
+       .quad compat_sys_io_submit
+       .quad sys_io_cancel
+       .quad sys_fadvise64             /* 250 */
+       .quad quiet_ni_syscall  /* free_huge_pages */
+       .quad sys_exit_group
+       .quad sys32_lookup_dcookie
+       .quad sys_epoll_create
+       .quad sys_epoll_ctl             /* 255 */
+       .quad sys_epoll_wait
+       .quad sys_remap_file_pages
+       .quad sys_set_tid_address
+       .quad sys32_timer_create
+       .quad compat_sys_timer_settime  /* 260 */
+       .quad compat_sys_timer_gettime
+       .quad sys_timer_getoverrun
+       .quad sys_timer_delete
+       .quad compat_sys_clock_settime
+       .quad compat_sys_clock_gettime  /* 265 */
+       .quad compat_sys_clock_getres
+       .quad compat_sys_clock_nanosleep
+       .quad compat_sys_statfs64
+       .quad compat_sys_fstatfs64
+       .quad sys_tgkill                /* 270 */
+       .quad compat_sys_utimes
+       .quad sys32_fadvise64_64
+       .quad quiet_ni_syscall  /* sys_vserver */
+       .quad sys_mbind
+       .quad compat_sys_get_mempolicy  /* 275 */
+       .quad sys_set_mempolicy
+       .quad compat_sys_mq_open
+       .quad sys_mq_unlink
+       .quad compat_sys_mq_timedsend
+       .quad compat_sys_mq_timedreceive        /* 280 */
+       .quad compat_sys_mq_notify
+       .quad compat_sys_mq_getsetattr
+       .quad quiet_ni_syscall          /* reserved for kexec */
+       .quad sys32_waitid
+       .quad quiet_ni_syscall          /* sys_altroot */
+       .quad sys_add_key
+       .quad sys_request_key
+       .quad sys_keyctl
+       /* don't forget to change IA32_NR_syscalls */
+ia32_syscall_end:              
+       .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
+               .quad ni_syscall
+       .endr
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c
--- /dev/null   Wed Dec 31 16:00:00 196900
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/syscall32.c  2005-06-23 
07:05:39 -04:00
@@ -0,0 +1,143 @@
+/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
+
+/* vsyscall handling for 32bit processes. Map a stub page into it 
+   on demand because 32bit cannot reach the kernel's fixmaps */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/stringify.h>
+#include <asm/proto.h>
+#include <asm/tlbflush.h>
+#include <asm/ia32_unistd.h>
+
+#define USE_INT80
+
+#ifdef USE_INT80
+/* 32bit VDSOs mapped into user space. */ 
+asm(".section \".init.data\",\"aw\"\n"
+    "syscall32_int80:\n"
+    ".incbin \"arch/xen/x86_64/ia32/vsyscall-int80.so\"\n"
+    "syscall32_int80_end:\n"
+    "syscall32_syscall:\n"
+    ".incbin \"arch/xen/x86_64/ia32/vsyscall-syscall.so\"\n"
+    "syscall32_syscall_end:\n"
+    "syscall32_sysenter:\n"
+    ".incbin \"arch/xen/x86_64/ia32/vsyscall-sysenter.so\"\n"
+    "syscall32_sysenter_end:\n"
+    ".previous");
+
+extern unsigned char syscall32_int80[], syscall32_int80_end[];
+#else
+/* 32bit VDSOs mapped into user space. */ 
+asm(".section \".init.data\",\"aw\"\n"
+    "syscall32_syscall:\n"
+    ".incbin \"arch/xen/x86_64/ia32/vsyscall-syscall.so\"\n"
+    "syscall32_syscall_end:\n"
+    "syscall32_sysenter:\n"
+    ".incbin \"arch/xen/x86_64/ia32/vsyscall-sysenter.so\"\n"
+    "syscall32_sysenter_end:\n"
+    ".previous");
+
+static int use_sysenter = -1;
+#endif
+
+extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
+extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
+extern int sysctl_vsyscall32;
+
+char *syscall32_page; 
+
+/*
+ * Map the 32bit vsyscall page on demand.
+ *
+ * RED-PEN: This knows too much about high level VM.
+ *
+ * Alternative would be to generate a vma with appropriate backing options
+ * and let it be handled by generic VM.
+ */
+int __map_syscall32(struct mm_struct *mm, unsigned long address)
+{ 
+       pgd_t *pgd;
+       pud_t *pud;
+       pte_t *pte;
+       pmd_t *pmd;
+       int err = -ENOMEM;
+
+       spin_lock(&mm->page_table_lock); 
+       pgd = pgd_offset(mm, address);
+       pud = pud_alloc(mm, pgd, address);
+       if (pud) {
+               pmd = pmd_alloc(mm, pud, address);
+               if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
+                       if (pte_none(*pte)) {
+                               set_pte(pte,
+                                       mk_pte(virt_to_page(syscall32_page),
+                                              PAGE_KERNEL_VSYSCALL32));
+                       }
+                       /* Flush only the local CPU. Other CPUs taking a fault
+                          will just end up here again
+                          This probably not needed and just paranoia. */
+                       __flush_tlb_one(address);
+                       err = 0;
+               }
+       }
+       spin_unlock(&mm->page_table_lock);
+       return err;
+}
+
+int map_syscall32(struct mm_struct *mm, unsigned long address)
+{
+       int err;
+       down_read(&mm->mmap_sem);
+       err = __map_syscall32(mm, address);
+       up_read(&mm->mmap_sem);
+       return err;
+}
+
+static int __init init_syscall32(void)
+{ 
+       syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
+       if (!syscall32_page) 
+               panic("Cannot allocate syscall32 page"); 
+       SetPageReserved(virt_to_page(syscall32_page));
+
+#ifdef USE_INT80
+       /*
+        * At this point we use int 0x80.
+        */
+       memcpy(syscall32_page, syscall32_int80,
+              syscall32_int80_end - syscall32_int80);
+#else
+
+       if (use_sysenter > 0) {
+               memcpy(syscall32_page, syscall32_sysenter,
+                      syscall32_sysenter_end - syscall32_sysenter);
+       } else {
+               memcpy(syscall32_page, syscall32_syscall,
+                      syscall32_syscall_end - syscall32_syscall);
+       }       
+#endif
+       return 0;
+} 
+       
+__initcall(init_syscall32); 
+
+/* May not be __init: called during resume */
+void syscall32_cpu_init(void)
+{
+#ifndef USE_INT80
+       if (use_sysenter < 0)
+               use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+       /* Load these always in case some future AMD CPU supports
+          SYSENTER from compat mode too. */
+       checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)(__KERNEL_CS | 3));
+       checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
+       checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+       wrmsrl(MSR_CSTAR, ia32_cstar_target);
+#endif
+}
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S
--- /dev/null   Wed Dec 31 16:00:00 196900
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S     
2005-06-23 07:05:39 -04:00
@@ -0,0 +1,57 @@
+/*
+ * Code for the vsyscall page.  This version uses the old int $0x80 method.
+ *
+ * NOTE:
+ * 1) __kernel_vsyscall _must_ be first in this page.
+ * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
+ *    for details.
+ */
+#include <asm/ia32_unistd.h>
+#include <asm/offset.h>
+
+       .text
+       .section .text.vsyscall,"ax"
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       int $0x80
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+       .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIE:
+
+       .long .LENDFDE1-.LSTARTFDE1     /* Length FDE */
+.LSTARTFDE1:
+       .long .LSTARTFDE1-.LSTARTFRAME  /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0                      /* Augmentation length */
+       .align 4
+.LENDFDE1:
+               
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#define SYSCALL_ENTER_KERNEL    int $0x80
+#include "vsyscall-sigreturn.S"
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S    2005-06-23 
07:05:39 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S    2005-06-23 
07:05:39 -04:00
@@ -50,43 +50,8 @@
 
 #include "irq_vectors.h"
 
-EVENT_MASK      = (CS+4)
-VGCF_IN_SYSCALL = (1<<8)
-        
-/*
- * Copied from arch/xen/i386/kernel/entry.S
- */                        
-/* Offsets into shared_info_t. */                
-#define evtchn_upcall_pending          0
-#define evtchn_upcall_mask             1
-
-#define sizeof_vcpu_shift              3
-
-#ifdef CONFIG_SMP
-#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
-#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
-                               movq %gs:pda_cpunumber,reg              ; \
-                               shl  $sizeof_vcpu_shift,reg             ; \
-                               addq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
-#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
-#else
-#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
-#define XEN_PUT_VCPU_INFO(reg)
-#define XEN_PUT_VCPU_INFO_fixup
-#endif
-
-#define XEN_LOCKED_BLOCK_EVENTS(reg)   movb $1,evtchn_upcall_mask(reg)
-#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_BLOCK_EVENTS(reg)  XEN_GET_VCPU_INFO(reg)                  ; \
-                               XEN_LOCKED_BLOCK_EVENTS(reg)            ; \
-                               XEN_PUT_VCPU_INFO(reg)
-#define XEN_UNBLOCK_EVENTS(reg)        XEN_GET_VCPU_INFO(reg)                  
; \
-                               XEN_LOCKED_UNBLOCK_EVENTS(reg)          ; \
-                               XEN_PUT_VCPU_INFO(reg)
-#define XEN_TEST_PENDING(reg)  testb $0xFF,evtchn_upcall_pending(reg)
-
+#include "xen_entry.S"
+       
        .code64
 
 #ifdef CONFIG_PREEMPT
@@ -172,8 +137,6 @@
         movq %r11,1*8(%rsp)
         movq %rcx,2*8(%rsp)              # we saved %rcx upon exceptions
         movq $\flag,3*8(%rsp)
-        movq $__USER_CS,5*8(%rsp)
-        movq $__USER_DS,8*8(%rsp)
         movq $__HYPERVISOR_switch_to_user,%rax
         syscall
         .endm
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c    2005-06-23 
07:05:39 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c    2005-06-23 
07:05:39 -04:00
@@ -931,6 +931,9 @@
 #endif
         { 19, 0, (__KERNEL_CS|0x3), 0, (unsigned long)simd_coprocessor_error   
  },
         { SYSCALL_VECTOR,  3, (__KERNEL_CS|0x3), 0, (unsigned long)system_call 
  },
+#ifdef CONFIG_IA32_EMULATION
+       { IA32_SYSCALL_VECTOR, 3, (__KERNEL_CS|0x3), 0, (unsigned 
long)ia32_syscall},
+#endif
         {  0, 0,           0, 0,  0                                            
  }
 };
 
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
--- /dev/null   Wed Dec 31 16:00:00 196900
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S        
2005-06-23 07:05:39 -04:00
@@ -0,0 +1,38 @@
+/*
+ * Copied from arch/xen/i386/kernel/entry.S
+ */                        
+/* Offsets into shared_info_t. */                
+#define evtchn_upcall_pending          0
+#define evtchn_upcall_mask             1
+
+#define sizeof_vcpu_shift              3
+
+#ifdef CONFIG_SMP
+#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
+#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
+#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
+                               movq %gs:pda_cpunumber,reg              ; \
+                               shl  $sizeof_vcpu_shift,reg             ; \
+                               addq HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
+#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
+#else
+#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg)
+#define XEN_PUT_VCPU_INFO_fixup
+#endif
+
+#define XEN_LOCKED_BLOCK_EVENTS(reg)   movb $1,evtchn_upcall_mask(reg)
+#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
+#define XEN_BLOCK_EVENTS(reg)  XEN_GET_VCPU_INFO(reg)                  ; \
+                               XEN_LOCKED_BLOCK_EVENTS(reg)            ; \
+                               XEN_PUT_VCPU_INFO(reg)
+#define XEN_UNBLOCK_EVENTS(reg)        XEN_GET_VCPU_INFO(reg)                  
; \
+                               XEN_LOCKED_UNBLOCK_EVENTS(reg)          ; \
+                               XEN_PUT_VCPU_INFO(reg)
+#define XEN_TEST_PENDING(reg)  testb $0xFF,evtchn_upcall_pending(reg)
+
+EVENT_MASK      = (CS+4)
+VGCF_IN_SYSCALL = (1<<8)
+        
+       
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c 
b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c        2005-06-23 
07:05:39 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c        2005-06-23 
07:05:39 -04:00
@@ -563,6 +563,7 @@
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
+out_of_memory2:
        if (current->pid == 1) { 
                yield();
                goto again;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.