[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Linux: upgrade to 2.6.16.13.



# HG changeset patch
# User cl349@xxxxxxxxxxxxxxxxxxxx
# Node ID 44e5abbf333b8d393423d99f89d2191dba022659
# Parent  d36ac8bf715ebd874f963eee9819edcff409dfc6
Linux: upgrade to 2.6.16.13.

Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
---
 buildconfigs/mk.linux-2.6-xen                                       |    2 
 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c                        |   12 +++
 linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile                      |    6 -
 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S                 |   28 +++-----
 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c               |    4 +
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c                 |    4 +
 linux-2.6-xen-sparse/drivers/char/tty_io.c                          |    8 +-
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h |    3 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h |   20 ++++++
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h        |   13 ---
 linux-2.6-xen-sparse/include/linux/mm.h                             |    5 -
 linux-2.6-xen-sparse/mm/page_alloc.c                                |   33 +++++-----
 linux-2.6-xen-sparse/net/core/dev.c                                 |    2 
 13 files changed, 84 insertions(+), 56 deletions(-)

diff -r d36ac8bf715e -r 44e5abbf333b buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Thu May 04 16:49:58 2006 +0100
+++ b/buildconfigs/mk.linux-2.6-xen     Thu May 04 17:38:25 2006 +0100
@@ -1,5 +1,5 @@ LINUX_SERIES = 2.6
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.16
+LINUX_VER    = 2.6.16.13
 
 EXTRAVERSION ?= xen
 
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c      Thu May 04 17:38:25 2006 +0100
@@ -43,6 +43,7 @@
 #include <linux/smp_lock.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
+#include <linux/audit.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -258,6 +259,7 @@ static void do_sys_vm86(struct kernel_vm
 #ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss;
 #endif
+       long eax;
 /*
  * make sure the vm86() system call doesn't try to do anything silly
  */
@@ -313,13 +315,19 @@ static void do_sys_vm86(struct kernel_vm
        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
                mark_screen_rdonly(tsk->mm);
+       __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
+       __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
+
+       /*call audit_syscall_exit since we do not exit via the normal paths */
+       if (unlikely(current->audit_context))
+               audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+
        __asm__ __volatile__(
-               "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
                "movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
                "jmp resume_userspace"
                : /* no outputs */
-               :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+               :"r" (&info->regs), "r" (task_thread_info(tsk)));
        /* we never return here */
 }
 
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile    Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile    Thu May 04 17:38:25 2006 +0100
@@ -28,11 +28,11 @@ quiet_cmd_syscall = SYSCALL $@
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
-AFLAGS_vsyscall-sysenter.o = -m32 -Iarch/i386/kernel
-AFLAGS_vsyscall-syscall.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
 
 ifdef CONFIG_XEN
-AFLAGS_vsyscall-int80.o = -m32 -Iarch/i386/kernel
+AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
 CFLAGS_syscall32-xen.o += -DUSE_INT80
 AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
 
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Thu May 04 17:38:25 2006 +0100
@@ -221,6 +221,10 @@ rff_trace:
  *
  * XXX if we had a free scratch register we could save the RSP into the stack frame
  *      and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with uncanonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
  */                                    
 
 ENTRY(system_call)
@@ -289,7 +293,10 @@ sysret_signal:
        xorl %esi,%esi # oldset -> arg2
        call ptregscall_common
 1:     movl $_TIF_NEED_RESCHED,%edi
-       jmp sysret_check
+       /* Use IRET because user could have changed frame. This
+          works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+       cli
+       jmp int_with_check
        
 badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -315,7 +322,8 @@ 1:  SAVE_REST
        call syscall_trace_leave
        RESTORE_TOP_OF_STACK %rbx
        RESTORE_REST
-       jmp ret_from_sys_call
+       /* Use IRET because user could have changed frame */
+       jmp int_ret_from_sys_call
        CFI_ENDPROC
                
 /* 
@@ -449,25 +457,9 @@ ENTRY(stub_execve)
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        SAVE_REST
-       movq %r11, %r15
-       CFI_REGISTER rip, r15
        FIXUP_TOP_OF_STACK %r11
        call sys_execve
-       GET_THREAD_INFO(%rcx)
-       bt $TIF_IA32,threadinfo_flags(%rcx)
-       CFI_REMEMBER_STATE
-       jc exec_32bit
        RESTORE_TOP_OF_STACK %r11
-       movq %r15, %r11
-       CFI_REGISTER rip, r11
-       RESTORE_REST
-       pushq %r11
-       CFI_ADJUST_CFA_OFFSET 8
-       CFI_REL_OFFSET rip, 0
-       ret
-
-exec_32bit:
-       CFI_RESTORE_STATE
        movq %rax,RAX(%rsp)
        RESTORE_REST
        jmp int_ret_from_sys_call
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c     Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c     Thu May 04 17:38:25 2006 +0100
@@ -484,6 +484,10 @@ __switch_to(struct task_struct *prev_p, 
         * This is basically '__unlazy_fpu', except that we queue a
         * multicall to indicate FPU task switch, rather than
         * synchronously trapping to Xen.
+        * This must be here to ensure both math_state_restore() and
+        * kernel_fpu_begin() work consistently.
+        * The AMD workaround requires it to be after DS reload, or
+        * after DS has been cleared, which we do in __prepare_arch_switch.
         */
        if (prev_p->thread_info->status & TS_USEDFPU) {
                __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Thu May 04 17:38:25 2006 +0100
@@ -1157,6 +1157,10 @@ static int __init init_amd(struct cpuinf
        if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
                set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
 
+       /* Enable workaround for FXSAVE leak */
+       if (c->x86 >= 6)
+               set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
        r = get_model_name(c);
        if (!r) { 
                switch (c->x86) { 
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c        Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c        Thu May 04 17:38:25 2006 +0100
@@ -2708,7 +2708,11 @@ static void __do_SAK(void *arg)
                }
                task_lock(p);
                if (p->files) {
-                       rcu_read_lock();
+                       /*
+                        * We don't take a ref to the file, so we must
+                        * hold ->file_lock instead.
+                        */
+                       spin_lock(&p->files->file_lock);
                        fdt = files_fdtable(p->files);
                        for (i=0; i < fdt->max_fds; i++) {
                                filp = fcheck_files(p->files, i);
@@ -2723,7 +2727,7 @@ static void __do_SAK(void *arg)
                                        break;
                                }
                        }
-                       rcu_read_unlock();
+                       spin_unlock(&p->files->file_lock);
                }
                task_unlock(p);
        } while_each_task_pid(session, PIDTYPE_SID, p);
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h       Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h       Thu May 04 17:38:25 2006 +0100
@@ -32,6 +32,9 @@
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+
+#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
 
 #define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)         ((a).pte_low == (b).pte_low)
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h       Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h       Thu May 04 17:38:25 2006 +0100
@@ -107,6 +107,26 @@ static inline void pud_clear (pud_t * pu
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
                        pmd_index(address))
 
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+       ptep->pte_low = 0;
+       smp_wmb();
+       ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+       u32 *tmp = (u32 *)pmd;
+       *tmp = 0;
+       smp_wmb();
+       *(tmp + 1) = 0;
+}
+
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
        pte_t res;
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Thu May 04 17:38:25 2006 +0100
@@ -205,14 +205,12 @@ extern unsigned long pg0[];
 extern unsigned long pg0[];
 
 #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)    (!(unsigned long)pmd_val(x))
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
-#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
 #define pmd_bad(x)     ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 
 
@@ -272,16 +270,7 @@ static inline pte_t ptep_get_and_clear_f
        pte_t pte;
        if (full) {
                pte = *ptep;
-#ifdef CONFIG_X86_PAE
-               /* Cannot do this in a single step, as the compiler may
-                  issue the two stores in either order, but the hypervisor
-                  must not see the high part before the low one. */
-               ptep->pte_low = 0;
-               barrier();
-               ptep->pte_high = 0;
-#else
-               *ptep = __pte(0);
-#endif
+               pte_clear(mm, addr, ptep);
        } else {
                pte = ptep_get_and_clear(mm, addr, ptep);
        }
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h   Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/include/linux/mm.h   Thu May 04 17:38:25 2006 +0100
@@ -232,10 +232,9 @@ struct page {
                unsigned long private;          /* Mapping-private opaque data:
                                                 * usually used for buffer_heads
                                                 * if PagePrivate set; used for
-                                                * swp_entry_t if PageSwapCache.
-                                                * When page is free, this
+                                                * swp_entry_t if PageSwapCache;
                                                 * indicates order in the buddy
-                                                * system.
+                                                * system if PG_buddy is set.
                                                 */
                struct address_space *mapping;  /* If low bit clear, points to
                                                 * inode address_space, or NULL.
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c      Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c      Thu May 04 17:38:25 2006 +0100
@@ -153,7 +153,8 @@ static void bad_page(struct page *page)
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback );
+                       1 << PG_writeback |
+                       1 << PG_buddy );
        set_page_count(page, 0);
        reset_page_mapcount(page);
        page->mapping = NULL;
@@ -224,12 +225,12 @@ static inline unsigned long page_order(s
 
 static inline void set_page_order(struct page *page, int order) {
        set_page_private(page, order);
-       __SetPagePrivate(page);
+       __SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-       __ClearPagePrivate(page);
+       __ClearPageBuddy(page);
        set_page_private(page, 0);
 }
 
@@ -268,11 +269,13 @@ __find_combined_index(unsigned long page
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
- *
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
@@ -281,10 +284,10 @@ static inline int page_is_buddy(struct p
                return 0;
 #endif
 
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
+       if (PageBuddy(page) && page_order(page) == order) {
+               BUG_ON(page_count(page) != 0);
                return 1;
+       }
        return 0;
 }
 
@@ -301,7 +304,7 @@ static inline int page_is_buddy(struct p
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
@@ -364,7 +367,8 @@ static inline int free_pages_check(struc
                        1 << PG_slab    |
                        1 << PG_swapcache |
                        1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                bad_page(page);
        if (PageDirty(page))
                __ClearPageDirty(page);
@@ -523,7 +527,8 @@ static int prep_new_page(struct page *pa
                        1 << PG_slab    |
                        1 << PG_swapcache |
                        1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                bad_page(page);
 
        /*
diff -r d36ac8bf715e -r 44e5abbf333b linux-2.6-xen-sparse/net/core/dev.c
--- a/linux-2.6-xen-sparse/net/core/dev.c       Thu May 04 16:49:58 2006 +0100
+++ b/linux-2.6-xen-sparse/net/core/dev.c       Thu May 04 17:38:25 2006 +0100
@@ -2994,11 +2994,11 @@ void netdev_run_todo(void)
 
                switch(dev->reg_state) {
                case NETREG_REGISTERING:
+                       dev->reg_state = NETREG_REGISTERED;
                        err = netdev_register_sysfs(dev);
                        if (err)
                                printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
                                       dev->name, err);
-                       dev->reg_state = NETREG_REGISTERED;
                        break;
 
                case NETREG_UNREGISTERING:

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.