
[Xen-changelog] Attached is a patch to x86_64 xenlinux. It also includes cleanups.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID c4512592a1dc11c76b94d87f14849bcc38871f72
# Parent  69bf77e1b10272ebc40013ae6b8e5b0740df701c
This patch updates x86_64 xenlinux and also includes cleanups. We are
now working on SMP + writable pagetable support.
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>

diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S        Mon Aug  8 08:18:38 2005
@@ -28,8 +28,6 @@
 #include <asm/page.h>
 #include <asm/msr.h>
 #include <asm/cache.h>
-/* #include <asm/thread_info.h> */
-        
        
 /* we are not able to switch in one step to the final KERNEL ADRESS SPACE
  * because we need identity-mapped pages on setup so define __START_KERNEL to
@@ -116,15 +114,81 @@
 ENTRY(init_level4_user_pgt)
        .fill   512,8,0
 
+       /*
+        * In Xen the following pre-initialized pgt entries are re-initialized.
+        */
+.org 0x3000
+ENTRY(level3_kernel_pgt)
+       .fill   510,8,0
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt */
+       .fill   1,8,0
+
+.org 0x4000
+ENTRY(level2_ident_pgt)
+       /* 40MB for bootup.     */
+       .quad   0x0000000000000283
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
+       .globl temp_boot_pmds
+temp_boot_pmds:
+       .fill   492,8,0
+
+.org 0x5000
+ENTRY(level2_kernel_pgt)
+       /* 40MB kernel mapping. The kernel code cannot be bigger than that.
+          When you change this change KERNEL_TEXT_SIZE in page.h too. */
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
+       .quad   0x0000000000000183
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       /* Module mapping starts here */
+       .fill   492,8,0
+       
         /*
          * This is used for vsyscall area mapping as we have a different
          * level4 page table for user.
          */
-.org 0x3000
+.org 0x6000
 ENTRY(level3_user_pgt)
         .fill  512,8,0
 
-.org 0x4000
+.org 0x7000
 ENTRY(cpu_gdt_table)
 /* The TLS descriptors are currently at a different place compared to i386.
    Hopefully nobody expects them at a fixed place (Wine?) */
@@ -147,19 +211,24 @@
        /* GDTs of other CPUs: */       
        .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
 
-.org 0x5000
+.org 0x8000
 ENTRY(empty_zero_page)
 
-.org 0x6000
+.org 0x9000
 ENTRY(empty_bad_page)
 
-.org 0x7000
+.org 0xa000
 ENTRY(empty_bad_pte_table)
 
-.org 0x8000
+.org 0xb000
 ENTRY(empty_bad_pmd_table)
 
-       .org 0x9000
+.org 0xc000
+ENTRY(level3_physmem_pgt)
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+
+       
+       .org 0xd000
 #ifdef CONFIG_ACPI_SLEEP
 ENTRY(wakeup_level4_pgt)
        .quad   0x0000000000102007              /* -> level3_ident_pgt */
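
A note on the head.S hunk above: the magic numbers follow from the standard
x86-64 4-level paging geometry. Each level decodes 9 bits of a 48-bit virtual
address, and the kernel text lives at __START_KERNEL_map = 0xffffffff80000000,
2 GiB below the top of the canonical space, which is why level3_kernel_pgt
fills slot 510 while the pgd uses slot 511. The low bits of entries like
0x105007 are flags (present + writable + user), and 0x183 marks present,
writable, global 2 MiB PSE pages. A minimal user-space sketch of the index
arithmetic the comments encode (the constants are the architectural ones, not
taken from this patch):

#include <stdio.h>

int main(void)
{
        unsigned long va  = 0xffffffff80000000UL;    /* __START_KERNEL_map */
        unsigned long lin = va & ((1UL << 48) - 1);  /* translated low 48 bits */

        printf("pgd index = %lu\n", (lin >> 39) & 511); /* 511: last level4 slot */
        printf("pud index = %lu\n", (lin >> 30) & 511); /* 510: level3_kernel_pgt slot */
        printf("pmd index = %lu\n", (lin >> 21) & 511); /* 0: base of level2_kernel_pgt */
        return 0;
}
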
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Mon Aug  8 08:18:38 2005
@@ -623,7 +623,9 @@
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
 #endif
-/*        register_console(&xen_console); */
+
+       HYPERVISOR_vm_assist(VMASST_CMD_enable,
+                            VMASST_TYPE_writable_pagetables);
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
        /* This is drawn from a dump from vgacon:startup in standard Linux. */
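
The setup.c change above replaces a long-dead console hack with the hypercall
that turns on Xen's writable pagetable assist: the guest then writes pagetable
entries in place and Xen traps, validates, and applies the writes. A hedged
sketch of a defensive variant (the return-value convention and the warning
message are assumptions, not part of this patch):

#include <linux/kernel.h>
#include <asm-xen/hypervisor.h>

static void __init enable_writable_pagetables(void)
{
        /* Ask Xen to emulate in-place pagetable writes. */
        if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                 VMASST_TYPE_writable_pagetables) != 0)
                /* Assumed fallback: keep using explicit update hypercalls. */
                printk(KERN_WARNING "Xen: writable pagetable assist unavailable\n");
}
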
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Mon Aug  8 08:18:38 2005
@@ -30,9 +30,9 @@
 #include <asm/proto.h>
 #include <asm/mman.h>
 #include <asm/numa.h>
-
+#ifdef CONFIG_XEN
 #include <asm-xen/hypervisor.h>
-
+#endif
 char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
 
 cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
@@ -123,82 +123,11 @@
        }
 } 
 
-void pda_init(int cpu)
-{ 
-        pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
-       struct x8664_pda *pda = &cpu_pda[cpu];
-
-       /* Setup up data that may be needed in __get_free_pages early */
-       asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
-        HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, 
-                                    (unsigned long)(cpu_pda + cpu));
-
-       pda->me = pda;
-       pda->cpunumber = cpu; 
-       pda->irqcount = -1;
-       pda->kernelstack = 
-               (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; 
-       pda->active_mm = &init_mm;
-       pda->mmu_state = 0;
-        pda->kernel_mode = 1;
-
-       if (cpu == 0) {
-                memcpy((void *)init_level4_pgt, 
-                       (void *) xen_start_info.pt_base, PAGE_SIZE);
-               /* others are initialized in smpboot.c */
-               pda->pcurrent = &init_task;
-               pda->irqstackptr = boot_cpu_stack; 
-                make_page_readonly(init_level4_pgt);
-                make_page_readonly(init_level4_user_pgt);
-                make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
-                xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
-                xen_pud_pin(__pa_symbol(level3_user_pgt));
-                set_pgd((pgd_t *)(init_level4_user_pgt + 511), 
-                        mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
-       } else {
-               pda->irqstackptr = (char *)
-                       __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-               if (!pda->irqstackptr)
-                       panic("cannot allocate irqstack for cpu %d", cpu); 
-       }
-
+#ifdef CONFIG_XEN
+static void switch_pt(void)
+{
        xen_pt_switch(__pa(init_level4_pgt));
         xen_new_user_pt(__pa(init_level4_user_pgt));
-
-       if (cpu == 0) {
-                xen_pgd_unpin(__pa(old_level4));
-#if 0
-                early_printk("__pa: %x, <machine_phys> old_level 4 %x\n", 
-                             __pa(xen_start_info.pt_base),
-                             pfn_to_mfn(__pa(old_level4) >> PAGE_SHIFT));
-#endif
-//                make_page_writable(old_level4);
-//                free_bootmem(__pa(old_level4), PAGE_SIZE);
-        }
-
-       pda->irqstackptr += IRQSTACKSIZE-64;
-} 
-
-char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] 
-__attribute__((section(".bss.page_aligned")));
-
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
-{
-#ifdef CONFIG_IA32_EMULATION                   
-       syscall32_cpu_init ();
-#endif
-}
-
-void __init check_efer(void)
-{
-       unsigned long efer;
-
-       rdmsrl(MSR_EFER, efer); 
-        if (!(efer & EFER_NX) || do_not_nx) { 
-                __supported_pte_mask &= ~_PAGE_NX; 
-
-        }       
 }
 
 void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
@@ -217,7 +146,96 @@
                                sizeof (struct desc_struct)))
                BUG();
 }
-
+#else
+static void switch_pt(void)
+{
+       asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+}
+
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+#ifdef CONFIG_SMP
+       int cpu = stack_smp_processor_id();
+#else
+       int cpu = smp_processor_id();
+#endif
+
+       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+       asm volatile("lidt %0" :: "m" (idt_descr));
+}
+#endif
+
+
+void pda_init(int cpu)
+{ 
+       struct x8664_pda *pda = &cpu_pda[cpu];
+
+       /* Setup up data that may be needed in __get_free_pages early */
+       asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
+#ifndef CONFIG_XEN
+       wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+#else
+        HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, 
+                                    (unsigned long)(cpu_pda + cpu));
+#endif
+       pda->me = pda;
+       pda->cpunumber = cpu; 
+       pda->irqcount = -1;
+       pda->kernelstack = 
+               (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; 
+       pda->active_mm = &init_mm;
+       pda->mmu_state = 0;
+
+       if (cpu == 0) {
+#ifdef CONFIG_XEN
+               xen_init_pt();
+#endif
+               /* others are initialized in smpboot.c */
+               pda->pcurrent = &init_task;
+               pda->irqstackptr = boot_cpu_stack; 
+       } else {
+               pda->irqstackptr = (char *)
+                       __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+               if (!pda->irqstackptr)
+                       panic("cannot allocate irqstack for cpu %d", cpu); 
+       }
+
+       switch_pt();
+       pda->irqstackptr += IRQSTACKSIZE-64;
+} 
+
+char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] 
+__attribute__((section(".bss.page_aligned")));
+
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
+{
+#ifndef CONFIG_XEN
+       /* 
+        * LSTAR and STAR live in a bit strange symbiosis.
+        * They both write to the same internal register. STAR allows to set CS/DS
+        * but only a 32bit target. LSTAR sets the 64bit rip.    
+        */ 
+       wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
+       wrmsrl(MSR_LSTAR, system_call); 
+
+       /* Flags to clear on syscall */
+       wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
+#endif
+#ifdef CONFIG_IA32_EMULATION                   
+       syscall32_cpu_init ();
+#endif
+}
+
+void __init check_efer(void)
+{
+       unsigned long efer;
+
+       rdmsrl(MSR_EFER, efer); 
+        if (!(efer & EFER_NX) || do_not_nx) { 
+                __supported_pte_mask &= ~_PAGE_NX; 
+        }       
+}
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
@@ -247,14 +265,13 @@
 
        me = current;
 
-       if (test_and_set_bit(cpu, &cpu_initialized))
+       if (cpu_test_and_set(cpu, cpu_initialized))
                panic("CPU#%d already initialized!\n", cpu);
 
        printk("Initializing CPU#%d\n", cpu);
 
-#if 0
                clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-#endif
+
        /*
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
@@ -265,18 +282,16 @@
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
-       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
-       asm volatile("lidt %0" :: "m" (idt_descr));
-#endif
+
         cpu_gdt_init(&cpu_gdt_descr[cpu]);
 
-#if 0
+#ifndef CONFIG_XEN 
        memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
 
-#endif
+#else
        memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
            GDT_ENTRY_TLS_ENTRIES * 8);
+#endif
        
        /*
         * Delete NT
@@ -284,12 +299,12 @@
 
        asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
 
-       if (cpu == 0) 
-               early_identify_cpu(&boot_cpu_data);
-
        syscall_init();
 
+       wrmsrl(MSR_FS_BASE, 0);
+       wrmsrl(MSR_KERNEL_GS_BASE, 0);
        barrier(); 
+
        check_efer();
 
        /*
@@ -321,19 +336,22 @@
                BUG();
        enter_lazy_tlb(&init_mm, me);
 
+#ifndef CONFIG_XEN
+       set_tss_desc(cpu, t);
+       load_TR_desc();
+#endif
        load_LDT(&init_mm.context);
 
        /*
         * Clear all 6 debug registers:
         */
-#define CD(register) HYPERVISOR_set_debugreg(register, 0)
-
-       CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+
+       set_debug(0UL, 0);
+       set_debug(0UL, 1);
+       set_debug(0UL, 2);
+       set_debug(0UL, 3);
+       set_debug(0UL, 6);
+       set_debug(0UL, 7);
+
        fpu_init(); 
-
-#ifdef CONFIG_NUMA
-       numa_add_cpu(cpu);
-#endif
-}
+}
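
Context for the GS.base setup in pda_init() above: the native build writes
MSR_GS_BASE directly, the Xen build asks the hypervisor via SEGBASE_GS_KERNEL,
but afterwards both reach per-CPU data with a single %gs-relative access. A
minimal sketch of that read path (the struct layout and offsetof usage are
illustrative; the kernel's read_pda() macro in asm/pda.h is the real thing):

#include <stddef.h>

/* Illustrative stand-in for struct x8664_pda. */
struct pda_sketch {
        void *me;
        int cpunumber;
};

static inline int sketch_cpu_number(void)
{
        int cpu;
        /* One %gs-relative load; no array indexing, no locking. */
        asm("movl %%gs:%c1,%0"
            : "=r" (cpu)
            : "i" (offsetof(struct pda_sketch, cpunumber)));
        return cpu;
}
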
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Mon Aug  8 08:18:38 2005
@@ -280,7 +280,7 @@
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
-        xen_l1_entry_update(pte, new_pte);
+        set_pte(pte, new_pte);
 
        /*
         * It's enough to flush this one mapping.
@@ -511,6 +511,78 @@
                          round_up(ptes * 8, PAGE_SIZE); 
 }
 
+static void xen_copy_pt(void)
+{
+       unsigned long va = __START_KERNEL_map;
+       unsigned long addr, *pte_page;
+       int i;
+       pud_t *pud; pmd_t *pmd; pte_t *pte;
+       unsigned long *page = (unsigned long *) init_level4_pgt;
+
+       addr = (unsigned long) page[pgd_index(va)];
+       addr_to_page(addr, page);
+
+       pud = (pud_t *) &page[pud_index(va)];
+       addr = page[pud_index(va)];
+       addr_to_page(addr, page);
+
+       level3_kernel_pgt[pud_index(va)] = 
+               __pud(__pa_symbol(level2_kernel_pgt) | _KERNPG_TABLE | _PAGE_USER);
+
+       for (;;) {
+               pmd = (pmd_t *) &page[pmd_index(va)];
+               if (pmd_present(*pmd)) {
+                       level2_kernel_pgt[pmd_index(va)] = *pmd;
+                       /*
+                        * if pmd is valid, check pte.
+                        */
+                       addr = page[pmd_index(va)];
+                       addr_to_page(addr, pte_page);
+                       
+                       for (i = 0; i < PTRS_PER_PTE; i++) {
+                               pte = (pte_t *) &pte_page[pte_index(va)];
+                               if (pte_present(*pte))
+                                       va += PAGE_SIZE;
+                               else
+                                   break;
+                       }
+
+               } else
+                   break;
+       }
+
+       init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
+               mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
+}
+
+void __init xen_init_pt(void)
+{
+        pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
+
+       memcpy((void *)init_level4_pgt, 
+              (void *)xen_start_info.pt_base, PAGE_SIZE);
+
+       memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
+       memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
+
+       xen_copy_pt();
+
+       make_page_readonly(init_level4_pgt);
+       make_page_readonly(level3_kernel_pgt);
+       make_page_readonly(level2_kernel_pgt);
+       make_page_readonly(init_level4_user_pgt);
+       make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
+
+       xen_pgd_pin(__pa_symbol(init_level4_pgt));
+       xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
+       xen_pud_pin(__pa_symbol(level3_kernel_pgt));
+       xen_pud_pin(__pa_symbol(level3_user_pgt));
+       xen_pmd_pin(__pa_symbol(level2_kernel_pgt));
+
+       set_pgd((pgd_t *)(init_level4_user_pgt + 511), 
+               mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+
+}
 
 /*
  * Extend kernel mapping to access pages for page tables.  The initial
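
The key rule in xen_init_pt() above is ordering: Xen refuses to pin a page as
a pagetable while the guest still maps it writable, so every
make_page_readonly() precedes the corresponding pin hypercall. A condensed
model of that sequence for the kernel tables (this reuses the helpers the
patch itself calls; it is a sketch of the invariant, not a drop-in
replacement for xen_init_pt):

static void __init pin_kernel_pagetables(void)
{
        /* 1: drop RW from the guest's own mappings of each table page. */
        make_page_readonly(init_level4_pgt);
        make_page_readonly(level3_kernel_pgt);
        make_page_readonly(level2_kernel_pgt);

        /* 2: only then may Xen validate and pin each level. */
        xen_pgd_pin(__pa_symbol(init_level4_pgt));
        xen_pud_pin(__pa_symbol(level3_kernel_pgt));
        xen_pmd_pin(__pa_symbol(level2_kernel_pgt));
}
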
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug  8 08:18:38 2005
@@ -4,31 +4,19 @@
 /*
  * This file contains the functions and defines necessary to modify and use
  * the x86-64 page table tree.
- * 
- * x86-64 has a 4 level table setup. Generic linux MM only supports
- * three levels. The fourth level is currently a single static page that
- * is shared by everybody and just contains a pointer to the current
- * three level page setup on the beginning and some kernel mappings at 
- * the end. For more details see Documentation/x86_64/mm.txt
  */
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <asm/bitops.h>
 #include <linux/threads.h>
 #include <asm/pda.h>
+#ifdef CONFIG_XEN
 #include <asm-xen/hypervisor.h>
+
 extern pud_t level3_user_pgt[512];
-extern pud_t init_level4_pgt[];
 extern pud_t init_level4_user_pgt[];
-extern unsigned long __supported_pte_mask;
-
-#define swapper_pg_dir NULL
-
-extern int nonx_setup(char *str);
-extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-
-extern unsigned long pgkern_mask;
+
+extern void xen_init_pt(void);
 
 #define virt_to_ptep(__va)                                             \
 ({                                                                     \
@@ -44,6 +32,22 @@
        unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;     \
        __pa | ((unsigned long)(__va) & (PAGE_SIZE-1));                 \
 })
+#endif
+
+extern pud_t level3_kernel_pgt[512];
+extern pud_t level3_physmem_pgt[512];
+extern pud_t level3_ident_pgt[512];
+extern pmd_t level2_kernel_pgt[512];
+extern pgd_t init_level4_pgt[];
+extern unsigned long __supported_pte_mask;
+
+#define swapper_pg_dir init_level4_pgt
+
+extern int nonx_setup(char *str);
+extern void paging_init(void);
+extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+extern unsigned long pgkern_mask;
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -52,11 +56,14 @@
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
 #define PGDIR_SHIFT    39
 #define PTRS_PER_PGD   512
 
 /*
- * PUDIR_SHIFT determines what a top-level page table entry can map
+ * 3rd level page
  */
 #define PUD_SHIFT      30
 #define PTRS_PER_PUD   512
@@ -80,7 +87,7 @@
 #define pud_ERROR(e) \
        printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e) \
-        printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
+       printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
 
 #define pgd_none(x)    (!pgd_val(x))
 #define pud_none(x)    (!pud_val(x))
@@ -90,18 +97,10 @@
 
 extern inline int pud_present(pud_t pud)       { return !pud_none(pud); }
 
-#ifdef CONFIG_SMP
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-
-#else
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-#if 0
 static inline void set_pte(pte_t *dst, pte_t val)
 {
        *dst = val;
 }
-#endif
-#endif
 
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval))
 #define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval))
@@ -132,6 +131,9 @@
  * each domain will have separate page tables, with their own versions of
  * accessed & dirty state.
  */
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte, 0))
+
+#if 0
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
 {
         pte_t pte = *xp;
@@ -139,21 +141,22 @@
                 set_pte(xp, __pte_ma(0));
         return pte;
 }
+#endif
 
 #define pte_same(a, b)         ((a).pte == (b).pte)
 
-#define PMD_SIZE        (1UL << PMD_SHIFT)
-#define PMD_MASK        (~(PMD_SIZE-1))
-#define PUD_SIZE        (1UL << PUD_SHIFT)
-#define PUD_MASK        (~(PUD_SIZE-1))
-#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK      (~(PGDIR_SIZE-1))
-
-#define USER_PTRS_PER_PGD       (TASK_SIZE/PGDIR_SIZE)
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PUD_SIZE       (1UL << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE-1))
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS     0
 
 #ifndef __ASSEMBLY__
-#define MAXMEM           0x3fffffffffffUL
+#define MAXMEM          0x3fffffffffffUL
 #define VMALLOC_START    0xffffc20000000000UL
 #define VMALLOC_END      0xffffe1ffffffffffUL
 #define MODULES_VADDR    0xffffffff88000000UL
@@ -347,7 +350,7 @@
        pte_t pte = *ptep;
        int ret = pte_dirty(pte);
        if (ret)
-               xen_l1_entry_update(ptep, pte_mkclean(pte));
+               set_pte(ptep, pte_mkclean(pte));
        return ret;
 }
 
@@ -356,7 +359,7 @@
        pte_t pte = *ptep;
        int ret = pte_young(pte);
        if (ret)
-               xen_l1_entry_update(ptep, pte_mkold(pte));
+               set_pte(ptep, pte_mkold(pte));
        return ret;
 }
 
@@ -398,7 +401,7 @@
 
 /* PUD - Level3 access */
 /* to find an entry in a page-table-directory. */
-#define pud_index(address) ((address >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
 static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
 { 
@@ -413,7 +416,7 @@
 {
        unsigned long addr;
 
-       addr = pud_val(init_level4_pgt[pud_index(address)]);
+       addr = pgd_val(init_level4_pgt[pud_index(address)]);
        addr &= PHYSICAL_PAGE_MASK; /* machine physical */
         addr = machine_to_phys(addr);
        return __pud_offset_k((pud_t *)__va(addr), address);
@@ -427,9 +430,11 @@
 #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
                                   pmd_index(address))
 #define pmd_none(x)    (!pmd_val(x))
-#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+   can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
 #define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
-#define        pmd_bad(x)      ((pmd_val(x) & ~PTE_MASK) != _KERNPG_TABLE )
+#define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
 
@@ -479,11 +484,24 @@
  * race with other CPU's that might be updating the dirty
  * bit at the same time. */
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#if 0
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
        do {                                                              \
                if (__dirty) {                                            \
                        set_pte(__ptep, __entry);                         \
                        flush_tlb_page(__vma, __address);                 \
+               }                                                         \
+       } while (0)
+#endif
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+       do {                                                              \
+               if (__dirty) {                                            \
+                       if ( likely((__vma)->vm_mm == current->mm) ) {    \
+                           HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
+                       } else {                                          \
+                            xen_l1_entry_update((__ptep), (__entry)); \
+                           flush_tlb_page((__vma), (__address));         \
+                       }                                                 \
                }                                                         \
        } while (0)
 
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h       Mon Aug  8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h       Mon Aug  8 08:18:38 2005
@@ -153,6 +153,20 @@
                } while (0);
        }
 }
+
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+#ifndef CONFIG_XEN
+       mmu_cr4_features &= ~mask;
+       __asm__("movq %%cr4,%%rax\n\t"
+               "andq %0,%%rax\n\t"
+               "movq %%rax,%%cr4\n"
+               : : "irg" (~mask)
+               :"ax");
+#endif
+}
+
 
 #define load_cr3(pgdir) do {                           \
        xen_pt_switch(__pa(pgdir));                     \
@@ -283,9 +297,9 @@
 	load_gs_index(0);						\
 	(regs)->rip = (new_rip);					\
 	(regs)->rsp = (new_rsp);					\
-	write_pda(oldrsp, (new_rsp));					\
-	(regs)->cs = __USER_CS;						\
-	(regs)->ss = __USER_DS;						\
+	write_pda(oldrsp, (new_rsp));					\
+	(regs)->cs = __USER_CS;						\
+	(regs)->ss = __USER_DS;						\
 	(regs)->eflags = 0x200;						\
 	set_fs(USER_DS);						\
 } while(0) 
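
The processor.h hunk above shows the paravirtual pattern this header applies
throughout: privileged register writes either become hypercalls (load_cr3
goes through xen_pt_switch) or, as with clear_in_cr4(), compile to nothing
because CR4 belongs to the hypervisor. A side-by-side sketch of the CR3 case
(the native arm is the standard mov-to-cr3; treat this as an illustration
rather than the header's exact code):

static inline void load_cr3_sketch(void *pgdir)
{
#ifdef CONFIG_XEN
        xen_pt_switch(__pa(pgdir));     /* hypercall: Xen validates the switch */
#else
        asm volatile("movq %0,%%cr3" :: "r" (__pa(pgdir)) : "memory");
#endif
}
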

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog