[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] non-PAE behavior should be identical after applying these



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID f1a16642edaf5fd3dfd3bb3e92f60ff1ef5e9c6a
# Parent  e6f48ae99035ebfdcb84379e62e8883fa215d676

Non-PAE behavior should be identical after applying these
patches, i.e. both dom0 and domU still boot as usual.

In PAE mode, dom0 boots and seems to be stable; it is running
Linux kernel builds with -j12 at the moment ;)

Actually using more than 4GB isn't tested yet, since my machine has
only 1GB.  This also needs a patch for the e820 code in
Xen; right now Xen will not even try to use memory above 4GB.

Signed-off-by: Gerd Knorr <kraxel@xxxxxxx>

diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Mon Jul 11 16:00:46 2005
@@ -47,15 +47,20 @@
 #elif defined(CONFIG_X86_64)
 #define pmd_val_ma(v) (v).pmd
 #else
-#define pmd_val_ma(v) (v).pud.pgd.pgd
+#ifdef CONFIG_X86_PAE
+# define pmd_val_ma(v) ((v).pmd)
+# define pud_val_ma(v) ((v).pgd.pgd)
+#else
+# define pmd_val_ma(v) ((v).pud.pgd.pgd)
+#endif
 #endif
 
 #ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val)
-{
-    mmu_update_t u;
-    u.ptr = virt_to_machine(ptr);
-    u.val = val;
+void xen_l1_entry_update(pte_t *ptr, pte_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = pte_val_ma(val);
     BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
 
@@ -66,6 +71,16 @@
     u.val = pmd_val_ma(val);
     BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
 }
+
+#ifdef CONFIG_X86_PAE
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = pud_val_ma(val);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+#endif
 
 #ifdef CONFIG_X86_64
 void xen_l3_entry_update(pud_t *ptr, pud_t val)
@@ -171,6 +186,8 @@
     struct mmuext_op op;
 #ifdef CONFIG_X86_64
     op.cmd = MMUEXT_PIN_L4_TABLE;
+#elif defined(CONFIG_X86_PAE)
+    op.cmd = MMUEXT_PIN_L3_TABLE;
 #else
     op.cmd = MMUEXT_PIN_L2_TABLE;
 #endif
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Mon Jul 11 16:00:46 2005
@@ -400,7 +400,7 @@
                        if ( likely((__vma)->vm_mm == current->mm) ) {    \
                            HYPERVISOR_update_va_mapping((__address), 
(__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned 
long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
                        } else {                                          \
-                            xen_l1_entry_update((__ptep), (__entry).pte_low); \
+                            xen_l1_entry_update((__ptep), (__entry)); \
                            flush_tlb_page((__vma), (__address));         \
                        }                                                 \
                }                                                         \
@@ -419,7 +419,7 @@
                HYPERVISOR_update_va_mapping((__address),               \
                                             __entry, 0);               \
        } else {                                                        \
-               xen_l1_entry_update((__ptep), (__entry).pte_low);       \
+               xen_l1_entry_update((__ptep), (__entry));       \
        }                                                               \
 } while (0)
 
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Mon Jul 11 16:00:46 2005
@@ -57,9 +57,10 @@
 {
        pud_t *pud;
        pmd_t *pmd_table;
-               
+
 #ifdef CONFIG_X86_PAE
        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+       make_page_readonly(pmd_table);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        if (pmd_table != pmd_offset(pud, 0)) 
@@ -115,13 +116,13 @@
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;
 
-       for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, 
pgd_idx++) {
+       for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
                if (pgd_none(*pgd)) 
                        one_md_table_init(pgd);
                pud = pud_offset(pgd, vaddr);
                pmd = pmd_offset(pud, vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, 
pmd_idx++) {
-                       if (pmd_none(*pmd)) 
+                       if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd)) 
                                one_page_table_init(pmd);
 
                        vaddr += PMD_SIZE;
@@ -160,13 +161,26 @@
        pmd_idx = pmd_index(PAGE_OFFSET);
        pte_ofs = pte_index(PAGE_OFFSET);
 
-       for (; pgd_idx < PTRS_PER_PGD_NO_HV; pgd++, pgd_idx++) {
+       for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+#ifdef CONFIG_XEN
+               /*
+                * Native linux hasn't PAE-paging enabled yet at this
+                * point.  When running as xen domain we are in PAE
+                * mode already, thus we can't simply hook a empty
+                * pmd.  That would kill the mappings we are currently
+                * using ...
+                */
+               pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
+#else
                pmd = one_md_table_init(pgd);
+#endif
                if (pfn >= max_low_pfn)
                        continue;
                pmd += pmd_idx;
                for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, 
pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+                       if (address >= HYPERVISOR_VIRT_START)
+                               continue;
 
                        /* Map with big pages if possible, otherwise create 
normal page tables. */
                        if (cpu_has_pse) {
@@ -350,6 +364,7 @@
         * page directory, write-protect the new page directory, then switch to
         * it. We clean up by write-enabling and then freeing the old page dir.
         */
+#ifndef CONFIG_X86_PAE
        memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
        make_page_readonly(pgd_base);
        xen_pgd_pin(__pa(pgd_base));
@@ -358,8 +373,31 @@
        make_page_writable(old_pgd);
        __flush_tlb_all();
        free_bootmem(__pa(old_pgd), PAGE_SIZE);
+#else
+       {
+               pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
+               pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
+               pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
+
+               memcpy(new_pmd,  old_pmd, PAGE_SIZE);
+               memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+               set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
+
+               make_page_readonly(new_pmd);
+               make_page_readonly(pgd_base);
+               xen_pgd_pin(__pa(pgd_base));
+               load_cr3(pgd_base);
+               xen_pgd_unpin(__pa(old_pgd));
+               make_page_writable(old_pgd);
+               make_page_writable(old_pmd);
+               __flush_tlb_all();
+
+               free_bootmem(__pa(old_pgd), PAGE_SIZE);
+               free_bootmem(__pa(old_pmd), PAGE_SIZE);
+       }
+#endif
+
        init_mm.context.pinned = 1;
-
        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();
 
@@ -372,7 +410,7 @@
 
        permanent_kmaps_init(pgd_base);
 
-#ifdef CONFIG_X86_PAE
+#if 0 /* def CONFIG_X86_PAE */
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
@@ -380,7 +418,7 @@
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
-       pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+       set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
 #endif
 }
 
@@ -415,7 +453,7 @@
         * us, because pgd_clear() is a no-op on i386.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
                set_pgd(swapper_pg_dir+i, __pgd(0));
@@ -514,10 +552,12 @@
 
        pagetable_init();
 
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.
+        * when running as xen domain we are already in PAE mode at
+        * this point.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
@@ -690,8 +730,13 @@
                        panic("pgtable_cache_init(): cannot create pmd cache");
        }
        pgd_cache = kmem_cache_create("pgd",
+#if 0 /* How the heck _this_ works in native linux ??? */
                                PTRS_PER_PGD*sizeof(pgd_t),
                                PTRS_PER_PGD*sizeof(pgd_t),
+#else
+                               PAGE_SIZE,
+                               PAGE_SIZE,
+#endif
                                0,
                                pgd_ctor,
                                pgd_dtor);
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Mon Jul 11 16:00:46 2005
@@ -364,6 +364,7 @@
                if (!HAVE_SHARED_KERNEL_PMD) {
                        pmd_t *pmd = (void 
*)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
                        make_page_writable(pmd);
+                       memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
                        kmem_cache_free(pmd_cache, pmd);
                }
        }
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Mon Jul 11 16:00:46 2005
@@ -15,6 +15,8 @@
 #include <linux/config.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/bug.h>
 #include <asm-xen/xen-public/xen.h>
 #include <asm-xen/foreign_page.h>
 
@@ -84,9 +86,40 @@
 typedef struct { unsigned long long pmd; } pmd_t;
 typedef struct { unsigned long long pgd; } pgd_t;
 typedef struct { unsigned long long pgprot; } pgprot_t;
-#define pmd_val(x)     ((x).pmd)
-#define pte_val(x)     ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
-#define __pmd(x) ((pmd_t) { (x) } )
+#define __pte(x) ({ unsigned long long _x = (x); \
+    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long long _x = (x); \
+    (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+#define __pmd(x) ({ unsigned long long _x = (x); \
+    (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
+static inline unsigned long long pte_val(pte_t x)
+{
+       unsigned long long ret;
+
+       if (x.pte_low) {
+               ret = x.pte_low | (unsigned long long)x.pte_high << 32;
+               ret = machine_to_phys(ret) | 1;
+       } else {
+               ret = 0;
+       }
+       return ret;
+}
+static inline unsigned long long pmd_val(pmd_t x)
+{
+       unsigned long long ret = x.pmd;
+       if (ret) ret = machine_to_phys(ret) | 1;
+       return ret;
+}
+static inline unsigned long long pgd_val(pgd_t x)
+{
+       unsigned long long ret = x.pgd;
+       if (ret) ret = machine_to_phys(ret) | 1;
+       return ret;
+}
+static inline unsigned long long pte_val_ma(pte_t x)
+{
+       return (unsigned long long)x.pte_high << 32 | x.pte_low;
+}
 #define HPAGE_SHIFT    21
 #else
 typedef struct { unsigned long pte_low; } pte_t;
@@ -96,6 +129,16 @@
 #define pte_val(x)     (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
                         (x).pte_low)
 #define pte_val_ma(x)  ((x).pte_low)
+#define __pte(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+static inline unsigned long pgd_val(pgd_t x)
+{
+       unsigned long ret = x.pgd;
+       if (ret) ret = machine_to_phys(ret) | 1;
+       return ret;
+}
 #define HPAGE_SHIFT    22
 #endif
 #define PTE_MASK       PAGE_MASK
@@ -107,20 +150,9 @@
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #endif
 
-
-static inline unsigned long pgd_val(pgd_t x)
-{
-       unsigned long ret = x.pgd;
-       if (ret) ret = machine_to_phys(ret);
-       return ret;
-}
 #define pgprot_val(x)  ((x).pgprot)
 
-#define __pte(x) ({ unsigned long _x = (x); \
-    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
 #define __pte_ma(x)    ((pte_t) { (x) } )
-#define __pgd(x) ({ unsigned long _x = (x); \
-    (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
 #define __pgprot(x)    ((pgprot_t) { (x) } )
 
 #endif /* !__ASSEMBLY__ */
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Mon Jul 11 16:00:46 2005
@@ -40,9 +40,13 @@
 #include <asm/ptrace.h>
 #include <asm/page.h>
 #if defined(__i386__)
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <asm-generic/pgtable-nopmd.h>
-#endif
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#  ifdef CONFIG_X86_PAE
+#   include <asm-generic/pgtable-nopud.h>
+#  else
+#   include <asm-generic/pgtable-nopmd.h>
+#  endif
+# endif
 #endif
 
 /* arch/xen/i386/kernel/setup.c */
@@ -80,11 +84,9 @@
 void xen_invlpg(unsigned long ptr);
 
 #ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val);
+void xen_l1_entry_update(pte_t *ptr, pte_t val);
 void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
-#ifdef __x86_64__
-void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64 only */
-#endif
+void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
 void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
 void xen_pgd_pin(unsigned long ptr);
 void xen_pgd_unpin(unsigned long ptr);
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Mon Jul 11 16:00:46 2005
@@ -581,11 +581,11 @@
          Select this if you have a 32-bit processor and between 1 and 4
          gigabytes of physical RAM.
 
-#config HIGHMEM64G
-#      bool "64GB"
-#      help
-#        Select this if you have a 32-bit processor and more than 4
-#        gigabytes of physical RAM.
+config HIGHMEM64G
+       bool "64GB"
+       help
+         Select this if you have a 32-bit processor and more than 4
+         gigabytes of physical RAM.
 
 endchoice
 
diff -r e6f48ae99035 -r f1a16642edaf linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Mon Jul 11 15:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Mon Jul 11 16:00:46 2005
@@ -5,6 +5,11 @@
        .ascii  "GUEST_OS=linux,GUEST_VER=2.6"
        .ascii  ",XEN_VER=3.0"
        .ascii  ",VIRT_BASE=0xC0000000"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes"
+#else
+       .ascii  ",PAE=no"
+#endif
        .ascii  ",LOADER=generic"
        .byte   0
 
@@ -145,10 +150,17 @@
        .quad 0x0000000000000000        /* 0x53 reserved */
        .quad 0x0000000000000000        /* 0x5b reserved */
 
+#ifdef CONFIG_X86_PAE
+       .quad 0x00cfbb00000067ff        /* 0x60 kernel 4GB code at 0x00000000 */
+       .quad 0x00cfb300000067ff        /* 0x68 kernel 4GB data at 0x00000000 */
+       .quad 0x00cffb00000067ff        /* 0x73 user 4GB code at 0x00000000 */
+       .quad 0x00cff300000067ff        /* 0x7b user 4GB data at 0x00000000 */
+#else
        .quad 0x00cfbb000000c3ff        /* 0x60 kernel 4GB code at 0x00000000 */
        .quad 0x00cfb3000000c3ff        /* 0x68 kernel 4GB data at 0x00000000 */
        .quad 0x00cffb000000c3ff        /* 0x73 user 4GB code at 0x00000000 */
        .quad 0x00cff3000000c3ff        /* 0x7b user 4GB data at 0x00000000 */
+#endif
 
        .quad 0x0000000000000000        /* 0x80 TSS descriptor */
        .quad 0x0000000000000000        /* 0x88 LDT descriptor */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.