[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Subject: PAE support



ChangeSet 1.1616, 2005/05/31 16:39:28+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Subject: PAE support
        
        This patch adds initial support for PAE paging to xen.
        This patch does:
        
         * boot Xen itself with PAE paging enabled.
         * add PAE support to the dom0 domain builder.
        
        Some notes on the design and the changes:
        
         * There are two new config options: CONFIG_X86_PAE (boolean,
           same name Linux uses to simplify things) and
           CONFIG_PAGING_LEVELS (int, possible values are 2,3,4).  I've
           used #if CONFIG_PAGING_LEVELS for stuff which simply depends
           on the number of paging levels in the code common for
           x86-32/64, and CONFIG_X86_PAE for special PAE quirks or
           i386-only stuff.  I've tried to avoid ifdefs if possible
           though, often I rearranged code to make it work in both
           PAE and non-PAE case instead.
        
         * idle_pg_table:  3rd level is statically initialized, 2nd
           level is contiguous in physical and virtual memory, so it can
           be addressed linearly (the dom0 builder uses the same trick to
           simplify things a bit btw.).  There are two new symbols:
           idle_pg_table_l3 and idle_pg_table_l2 for the two tables.
           idle_pg_table is aliased to the toplevel page table, i.e.
           idle_pg_table_l3 in PAE mode and idle_pg_table_l2 in non-PAE
           mode.  The idle l3 table is actually never ever touched after
           boot, the l2 table is accessed via idle_pg_table_l2 and
           addressed linearly in both PAE and non-PAE mode.
        
         * I've added an "intpte_t" type and a PRIpte define, modeled
           after the C99 inttypes.h header, for page table entries.
        
        Signed-off-by: Gerd Knorr <kraxel@xxxxxxxxxxx>



 arch/x86/audit.c                     |    4 
 arch/x86/boot/x86_32.S               |   43 ++++
 arch/x86/dom0_ops.c                  |    2 
 arch/x86/domain.c                    |    8 
 arch/x86/domain_build.c              |   92 +++++++--
 arch/x86/idle0_task.c                |    3 
 arch/x86/mm.c                        |  348 ++++++++++++++++++++++++-----------
 arch/x86/setup.c                     |    2 
 arch/x86/shadow.c                    |   51 +++--
 arch/x86/traps.c                     |    2 
 arch/x86/vmx.c                       |   12 -
 arch/x86/vmx_io.c                    |    6 
 arch/x86/vmx_vmcs.c                  |    4 
 arch/x86/x86_32/domain_page.c        |    2 
 arch/x86/x86_32/mm.c                 |  140 +++++++++-----
 arch/x86/x86_32/traps.c              |   17 +
 arch/x86/x86_64/mm.c                 |    2 
 include/asm-x86/config.h             |   33 ++-
 include/asm-x86/domain.h             |    6 
 include/asm-x86/mm.h                 |   14 -
 include/asm-x86/page.h               |  151 +++++++++++++--
 include/asm-x86/shadow.h             |    6 
 include/asm-x86/smp.h                |    7 
 include/asm-x86/types.h              |    6 
 include/asm-x86/x86_32/page-2level.h |   49 ++++
 include/asm-x86/x86_32/page-3level.h |   56 +++++
 include/asm-x86/x86_32/page.h        |  127 ------------
 include/asm-x86/x86_64/page.h        |  188 ++----------------
 include/public/arch-x86_32.h         |    6 
 29 files changed, 830 insertions(+), 557 deletions(-)


diff -Nru a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/audit.c      2005-05-31 12:04:00 -04:00
@@ -408,9 +408,9 @@
 
         for_each_exec_domain(d, ed)
         {
-            if ( pagetable_val(ed->arch.guest_table) )
+            if ( pagetable_get_phys(ed->arch.guest_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 
1);
-            if ( pagetable_val(ed->arch.shadow_table) )
+            if ( pagetable_get_phys(ed->arch.shadow_table) )
                 adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 
0);
             if ( ed->arch.monitor_shadow_ref )
                 adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
diff -Nru a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/boot/x86_32.S        2005-05-31 12:04:00 -04:00
@@ -101,6 +101,22 @@
         xor     %eax,%eax
         rep     stosb
 
+#ifdef CONFIG_X86_PAE
+        /* Initialize low and high mappings of all memory with 2MB pages */
+        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
+        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
+1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
+        jne     1b
+1:      stosl   /* low mappings cover as much physmem as possible */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        jne     1b
+#else
         /* Initialize low and high mappings of all memory with 4MB pages */
         mov     $idle_pg_table-__PAGE_OFFSET,%edi
         mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
@@ -113,6 +129,7 @@
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
         cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
+#endif
         
         /* Initialise IDT with simple error defaults. */
         lea     ignore_int,%edx
@@ -204,10 +221,17 @@
         .quad 0x0000000000000000     /* unused */
         .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+        .quad 0x00cfba00000067ff
+        .quad 0x00cfb200000067ff
+        .quad 0x00cffa00000067ff
+        .quad 0x00cff200000067ff
+#else
         .quad 0x00cfba000000c3ff     /* 0xe019 ring 1 3.95GB code at 0x0 */
         .quad 0x00cfb2000000c3ff     /* 0xe021 ring 1 3.95GB data at 0x0 */
         .quad 0x00cffa000000c3ff     /* 0xe02b ring 3 3.95GB code at 0x0 */
         .quad 0x00cff2000000c3ff     /* 0xe033 ring 3 3.95GB data at 0x0 */
+#endif
         .quad 0x0000000000000000     /* unused                           */
         .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
 
@@ -215,10 +239,27 @@
 /* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
 /* CPU0 stack is aligned on an even page boundary!                        */
 ENTRY(cpu0_stack)
-
         .org 0x2000 + STACK_SIZE
+
+#ifdef CONFIG_X86_PAE
+
 ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
+        .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
+        .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+ENTRY(idle_pg_table_l2)
+        .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
+
+#else /* CONFIG_X86_PAE */
 
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
         .org 0x2000 + STACK_SIZE + PAGE_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
 ENTRY(stext)
 ENTRY(_stext)
diff -Nru a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/dom0_ops.c   2005-05-31 12:04:00 -04:00
@@ -405,7 +405,7 @@
         c->flags |= VGCF_VMX_GUEST;
 #endif
 
-    c->pt_base = pagetable_val(ed->arch.guest_table);
+    c->pt_base = pagetable_get_phys(ed->arch.guest_table);
 
     c->vm_assist = ed->domain->vm_assist;
 }
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
+++ b/xen/arch/x86/domain.c     2005-05-31 12:04:01 -04:00
@@ -460,7 +460,7 @@
         //      trust the VMX domain builder.  Xen should validate this
         //      page table, and/or build the table itself, or ???
         //
-        if ( !pagetable_val(d->arch.phys_table) )
+        if ( !pagetable_get_phys(d->arch.phys_table) )
             d->arch.phys_table = ed->arch.guest_table;
 
         if ( (error = vmx_final_setup_guest(ed, c)) )
@@ -660,7 +660,7 @@
     struct exec_domain    *ed = current;
 
     if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
-         unlikely(pagetable_val(ed->arch.guest_table_user) == 0) )
+         unlikely(pagetable_get_phys(ed->arch.guest_table_user) == 0) )
         return -EFAULT;
 
     toggle_guest_mode(ed);
@@ -978,7 +978,7 @@
     /* Drop the in-use references to page-table bases. */
     for_each_exec_domain ( d, ed )
     {
-        if ( pagetable_val(ed->arch.guest_table) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
@@ -988,7 +988,7 @@
             ed->arch.guest_table = mk_pagetable(0);
         }
 
-        if ( pagetable_val(ed->arch.guest_table_user) != 0 )
+        if ( pagetable_get_phys(ed->arch.guest_table_user) != 0 )
         {
             if ( shadow_mode_refcounts(d) )
                 
put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
diff -Nru a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
+++ b/xen/arch/x86/domain_build.c       2005-05-31 12:04:00 -04:00
@@ -44,15 +44,15 @@
 #if defined(__i386__)
 /* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
 #elif defined(__x86_64__)
 /* Allow ring-3 access in long mode as guest cannot use ring 1. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-/* Don't change these: Linux expects just these bits to be set. */
-/* (And that includes the bogus _PAGE_DIRTY!) */
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,7 +91,11 @@
 #elif defined(__x86_64__)
     char *image_start  = __va(_image_start);
     char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
     l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
 #endif
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
@@ -143,7 +147,7 @@
         panic("Not enough RAM for DOM0 reservation.\n");
     alloc_start = page_to_phys(page);
     alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
-    
+
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
 
@@ -172,10 +176,15 @@
         v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
         if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
                L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
             break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+        /* 5 pages: 1x 3rd + 4x 2nd level */
+        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >> 
+               L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+            break;
 #elif defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
@@ -249,6 +258,24 @@
     }
 
     /* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+    memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+    for (i = 0; i < 4; i++) {
+        l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+        l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+            l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+    }
+    unsigned long v;
+    for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+         v += (1 << L2_PAGETABLE_SHIFT)) {
+        l2tab[v >> L2_PAGETABLE_SHIFT] =
+            l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + 
(v-PERDOMAIN_VIRT_START),
+                            __PAGE_HYPERVISOR);
+    }
+    ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
     memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
@@ -256,8 +283,9 @@
     l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
     ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
 
-    l2tab += l2_table_offset(dsi.v_start);
+    l2tab += l2_linear_offset(dsi.v_start);
     mfn = alloc_start >> PAGE_SHIFT;
     for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
     {
@@ -282,8 +310,8 @@
     }
 
     /* Pages that are part of page tables must be read only. */
-    l2tab = l2start + l2_table_offset(vpt_start);
-    l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
+    l2tab = l2start + l2_linear_offset(vpt_start);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.