[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] The 1:1 page table should be a 3 level PAE page table on x86-64



The 1:1 page table should be a 3 level PAE page table on x86-64 

This is needed to support > 4GB machine physical addresses.

Signed-off-by: Chengyuan Li <chengyuan.li@xxxxxxxxx>
Signed-off-by: Arun Sharma <arun.sharma@xxxxxxxxx>

--- a/tools/libxc/xc_vmx_build.c        Mon Jul 11 05:02:12 2005
+++ b/tools/libxc/xc_vmx_build.c        Mon Jul 11 05:04:22 2005
@@ -13,6 +13,9 @@
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,6 +94,7 @@
     mem_mapp->nr_map = nr_map;
 }
 
+#ifdef __i386__
 static int zap_mmio_range(int xc_handle, u32 dom,
                             l2_pgentry_32_t *vl2tab,
                             unsigned long mmio_range_start,
@@ -138,6 +142,65 @@
     munmap(vl2tab, PAGE_SIZE);
     return 0;
 }
+#else
+static int zap_mmio_range(int xc_handle, u32 dom,
+                           l3_pgentry_t *vl3tab,
+                           unsigned long mmio_range_start,
+                           unsigned long mmio_range_size)
+{
+   unsigned long mmio_addr;
+   unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+   unsigned long vl2e = 0;
+   unsigned long vl3e;
+   l1_pgentry_t *vl1tab;
+   l2_pgentry_t *vl2tab;
+ 
+   mmio_addr = mmio_range_start & PAGE_MASK;
+   for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+       vl3e = vl3tab[l3_table_offset(mmio_addr)];
+       if (vl3e == 0)
+           continue;
+       vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
+       if (vl2tab == 0) {
+           PERROR("Failed zap MMIO range");
+           return -1;
+       }
+       vl2e = vl2tab[l2_table_offset(mmio_addr)];
+       if (vl2e == 0)
+           continue;
+       vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+
+       vl1tab[l1_table_offset(mmio_addr)] = 0;
+       munmap(vl2tab, PAGE_SIZE);
+       munmap(vl1tab, PAGE_SIZE);
+   }
+   return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+                           unsigned long l3tab,
+                           struct mem_map *mem_mapp)
+{
+   int i;
+   l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                               PROT_READ|PROT_WRITE,
+                                               l3tab >> PAGE_SHIFT);
+   if (vl3tab == 0)
+       return -1;
+   for (i = 0; i < mem_mapp->nr_map; i++) {
+       if ((mem_mapp->map[i].type == E820_IO)
+         && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+           if (zap_mmio_range(xc_handle, dom, vl3tab,
+                       mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+               return -1;
+   }
+   munmap(vl3tab, PAGE_SIZE);
+   return 0;
+}
+
+#endif
 
 static int setup_guest(int xc_handle,
                          u32 dom, int memsize,
@@ -151,9 +214,13 @@
                          unsigned long flags,
                          struct mem_map * mem_mapp)
 {
-    l1_pgentry_32_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_32_t *vl2tab=NULL, *vl2e=NULL;
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
     unsigned long *page_array = NULL;
+#ifdef __x86_64__
+    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+    unsigned long l3tab;
+#endif
     unsigned long l2tab;
     unsigned long l1tab;
     unsigned long count, i;
@@ -212,7 +279,11 @@
     if(initrd_len == 0)
         vinitrd_start = vinitrd_end = 0;
 
+#ifdef __i386__
     nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+    nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
     vpt_start   = v_end;
     vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
 
@@ -274,6 +345,7 @@
     if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
+#ifdef __i386__
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -310,7 +382,64 @@
     }
     munmap(vl1tab, PAGE_SIZE);
     munmap(vl2tab, PAGE_SIZE);
-
+#else
+    /* First allocate pdpt */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    /* here l3tab means pdpt, only 4 entry is used */
+    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l3tab;
+
+    /* Initialise the page tables. */
+    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                        PROT_READ|PROT_WRITE, 
+                                        l3tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl3tab, 0, PAGE_SIZE);
+
+    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];
+
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if (!(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
+            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+
+            if (vl2tab != NULL)
+                munmap(vl2tab, PAGE_SIZE);
+
+            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l2tab >> PAGE_SHIFT)) == NULL )
+                goto error_out;
+
+            memset(vl2tab, 0, PAGE_SIZE);
+            *vl3e++ = l2tab | L3_PROT;
+            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
+        }
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+    munmap(vl3tab, PAGE_SIZE);
+#endif
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
@@ -325,6 +454,7 @@
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
         goto error_out;
+
     memset(boot_paramsp, 0, sizeof(*boot_paramsp));
 
     strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
@@ -381,7 +511,11 @@
 
     /* memsize is in megabytes */
     build_e820map(mem_mapp, memsize << 20);
+#if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+#else
+    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+#endif
        goto error_out;
     boot_paramsp->e820_map_nr = mem_mapp->nr_map;
     for (i=0; i<mem_mapp->nr_map; i++) {
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx.c        Mon Jul 11 05:04:22 2005
@@ -801,7 +801,11 @@
 skip_cr3:
 
     error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+#if defined (__i386__)
     error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+#else
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE | X86_CR4_PAE));
+#endif
     error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
 
     error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
@@ -860,7 +864,7 @@
 {
     struct vmx_assist_context c;
     u32 magic;
-    unsigned long cp;
+    u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
     if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
@@ -1191,7 +1195,7 @@
 
         __vmread(CR4_READ_SHADOW, &old_cr);
         if (pae_disabled)
-            __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
+            __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
         else
             __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
 
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 05:04:22 2005
@@ -122,6 +122,7 @@
     struct e820entry *e820p;
     unsigned long gpfn = 0;
 
+    local_flush_tlb_pge();
     regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */
 
     n = regs->ecx;
@@ -311,8 +312,7 @@
     error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
     /* CR3 is set in vmx_final_setup_guest */
 #ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
-    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
+    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
 #else
     error |= __vmwrite(GUEST_CR4, host_env->cr4);
 #endif
diff -r 036c6e463f67 -r 51dd38b2b917 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jul 11 05:02:12 2005
+++ b/tools/python/xen/xend/image.py    Mon Jul 11 05:04:22 2005
@@ -351,6 +351,8 @@
         @param mem_mb: size in MB
         @return size in KB
         """
-        # Logic x86-32 specific. 
         # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
-        return (1 + ((mem_mb + 3) >> 2)) * 4
+        if os.uname()[4] == 'x86_64':
+            return (5 + ((mem_mb + 1) >> 1)) * 4
+        else:
+            return (1 + ((mem_mb + 3) >> 2)) * 4
diff -r 036c6e463f67 -r 51dd38b2b917 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Jul 11 05:02:12 2005
+++ b/xen/include/asm-x86/mm.h  Mon Jul 11 05:04:22 2005
@@ -349,4 +349,7 @@
                             l1_pgentry_t _nl1e, 
                             struct domain *d,
                             struct vcpu *v);
+
+void alloc_monitor_pagetable(struct vcpu *v);
+
 #endif /* __ASM_X86_MM_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.