[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [linux-2.6.18-xen] x86-64: do not pass unmanageable amounts of memory to Dom0



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1245317536 -3600
# Node ID baeb818cd2dc72053ed28353efa36b3a7e0e0227
# Parent  cad6f60f050685d8dc824d5af53d23ad62e6faf6
x86-64: do not pass unmanageable amounts of memory to Dom0

Due to address space restrictions it is not possible to successfully
pass more than about 500Gb to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.

For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1Tb, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/x86_64/kernel/head-xen.S             |    3 --
 arch/x86_64/kernel/setup-xen.c            |    8 +++--
 arch/x86_64/mm/init-xen.c                 |   41 ++++++++++++++++++++++++------
 include/asm-x86_64/mach-xen/asm/pgtable.h |    4 +-
 4 files changed, 40 insertions(+), 16 deletions(-)

diff -r cad6f60f0506 -r baeb818cd2dc arch/x86_64/kernel/head-xen.S
--- a/arch/x86_64/kernel/head-xen.S     Thu Jun 18 10:24:18 2009 +0100
+++ b/arch/x86_64/kernel/head-xen.S     Thu Jun 18 10:32:16 2009 +0100
@@ -76,9 +76,6 @@ NEXT_PAGE(level3_kernel_pgt)
          */
 NEXT_PAGE(level3_user_pgt)
         .fill  512,8,0
-
-NEXT_PAGE(level2_kernel_pgt)
-       .fill   512,8,0
 
 NEXT_PAGE(hypercall_page)
        CFI_STARTPROC
diff -r cad6f60f0506 -r baeb818cd2dc arch/x86_64/kernel/setup-xen.c
--- a/arch/x86_64/kernel/setup-xen.c    Thu Jun 18 10:24:18 2009 +0100
+++ b/arch/x86_64/kernel/setup-xen.c    Thu Jun 18 10:32:16 2009 +0100
@@ -524,10 +524,12 @@ contig_initmem_init(unsigned long start_
                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 #ifdef CONFIG_XEN
-       e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
-#else
+       if (xen_start_info->nr_pages < end_pfn)
+               e820_bootmem_free(NODE_DATA(0), 0,
+                                 xen_start_info->nr_pages<<PAGE_SHIFT);
+       else
+#endif
        e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
-#endif
        reserve_bootmem(bootmap, bootmap_size);
 } 
 #endif
diff -r cad6f60f0506 -r baeb818cd2dc arch/x86_64/mm/init-xen.c
--- a/arch/x86_64/mm/init-xen.c Thu Jun 18 10:24:18 2009 +0100
+++ b/arch/x86_64/mm/init-xen.c Thu Jun 18 10:32:16 2009 +0100
@@ -527,8 +527,6 @@ void __init xen_init_pt(void)
        page = (unsigned long *)xen_start_info->pt_base;
        addr = page[pgd_index(__START_KERNEL_map)];
        addr_to_page(addr, page);
-       addr = page[pud_index(__START_KERNEL_map)];
-       addr_to_page(addr, page);
 
 #if CONFIG_XEN_COMPAT <= 0x030002
        /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER
@@ -539,7 +537,9 @@ void __init xen_init_pt(void)
 
                /* Mess with the initial mapping of page 0. It's not needed. */
                BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map);
-               addr = page[pmd_index(__START_KERNEL_map)];
+               addr = page[pud_index(__START_KERNEL_map)];
+               addr_to_page(addr, pg);
+               addr = pg[pmd_index(__START_KERNEL_map)];
                addr_to_page(addr, pg);
                pte.pte = pg[pte_index(__START_KERNEL_map)];
                BUG_ON(!(pte.pte & _PAGE_PRESENT));
@@ -560,9 +560,10 @@ void __init xen_init_pt(void)
        /* Construct mapping of initial pte page in our own directories. */
        init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
                __pgd(__pa_symbol(level3_kernel_pgt) | _PAGE_TABLE);
-       level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
-               __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
-       memcpy(level2_kernel_pgt, page, PAGE_SIZE);
+       memcpy(level3_kernel_pgt + pud_index(__START_KERNEL_map),
+              page + pud_index(__START_KERNEL_map),
+              (PTRS_PER_PUD - pud_index(__START_KERNEL_map))
+              * sizeof(*level3_kernel_pgt));
 
        __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
                __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
@@ -575,8 +576,6 @@ void __init xen_init_pt(void)
                                 XENFEAT_writable_page_tables);
        early_make_page_readonly(level3_user_pgt,
                                 XENFEAT_writable_page_tables);
-       early_make_page_readonly(level2_kernel_pgt,
-                                XENFEAT_writable_page_tables);
 
        if (!xen_feature(XENFEAT_writable_page_tables)) {
                xen_pgd_pin(__pa_symbol(init_level4_pgt));
@@ -608,6 +607,23 @@ static void __init extend_init_mapping(u
        while (va < (__START_KERNEL_map
                     + (start_pfn << PAGE_SHIFT)
                     + tables_space)) {
+               if (!(pmd_index(va) | pte_index(va))) {
+                       pud_t *pud;
+
+                       page = (unsigned long *)init_level4_pgt;
+                       addr = page[pgd_index(va)];
+                       addr_to_page(addr, page);
+                       pud = (pud_t *)&page[pud_index(va)];
+                       if (pud_none(*pud)) {
+                               page = alloc_static_page(&phys);
+                               early_make_page_readonly(
+                                       page, XENFEAT_writable_page_tables);
+                               set_pud(pud, __pud(phys | _KERNPG_TABLE));
+                       } else {
+                               addr = page[pud_index(va)];
+                               addr_to_page(addr, page);
+                       }
+               }
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (pmd_none(*pmd)) {
                        pte_page = alloc_static_page(&phys);
@@ -630,6 +646,15 @@ static void __init extend_init_mapping(u
 
        /* Finally, blow away any spurious initial mappings. */
        while (1) {
+               if (!(pmd_index(va) | pte_index(va))) {
+                       page = (unsigned long *)init_level4_pgt;
+                       addr = page[pgd_index(va)];
+                       addr_to_page(addr, page);
+                       if (pud_none(((pud_t *)page)[pud_index(va)]))
+                               break;
+                       addr = page[pud_index(va)];
+                       addr_to_page(addr, page);
+               }
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (pmd_none(*pmd))
                        break;
diff -r cad6f60f0506 -r baeb818cd2dc include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Jun 18 10:24:18 2009 +0100
+++ b/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Jun 18 10:32:16 2009 +0100
@@ -137,11 +137,11 @@ static inline void pgd_clear (pgd_t * pg
 #define FIRST_USER_ADDRESS     0
 
 #ifndef __ASSEMBLY__
-#define MAXMEM          0x3fffffffffffUL
+#define MAXMEM          0x6fffffffffUL
 #define VMALLOC_START    0xffffc20000000000UL
 #define VMALLOC_END      0xffffe1ffffffffffUL
 #define MODULES_VADDR    0xffffffff88000000UL
-#define MODULES_END      0xfffffffffff00000UL
+#define MODULES_END      0xffffffffff000000UL
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #define _PAGE_BIT_PRESENT      0

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.