[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] linux/x86: fix issues with the assignment of huge amounts of memory



At the same time, remove the inapplicable and broken support for the
memmap= command line option.
Also fix the overlap of the modules area with the fixmaps on x86-64.

As usual, written and tested on 2.6.26, then adapted to apply to the
2.6.18 tree without further testing.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: head-2008-08-07/arch/x86_64/kernel/e820-xen.c
===================================================================
--- head-2008-08-07.orig/arch/x86_64/kernel/e820-xen.c  2008-08-07 
14:54:04.000000000 +0200
+++ head-2008-08-07/arch/x86_64/kernel/e820-xen.c       2008-08-07 
14:03:05.000000000 +0200
@@ -745,6 +745,24 @@ static int __init parse_memopt(char *p)
        i = e820.nr_map-1;
        current_end = e820.map[i].addr + e820.map[i].size;
 
+       /*
+        * A little less than 2% of available memory is needed for page
+        * tables, p2m map, and mem_map. Hence the maximum amount of memory
+        * we can potentially balloon up to cannot exceed about 50 times
+        * what we've been given initially.
+        */
+#define PAGE_OVERHEAD (PAGE_SIZE \
+                      / (sizeof(pte_t) + sizeof(long) + sizeof(struct page)))
+       if (end_user_pfn / (PAGE_OVERHEAD - 1) > xen_start_info->nr_pages) {
+               end = (xen_start_info->nr_pages * (PAGE_OVERHEAD - 1))
+                     << PAGE_SHIFT;
+               printk(KERN_WARNING "mem=%lu is invalid for an initial"
+                                   " allocation of %lu, using %lu\n",
+                      end_user_pfn << PAGE_SHIFT,
+                      xen_start_info->nr_pages << PAGE_SHIFT, end);
+               end_user_pfn = end >> PAGE_SHIFT;
+       }
+
        if (current_end < end) {
                /*
                  * The e820 map ends before our requested size so
@@ -760,6 +778,7 @@ static int __init parse_memopt(char *p)
        }
 } 
 
+#ifndef CONFIG_XEN
 void __init parse_memmapopt(char *p, char **from)
 {
        unsigned long long start_at, mem_size;
@@ -770,6 +789,7 @@ void __init finish_e820_parsing(void)
        }
        p = *from;
 }
+#endif
 
 unsigned long pci_mem_start = 0xaeedbabe;
 EXPORT_SYMBOL(pci_mem_start);
Index: head-2008-08-07/arch/i386/kernel/setup-xen.c
===================================================================
--- head-2008-08-07.orig/arch/i386/kernel/setup-xen.c   2008-08-07 
14:54:04.000000000 +0200
+++ head-2008-08-07/arch/i386/kernel/setup-xen.c        2008-08-07 
14:03:05.000000000 +0200
@@ -405,7 +405,7 @@ shared_info_t *HYPERVISOR_shared_info = 
 EXPORT_SYMBOL(HYPERVISOR_shared_info);
 
 unsigned long *phys_to_machine_mapping;
-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
+unsigned long *pfn_to_mfn_frame_list_list, **pfn_to_mfn_frame_list;
 EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /* Raw start-of-day parameters from the hypervisor. */
@@ -831,6 +831,7 @@
                        }
                }
 
+#ifndef CONFIG_XEN
                else if (!memcmp(from, "memmap=", 7)) {
                        if (to != command_line)
                                to--;
@@ -872,6 +873,7 @@
                                }
                        }
                }
+#endif
 
                else if (!memcmp(from, "noexec=", 7))
                        noexec_setup(from + 7);
@@ -1803,7 +1803,7 @@ void __init setup_arch(char **cmdline_p)
        int i, j, k, fpp;
        struct physdev_set_iopl set_iopl;
        unsigned long max_low_pfn;
-       unsigned long p2m_pages;
+       unsigned long p2m_pages, size;
 
        /* Force a quick death if the kernel panics (not domain 0). */
        extern int panic_timeout;
@@ -1996,11 +1996,11 @@ void __init setup_arch(char **cmdline_p)
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                phys_to_machine_mapping = alloc_bootmem_low_pages(
                     max_pfn * sizeof(unsigned long));
-               memset(phys_to_machine_mapping, ~0,
-                      max_pfn * sizeof(unsigned long));
                memcpy(phys_to_machine_mapping,
                       (unsigned long *)xen_start_info->mfn_list,
                       p2m_pages * sizeof(unsigned long));
+               memset(phys_to_machine_mapping + p2m_pages, ~0,
+                      (max_pfn - p2m_pages) * sizeof(unsigned long));
                free_bootmem(
                     __pa(xen_start_info->mfn_list),
                     PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
@@ -2010,13 +2010,18 @@ void __init setup_arch(char **cmdline_p)
                 * Initialise the list of the frames that specify the list of
                 * frames that make up the p2m table. Used by save/restore
                 */
-               pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
-
                fpp = PAGE_SIZE/sizeof(unsigned long);
+               size = (max_pfn + fpp - 1) / fpp;
+               size = (size + fpp - 1) / fpp;
+               size *= sizeof(unsigned long);
+               BUG_ON(size > PAGE_SIZE);
+               pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(size);
+               pfn_to_mfn_frame_list = alloc_bootmem_low(size);
+
                for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
                        if ((j % fpp) == 0) {
                                k++;
-                               BUG_ON(k>=16);
+                               BUG_ON(k * sizeof(unsigned long) >= size);
                                pfn_to_mfn_frame_list[k] =
                                        alloc_bootmem_low_pages(PAGE_SIZE);
                                pfn_to_mfn_frame_list_list[k] =
Index: head-2008-08-07/arch/x86_64/kernel/setup-xen.c
===================================================================
--- head-2008-08-07.orig/arch/x86_64/kernel/setup-xen.c 2008-08-07 
14:54:04.000000000 +0200
+++ head-2008-08-07/arch/x86_64/kernel/setup-xen.c      2008-08-07 
14:03:05.000000000 +0200
@@ -105,7 +105,7 @@ static struct notifier_block xen_panic_b
 };
 
 unsigned long *phys_to_machine_mapping;
-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512];
+unsigned long *pfn_to_mfn_frame_list_list, **pfn_to_mfn_frame_list;
 
 EXPORT_SYMBOL(phys_to_machine_mapping);
 
@@ -419,6 +419,7 @@ static __init void parse_cmdline_early
                if (!memcmp(from, "mem=", 4))
                        parse_memopt(from+4, &from); 
 
+#ifndef CONFIG_XEN
                if (!memcmp(from, "memmap=", 7)) {
                        /* exactmap option is for used defined memory */
                        if (!memcmp(from+7, "exactmap", 8)) {
@@ -440,6 +441,7 @@ static __init void parse_cmdline_early
                                userdef = 1;
                        }
                }
+#endif
 
 #ifdef CONFIG_NUMA
                if (!memcmp(from, "numa=", 5))
@@ -585,7 +585,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_XEN
        {
                int i, j, k, fpp;
-               unsigned long p2m_pages;
+               unsigned long p2m_pages, size;
 
                p2m_pages = end_pfn;
                if (xen_start_info->nr_pages > end_pfn) {
@@ -617,11 +617,11 @@ void __init setup_arch(char **cmdline_p)
                        /* Make sure we have a large enough P->M table. */
                        phys_to_machine_mapping = alloc_bootmem_pages(
                                end_pfn * sizeof(unsigned long));
-                       memset(phys_to_machine_mapping, ~0,
-                              end_pfn * sizeof(unsigned long));
                        memcpy(phys_to_machine_mapping,
                               (unsigned long *)xen_start_info->mfn_list,
                               p2m_pages * sizeof(unsigned long));
+                       memset(phys_to_machine_mapping + p2m_pages, ~0,
+                              (end_pfn - p2m_pages) * sizeof(unsigned long));
                        free_bootmem(
                                __pa(xen_start_info->mfn_list),
                                PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
@@ -632,13 +632,22 @@ void __init setup_arch(char **cmdline_p)
                         * list of frames that make up the p2m table. Used by
                          * save/restore.
                         */
-                       pfn_to_mfn_frame_list_list = 
alloc_bootmem_pages(PAGE_SIZE);
-
                        fpp = PAGE_SIZE/sizeof(unsigned long);
+                       size = (max_pfn + fpp - 1) / fpp;
+                       size = (size + fpp - 1) / fpp;
+                       size *= sizeof(unsigned long);
+                       pfn_to_mfn_frame_list_list = alloc_bootmem_pages(size);
+                       if (size > PAGE_SIZE
+                           && xen_create_contiguous_region((unsigned long)
+                                                           
pfn_to_mfn_frame_list_list,
+                                                           get_order(size), 0))
+                               BUG();
+                       pfn_to_mfn_frame_list = alloc_bootmem(size);
+
                        for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) {
                                if ((j % fpp) == 0) {
                                        k++;
-                                       BUG_ON(k>=fpp);
+                                       BUG_ON(k * sizeof(unsigned long) >= 
size);
                                        pfn_to_mfn_frame_list[k] =
                                                alloc_bootmem_pages(PAGE_SIZE);
                                        pfn_to_mfn_frame_list_list[k] =
Index: head-2008-08-07/arch/x86_64/mm/init-xen.c
===================================================================
--- head-2008-08-07.orig/arch/x86_64/mm/init-xen.c      2008-08-07 
13:59:00.000000000 +0200
+++ head-2008-08-07/arch/x86_64/mm/init-xen.c   2008-08-07 14:03:05.000000000 
+0200
@@ -672,6 +672,13 @@ static void __init extend_init_mapping(u
        while (va < (__START_KERNEL_map
                     + (start_pfn << PAGE_SHIFT)
                     + tables_space)) {
+               if (!pmd_index(va) && !pte_index(va)) {
+                       page = (unsigned long *)init_level4_pgt;
+                       addr = page[pgd_index(va)];
+                       addr_to_page(addr, page);
+                       addr = page[pud_index(va)];
+                       addr_to_page(addr, page);
+               }
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (pmd_none(*pmd)) {
                        pte_page = alloc_static_page(&phys);
Index: head-2008-08-07/drivers/xen/core/machine_reboot.c
===================================================================
--- head-2008-08-07.orig/drivers/xen/core/machine_reboot.c      2008-08-07 
13:59:00.000000000 +0200
+++ head-2008-08-07/drivers/xen/core/machine_reboot.c   2008-08-07 
14:03:05.000000000 +0200
@@ -74,7 +74,7 @@ static void post_suspend(int suspend_can
        unsigned long shinfo_mfn;
        extern unsigned long max_pfn;
        extern unsigned long *pfn_to_mfn_frame_list_list;
-       extern unsigned long *pfn_to_mfn_frame_list[];
+       extern unsigned long **pfn_to_mfn_frame_list;
 
        if (suspend_cancelled) {
                xen_start_info->store_mfn =
Index: head-2008-08-07/include/asm-x86_64/mach-xen/asm/pgtable.h
===================================================================
--- head-2008-08-07.orig/include/asm-x86_64/mach-xen/asm/pgtable.h      
2008-08-07 13:59:00.000000000 +0200
+++ head-2008-08-07/include/asm-x86_64/mach-xen/asm/pgtable.h   2008-08-07 
14:03:05.000000000 +0200
@@ -135,11 +135,11 @@
 #define FIRST_USER_ADDRESS     0
 
 #ifndef __ASSEMBLY__
-#define MAXMEM          0x3fffffffffffUL
+#define MAXMEM          0xdfffffffffUL
 #define VMALLOC_START    0xffffc20000000000UL
 #define VMALLOC_END      0xffffe1ffffffffffUL
 #define MODULES_VADDR    0xffffffff88000000UL
-#define MODULES_END      0xfffffffffff00000UL
+#define MODULES_END      0xffffffffff000000UL
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #define _PAGE_BIT_PRESENT      0



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.