
[Xen-changelog] [xen stable-4.5] x86/NUMA: make init_node_heap() respect Xen heap limit



commit 0b6e02bd3b589c50d3d8111e91b9510d226e7e40
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Sep 10 15:36:12 2015 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Sep 10 15:36:12 2015 +0200

    x86/NUMA: make init_node_heap() respect Xen heap limit
    
    On NUMA systems, where we try to use node-local memory for the buddy
    allocator's basic control structures, this special case needs to take
    into account a possible address-width limit placed on the Xen heap.
    In turn this (but also other, more abstract considerations) requires
    that xenheap_max_mfn() be called at most once (at most we might
    permit a second call with a larger value than the first), and only
    before end_boot_allocator().
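    
    A minimal sketch of that contract, with plain assert() standing in
    for Xen's ASSERT() (the real checks are in the page_alloc.c hunk
    further down):
    
        #include <assert.h>
    
        static int first_node_initialised; /* set once init_node_heap() ran */
        static unsigned int xenheap_bits;  /* 0 until xenheap_max_mfn() runs */
    
        static void xenheap_max_mfn(unsigned long mfn)
        {
            assert(!first_node_initialised); /* before end_boot_allocator() */
            assert(!xenheap_bits);           /* at most one call */
            xenheap_bits = 32; /* placeholder for the real computation */
            (void)mfn;
        }
    
        int main(void)
        {
            xenheap_max_mfn(0xfffff);   /* first and only call: fine */
            first_node_initialised = 1; /* end_boot_allocator() has run */
            /* Any further call would now trip the assertions. */
            return 0;
        }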
    
    While inspecting all the involved code, a couple of off-by-one issues
    were found (and are being corrected here at once):
    - arch_init_memory() cleared one too many page table slots
    - the highmem_start based invocation of xenheap_max_mfn() passed too
      big a value
    - xenheap_max_mfn() calculated the wrong bit count in edge cases (see
      the sketch below)
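    
    The last two items can be demonstrated standalone (not Xen code;
    fls() is a local stand-in for Xen's 1-based find-last-set, and 4KiB
    pages are assumed):
    
        #include <assert.h>
        #include <stdio.h>
    
        #define PAGE_SHIFT 12 /* 4KiB pages */
    
        static int fls(unsigned long long x) /* 1-based find-last-set */
        {
            int bit = 0;
            for ( ; x; x >>= 1 )
                ++bit;
            return bit;
        }
    
        int main(void)
        {
            /* Heap limited to 4GiB: highmem_start = 0x100000000. */
            unsigned long long highmem_start = 1ULL << 32;
    
            /* Old: pass the first excluded MFN and use fls() directly. */
            int old_bits = fls(highmem_start >> PAGE_SHIFT) + PAGE_SHIFT;
    
            /* New: pass the last usable MFN; fls(mfn + 1) - 1 rounds
             * correctly (the real code also clamps to PADDR_BITS). */
            unsigned long long mfn = (highmem_start - 1) >> PAGE_SHIFT;
            int new_bits = fls(mfn + 1) - 1 + PAGE_SHIFT;
    
            printf("old: %d bits, new: %d bits\n", old_bits, new_bits);
            assert(old_bits == 33 && new_bits == 32); /* old: one too many */
            return 0;
        }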
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
    
    xen/arm64: do not (incorrectly) limit size of xenheap
    
    Commit 88e3ed61642bb393458acc7a9bd2f96edc337190 ("x86/NUMA: make
    init_node_heap() respect Xen heap limit") breaks boot on the arm64
    X-Gene board.
    
    The xenheap_bits variable encodes the highest RAM MFN that is always
    mapped in Xen's virtual address space. A value of 0 means that all
    memory is always mapped.
    
    On X-Gene, the RAM bank resides above 128GB and the last xenheap MFN
    is 0x4400000. With the new way of calculating the number of bits,
    xenheap_bits comes out to 38. This hides all of the RAM and makes it
    impossible to allocate xenheap memory.
    
    Given that aarch64 always has all of its memory mapped in Xen's
    virtual address space, it is not necessary to call xenheap_max_mfn(),
    which sets that number of bits.
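    
    A quick check of the numbers quoted above (standalone, not Xen code;
    fls() is again a 1-based find-last-set):
    
        #include <stdio.h>
    
        #define PAGE_SHIFT 12
    
        static int fls(unsigned long long x)
        {
            int bit = 0;
            for ( ; x; x >>= 1 )
                ++bit;
            return bit;
        }
    
        int main(void)
        {
            unsigned long long last_mfn = 0x4400000ULL; /* last xenheap MFN */
            int bits = fls(last_mfn + 1) - 1 + PAGE_SHIFT;
    
            /* Prints 38: the heap would be capped at 2^38 = 256GiB even
             * though this bank extends to ~272GiB. */
            printf("xenheap_bits = %d (cap %lluGiB, bank top %lluGiB)\n",
                   bits, 1ULL << (bits - 30),
                   ((last_mfn + 1) << PAGE_SHIFT) >> 30);
            return 0;
        }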
    
    Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
    Signed-off-by: Julien Grall <julien.grall@xxxxxxxxxx>
    Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
    master commit: 88e3ed61642bb393458acc7a9bd2f96edc337190
    master date: 2015-09-01 14:02:57 +0200
    master commit: 0a7167d9b20cdc48e6ea320fbbb920b3267c9757
    master date: 2015-09-04 14:58:07 +0100
---
 xen/arch/arm/setup.c    |    1 -
 xen/arch/x86/mm.c       |    2 +-
 xen/arch/x86/setup.c    |   11 +++++++----
 xen/common/page_alloc.c |   24 ++++++++++++++++++------
 4 files changed, 26 insertions(+), 12 deletions(-)
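
Before the hunks themselves: the heap-limit test added to init_node_heap()
can be read in isolation as "the candidate range's last MFN must have no
bits set at or above xenheap_bits - PAGE_SHIFT", with xenheap_bits == 0
meaning "no limit". A standalone sketch with hypothetical values:

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* Mirrors the test added to init_node_heap() in the hunks below. */
    static bool within_xenheap(unsigned long long mfn,
                               unsigned long long nr,
                               unsigned int xenheap_bits)
    {
        return !xenheap_bits ||
               !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT));
    }

    int main(void)
    {
        unsigned int bits = 32; /* heap limited to 4GiB */
        unsigned long long limit = 1ULL << (bits - PAGE_SHIFT); /* 0x100000 */

        printf("%d\n", within_xenheap(limit - 256, 256, bits)); /* 1: fits */
        printf("%d\n", within_xenheap(limit - 256, 257, bits)); /* 0: over */
        printf("%d\n", within_xenheap(limit, 4096, 0));         /* 1: none */
        return 0;
    }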

diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 1e488ee..3d212f4 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -664,7 +664,6 @@ static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
     xenheap_virt_end = XENHEAP_VIRT_START + ram_end - ram_start;
     xenheap_mfn_start = ram_start >> PAGE_SHIFT;
     xenheap_mfn_end = ram_end >> PAGE_SHIFT;
-    xenheap_max_mfn(xenheap_mfn_end);
 
     /*
      * Need enough mapped pages for copying the DTB.
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 59e16a6..7ebd733 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -372,7 +372,7 @@ void __init arch_init_memory(void)
 
                     for ( i = 0; i < l3_table_offset(split_va); ++i )
                         l3tab[i] = l3idle[i];
-                    for ( ; i <= L3_PAGETABLE_ENTRIES; ++i )
+                    for ( ; i < L3_PAGETABLE_ENTRIES; ++i )
                         l3tab[i] = l3e_empty();
                     split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
                                              __PAGE_HYPERVISOR);
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index fefa0b7..1efe02f 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -970,7 +970,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     setup_max_pdx(raw_max_page);
     if ( highmem_start )
-        xenheap_max_mfn(PFN_DOWN(highmem_start));
+        xenheap_max_mfn(PFN_DOWN(highmem_start - 1));
 
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
@@ -1151,9 +1151,6 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     numa_initmem_init(0, raw_max_page);
 
-    end_boot_allocator();
-    system_state = SYS_STATE_boot;
-
     if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
     {
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
@@ -1162,6 +1159,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
         if ( !highmem_start )
             xenheap_max_mfn(limit);
 
+        end_boot_allocator();
+
         /* Pass the remaining memory to the allocator. */
         for ( i = 0; i < boot_e820.nr_map; i++ )
         {
@@ -1185,6 +1184,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
            opt_tmem = 0;
         }
     }
+    else
+        end_boot_allocator();
+
+    system_state = SYS_STATE_boot;
 
     vm_init();
     console_init_ring();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 24a759c..fc6db33 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -405,13 +405,19 @@ void get_outstanding_claims(uint64_t *free_pages, uint64_t *outstanding_pages)
     spin_unlock(&heap_lock);
 }
 
+static bool_t __read_mostly first_node_initialised;
+#ifndef CONFIG_SEPARATE_XENHEAP
+static unsigned int __read_mostly xenheap_bits;
+#else
+#define xenheap_bits 0
+#endif
+
 static unsigned long init_node_heap(int node, unsigned long mfn,
                                     unsigned long nr, bool_t *use_tail)
 {
     /* First node to be discovered has its heap metadata statically alloced. */
     static heap_by_zone_and_order_t _heap_static;
     static unsigned long avail_static[NR_ZONES];
-    static int first_node_initialised;
     unsigned long needed = (sizeof(**_heap) +
                             sizeof(**avail) * NR_ZONES +
                             PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -429,14 +435,18 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
     }
 #ifdef DIRECTMAP_VIRT_END
     else if ( *use_tail && nr >= needed &&
-              (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) )
+              (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) &&
+              (!xenheap_bits ||
+               !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
         _heap[node] = mfn_to_virt(mfn + nr - needed);
         avail[node] = mfn_to_virt(mfn + nr - 1) +
                       PAGE_SIZE - sizeof(**avail) * NR_ZONES;
     }
     else if ( nr >= needed &&
-              (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) )
+              (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) &&
+              (!xenheap_bits ||
+               !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
         _heap[node] = mfn_to_virt(mfn);
         avail[node] = mfn_to_virt(mfn + needed - 1) +
@@ -1541,11 +1551,13 @@ void free_xenheap_pages(void *v, unsigned int order)
 
 #else
 
-static unsigned int __read_mostly xenheap_bits;
-
 void __init xenheap_max_mfn(unsigned long mfn)
 {
-    xenheap_bits = fls(mfn) + PAGE_SHIFT;
+    ASSERT(!first_node_initialised);
+    ASSERT(!xenheap_bits);
+    BUILD_BUG_ON(PADDR_BITS >= BITS_PER_LONG);
+    xenheap_bits = min(fls(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS);
+    printk(XENLOG_INFO "Xen heap: %u bits\n", xenheap_bits);
 }
 
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.5
