
[Xen-changelog] [xen-unstable] [IA64] Update efi.c and rework xenheap location



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 0a226de3fc37204cc75023f2bb31aa26831755a5
# Parent  aa229873911204d675310d0dab06cd673eff7757
[IA64] Update efi.c and rework xenheap location

This is a port of a newer upstream efi.c to xen/ia64.  For the most
part, this patch simply incorporates the following upstream linux-ia64
patch into the tree:

http://www.kernel.org/hg/linux-2.6/?cs=fb781f6d3e81

To support this new code, xensetup needed to be modified to relocate
the dom0 kernel and initrd images without using efi_memmap_walk() as
this can no longer be called until after reserve_memory().  The dom0
kernel and initrd images are now only moved if necessary and the xen
MDT entry is expanded to cover the xenheap area and any relocated dom0
bits.
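
For illustration only (not part of this changeset), here is a minimal
sketch of how a caller consumes the reworked walker; it mirrors
xen_count_pages() in xensetup.c, and the count_wb_pages() helper name is
made up for the example.  The callback interface is unchanged, but the
walk now simply replays the kern_memmap table built by efi_memmap_init(),
so it must not be invoked before reserve_memory():

    /* Illustrative callback -- the walker passes page-aligned WB ranges
     * as virtual [start, end) addresses; returning < 0 stops the walk. */
    static int
    count_wb_pages (u64 start, u64 end, void *arg)
    {
        unsigned long *nr_pages = arg;

        *nr_pages += (end - start) >> PAGE_SHIFT;
        return 0;
    }

    /* ... sometime after reserve_memory() has run ... */
    unsigned long nr_pages = 0;
    efi_memmap_walk(count_wb_pages, &nr_pages);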

Signed-off-by: Alex Williamson <alex.williamson@xxxxxx>
---
 xen/arch/ia64/linux-xen/efi.c                |  472 ++++++++++++++-------------
 xen/arch/ia64/linux-xen/setup.c              |    3 
 xen/arch/ia64/xen/xensetup.c                 |  231 ++++++++-----
 xen/include/asm-ia64/linux-xen/asm/meminit.h |    6 
 4 files changed, 416 insertions(+), 296 deletions(-)

diff -r aa2298739112 -r 0a226de3fc37 xen/arch/ia64/linux-xen/efi.c
--- a/xen/arch/ia64/linux-xen/efi.c     Fri Jun 09 10:40:31 2006 -0600
+++ b/xen/arch/ia64/linux-xen/efi.c     Tue Jun 13 08:45:22 2006 -0600
@@ -246,57 +246,30 @@ is_available_memory (efi_memory_desc_t *
        return 0;
 }
 
-/*
- * Trim descriptor MD so its starts at address START_ADDR.  If the descriptor covers
- * memory that is normally available to the kernel, issue a warning that some memory
- * is being ignored.
- */
+typedef struct kern_memdesc {
+       u64 attribute;
+       u64 start;
+       u64 num_pages;
+} kern_memdesc_t;
+
+static kern_memdesc_t *kern_memmap;
+
 static void
-trim_bottom (efi_memory_desc_t *md, u64 start_addr)
-{
-       u64 num_skipped_pages;
-
-       if (md->phys_addr >= start_addr || !md->num_pages)
-               return;
-
-       num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-       if (num_skipped_pages > md->num_pages)
-               num_skipped_pages = md->num_pages;
-
-       if (is_available_memory(md))
-               printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-                      "at 0x%lx\n", __FUNCTION__,
-                      (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
-                      md->phys_addr, start_addr - IA64_GRANULE_SIZE);
-       /*
-        * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
-        * descriptor list to become unsorted.  In such a case, md->num_pages will be
-        * zero, so the Right Thing will happen.
-        */
-       md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
-       md->num_pages -= num_skipped_pages;
-}
-
-static void
-trim_top (efi_memory_desc_t *md, u64 end_addr)
-{
-       u64 num_dropped_pages, md_end_addr;
-
-       md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-
-       if (md_end_addr <= end_addr || !md->num_pages)
-               return;
-
-       num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
-       if (num_dropped_pages > md->num_pages)
-               num_dropped_pages = md->num_pages;
-
-       if (is_available_memory(md))
-               printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-                      "at 0x%lx\n", __FUNCTION__,
-                      (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
-                      md->phys_addr, end_addr);
-       md->num_pages -= num_dropped_pages;
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
+{
+       kern_memdesc_t *k;
+       u64 start, end, voff;
+
+       voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+       for (k = kern_memmap; k->start != ~0UL; k++) {
+               if (k->attribute != attr)
+                       continue;
+               start = PAGE_ALIGN(k->start);
+               end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+               if (start < end)
+                       if ((*callback)(start + voff, end + voff, arg) < 0)
+                               return;
+       }
 }
 
 /*
@@ -306,153 +279,17 @@ void
 void
 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 {
-       int prev_valid = 0;
-       struct range {
-               u64 start;
-               u64 end;
-       } prev, curr;
-       void *efi_map_start, *efi_map_end, *p, *q;
-       efi_memory_desc_t *md, *check_md;
-       u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
-       unsigned long total_mem = 0;
-
-       efi_map_start = __va(ia64_boot_param->efi_memmap);
-       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-       efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-               md = p;
-
-               /* skip over non-WB memory descriptors; that's all we're interested in... */
-               if (!(md->attribute & EFI_MEMORY_WB))
-                       continue;
-
-#ifdef XEN
-// this works around a problem in the ski bootloader
-{
-               extern long running_on_sim;
-               if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
-                       continue;
-}
-#endif
-               /*
-                * granule_addr is the base of md's first granule.
-                * [granule_addr - first_non_wb_addr) is guaranteed to
-                * be contiguous WB memory.
-                */
-               granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-               first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-
-               if (first_non_wb_addr < md->phys_addr) {
-                       trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
-                       granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-                       first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-               }
-
-               for (q = p; q < efi_map_end; q += efi_desc_size) {
-                       check_md = q;
-
-                       if ((check_md->attribute & EFI_MEMORY_WB) &&
-                           (check_md->phys_addr == first_non_wb_addr))
-                               first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
-                       else
-                               break;          /* non-WB or hole */
-               }
-
-               last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
-               if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
-                       trim_top(md, last_granule_addr);
-
-               if (is_available_memory(md)) {
-                       if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
-                               if (md->phys_addr >= max_addr)
-                                       continue;
-                               md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-                               first_non_wb_addr = max_addr;
-                       }
-
-                       if (total_mem >= mem_limit)
-                               continue;
-
-                       if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
-                               unsigned long limit_addr = md->phys_addr;
-
-                               limit_addr += mem_limit - total_mem;
-                               limit_addr = GRANULEROUNDDOWN(limit_addr);
-
-                               if (md->phys_addr > limit_addr)
-                                       continue;
-
-                               md->num_pages = (limit_addr - md->phys_addr) >>
-                                               EFI_PAGE_SHIFT;
-                               first_non_wb_addr = max_addr = md->phys_addr +
-                                             (md->num_pages << EFI_PAGE_SHIFT);
-                       }
-                       total_mem += (md->num_pages << EFI_PAGE_SHIFT);
-
-                       if (md->num_pages == 0)
-                               continue;
-
-                       curr.start = PAGE_OFFSET + md->phys_addr;
-                       curr.end   = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
-                       if (!prev_valid) {
-                               prev = curr;
-                               prev_valid = 1;
-                       } else {
-                               if (curr.start < prev.start)
-                                       printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
-
-                               if (prev.end == curr.start) {
-                                       /* merge two consecutive memory ranges */
-                                       prev.end = curr.end;
-                               } else {
-                                       start = PAGE_ALIGN(prev.start);
-                                       end = prev.end & PAGE_MASK;
-                                       if ((end > start) && (*callback)(start, end, arg) < 0)
-                                               return;
-                                       prev = curr;
-                               }
-                       }
-               }
-       }
-       if (prev_valid) {
-               start = PAGE_ALIGN(prev.start);
-               end = prev.end & PAGE_MASK;
-               if (end > start)
-                       (*callback)(start, end, arg);
-       }
+       walk(callback, arg, EFI_MEMORY_WB);
 }
 
 /*
- * Walk the EFI memory map to pull out leftover pages in the lower
- * memory regions which do not end up in the regular memory map and
- * stick them into the uncached allocator
- *
- * The regular walk function is significantly more complex than the
- * uncached walk which means it really doesn't make sense to try and
- * marge the two.
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for uncached allocator.
  */
-void __init
-efi_memmap_walk_uc (efi_freemem_callback_t callback)
-{
-       void *efi_map_start, *efi_map_end, *p;
-       efi_memory_desc_t *md;
-       u64 efi_desc_size, start, end;
-
-       efi_map_start = __va(ia64_boot_param->efi_memmap);
-       efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-       efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-               md = p;
-               if (md->attribute == EFI_MEMORY_UC) {
-                       start = PAGE_ALIGN(md->phys_addr);
-                       end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
-                       if ((*callback)(start, end, NULL) < 0)
-                               return;
-               }
-       }
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
+{
+       walk(callback, arg, EFI_MEMORY_UC);
 }
 
 #ifdef XEN
@@ -799,30 +636,6 @@ efi_get_iobase (void)
        return 0;
 }
 
-#ifdef XEN
-// variation of efi_get_iobase which returns entire memory descriptor
-efi_memory_desc_t *
-efi_get_io_md (void)
-{
-       void *efi_map_start, *efi_map_end, *p;
-       efi_memory_desc_t *md;
-       u64 efi_desc_size;
-
-       efi_map_start = __va(ia64_boot_param->efi_memmap);
-       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-       efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-               md = p;
-               if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
-                       if (md->attribute & EFI_MEMORY_UC)
-                               return md;
-               }
-       }
-       return 0;
-}
-#endif
-
 u32
 efi_mem_type (unsigned long phys_addr)
 {
@@ -934,3 +747,228 @@ efi_uart_console_only(void)
        printk(KERN_ERR "Malformed %s value\n", name);
        return 0;
 }
+
+#define efi_md_size(md)        (md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+       return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+       return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+       return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+       return (md->attribute & EFI_MEMORY_UC);
+}
+
+/*
+ * Look for the first granule-aligned memory descriptor that is big
+ * enough to hold the EFI memory map.  Make sure this descriptor is
+ * at least granule sized so it does not get trimmed.
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+       u64     contig_low=0, contig_high=0;
+       u64     as = 0, ae;
+       void *efi_map_start, *efi_map_end, *p, *q;
+       efi_memory_desc_t *md, *pmd = NULL, *check_md;
+       u64     space_needed, efi_desc_size;
+       unsigned long total_mem = 0;
+
+       efi_map_start = __va(ia64_boot_param->efi_memmap);
+       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+       efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+       /*
+        * Worst case: we need 3 kernel descriptors for each efi descriptor
+        * (if every entry has a WB part in the middle, and UC head and tail),
+        * plus one for the end marker.
+        */
+       space_needed = sizeof(kern_memdesc_t) *
+               (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+       for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+               md = p;
+               if (!efi_wb(md)) {
+                       continue;
+               }
+               if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+                       contig_low = GRANULEROUNDUP(md->phys_addr);
+                       contig_high = efi_md_end(md);
+                       for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+                               check_md = q;
+                               if (!efi_wb(check_md))
+                                       break;
+                               if (contig_high != check_md->phys_addr)
+                                       break;
+                               contig_high = efi_md_end(check_md);
+                       }
+                       contig_high = GRANULEROUNDDOWN(contig_high);
+               }
+               if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+                       continue;
+
+               /* Round ends inward to granule boundaries */
+               as = max(contig_low, md->phys_addr);
+               ae = min(contig_high, efi_md_end(md));
+
+               /* keep within max_addr= command line arg */
+               ae = min(ae, max_addr);
+               if (ae <= as)
+                       continue;
+
+               /* avoid going over mem= command line arg */
+               if (total_mem + (ae - as) > mem_limit)
+                       ae -= total_mem + (ae - as) - mem_limit;
+
+               if (ae <= as)
+                       continue;
+
+               if (ae - as > space_needed)
+                       break;
+       }
+       if (p >= efi_map_end)
+               panic("Can't allocate space for kernel memory descriptors");
+
+       return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for kernel
+ * to use.  We can allocate partial granules only if the unavailable
+ * parts exist, and are WB.
+ */
+void
+efi_memmap_init(unsigned long *s, unsigned long *e)
+{
+       struct kern_memdesc *k, *prev = 0;
+       u64     contig_low=0, contig_high=0;
+       u64     as, ae, lim;
+       void *efi_map_start, *efi_map_end, *p, *q;
+       efi_memory_desc_t *md, *pmd = NULL, *check_md;
+       u64     efi_desc_size;
+       unsigned long total_mem = 0;
+
+       k = kern_memmap = find_memmap_space();
+
+       efi_map_start = __va(ia64_boot_param->efi_memmap);
+       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+       efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+       for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+               md = p;
+               if (!efi_wb(md)) {
+                       if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
+                                          md->type == EFI_BOOT_SERVICES_DATA)) {
+                               k->attribute = EFI_MEMORY_UC;
+                               k->start = md->phys_addr;
+                               k->num_pages = md->num_pages;
+                               k++;
+                       }
+                       continue;
+               }
+#ifdef XEN
+// this works around a problem in the ski bootloader
+{
+               extern long running_on_sim;
+               if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
+                       continue;
+}
+#endif
+               if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+                       contig_low = GRANULEROUNDUP(md->phys_addr);
+                       contig_high = efi_md_end(md);
+                       for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+                               check_md = q;
+                               if (!efi_wb(check_md))
+                                       break;
+                               if (contig_high != check_md->phys_addr)
+                                       break;
+                               contig_high = efi_md_end(check_md);
+                       }
+                       contig_high = GRANULEROUNDDOWN(contig_high);
+               }
+               if (!is_available_memory(md))
+                       continue;
+
+               /*
+                * Round ends inward to granule boundaries
+                * Give trimmings to uncached allocator
+                */
+               if (md->phys_addr < contig_low) {
+                       lim = min(efi_md_end(md), contig_low);
+                       if (efi_uc(md)) {
+                               if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+                                   kmd_end(k-1) == md->phys_addr) {
+                                       (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+                               } else {
+                                       k->attribute = EFI_MEMORY_UC;
+                                       k->start = md->phys_addr;
+                                       k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+                                       k++;
+                               }
+                       }
+                       as = contig_low;
+               } else
+                       as = md->phys_addr;
+
+               if (efi_md_end(md) > contig_high) {
+                       lim = max(md->phys_addr, contig_high);
+                       if (efi_uc(md)) {
+                               if (lim == md->phys_addr && k > kern_memmap &&
+                                   (k-1)->attribute == EFI_MEMORY_UC &&
+                                   kmd_end(k-1) == md->phys_addr) {
+                                       (k-1)->num_pages += md->num_pages;
+                               } else {
+                                       k->attribute = EFI_MEMORY_UC;
+                                       k->start = lim;
+                                       k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+                                       k++;
+                               }
+                       }
+                       ae = contig_high;
+               } else
+                       ae = efi_md_end(md);
+
+               /* keep within max_addr= command line arg */
+               ae = min(ae, max_addr);
+               if (ae <= as)
+                       continue;
+
+               /* avoid going over mem= command line arg */
+               if (total_mem + (ae - as) > mem_limit)
+                       ae -= total_mem + (ae - as) - mem_limit;
+
+               if (ae <= as)
+                       continue;
+               if (prev && kmd_end(prev) == md->phys_addr) {
+                       prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+                       total_mem += ae - as;
+                       continue;
+               }
+               k->attribute = EFI_MEMORY_WB;
+               k->start = as;
+               k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+               total_mem += ae - as;
+               prev = k++;
+       }
+       k->start = ~0L; /* end-marker */
+
+       /* reserve the memory we are using for kern_memmap */
+       *s = (u64)kern_memmap;
+       *e = (u64)++k;
+}
diff -r aa2298739112 -r 0a226de3fc37 xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c   Fri Jun 09 10:40:31 2006 -0600
+++ b/xen/arch/ia64/linux-xen/setup.c   Tue Jun 13 08:45:22 2006 -0600
@@ -248,6 +248,9 @@ reserve_memory (void)
                n++;
        }
 #endif
+
+       efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+       n++;
 
        /* end of memory marker */
        rsvd_region[n].start = ~0UL;
diff -r aa2298739112 -r 0a226de3fc37 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Fri Jun 09 10:40:31 2006 -0600
+++ b/xen/arch/ia64/xen/xensetup.c      Tue Jun 13 08:45:22 2006 -0600
@@ -90,20 +90,6 @@ xen_count_pages(u64 start, u64 end, void
     return 0;
 }
 
-/* Find first hole after trunk for xen image */
-static int
-xen_find_first_hole(u64 start, u64 end, void *arg)
-{
-    unsigned long *first_hole = arg;
-
-    if ((*first_hole) == 0) {
-       if ((start <= KERNEL_START) && (KERNEL_START < end))
-           *first_hole = __pa(end);
-    }
-
-    return 0;
-}
-
 static void __init do_initcalls(void)
 {
     initcall_t *call;
@@ -197,15 +183,64 @@ efi_print(void)
     }
 }
 
+/*
+ * These functions are utility functions for getting and
+ * testing memory descriptors for allocating the xenheap area.
+ */
+static efi_memory_desc_t *
+efi_get_md (unsigned long phys_addr)
+{
+    void *efi_map_start, *efi_map_end, *p;
+    efi_memory_desc_t *md;
+    u64 efi_desc_size;
+
+    efi_map_start = __va(ia64_boot_param->efi_memmap);
+    efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+    efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+    for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+        md = p;
+        if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
+            return md;
+    }
+    return 0;
+}
+
+static int
+is_xenheap_usable_memory(efi_memory_desc_t *md)
+{
+    if (!(md->attribute & EFI_MEMORY_WB))
+        return 0;
+
+    switch (md->type) {
+        case EFI_LOADER_CODE:
+        case EFI_LOADER_DATA:
+        case EFI_BOOT_SERVICES_CODE:
+        case EFI_BOOT_SERVICES_DATA:
+        case EFI_CONVENTIONAL_MEMORY:
+            return 1;
+    }
+    return 0;
+}
+
+static inline int
+md_overlaps(efi_memory_desc_t *md, unsigned long phys_addr)
+{
+    return (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT));
+}
+
+#define MD_SIZE(md) (md->num_pages << EFI_PAGE_SHIFT)
+
 void start_kernel(void)
 {
     unsigned char *cmdline;
     void *heap_start;
-    unsigned long nr_pages, firsthole_start;
+    unsigned long nr_pages;
     unsigned long dom0_memory_start, dom0_memory_size;
     unsigned long dom0_initrd_start, dom0_initrd_size;
-    unsigned long initial_images_start, initial_images_end;
+    unsigned long md_end, relo_start, relo_end, relo_size = 0;
     struct domain *idle_domain;
+    efi_memory_desc_t *kern_md, *last_md, *md;
 #ifdef CONFIG_SMP
     int i;
 #endif
@@ -230,67 +265,111 @@ void start_kernel(void)
     init_console();
     set_printk_prefix("(XEN) ");
 
+    if (running_on_sim || ia64_boot_param->domain_start == 0 ||
+                          ia64_boot_param->domain_size == 0) {
+        /* This is possible only with the old elilo, which does not support
+           a vmm.  Fix now, and continue without initrd.  */
+        printk ("Your elilo is not Xen-aware.  Bootparams fixed\n");
+        ia64_boot_param->domain_start = ia64_boot_param->initrd_start;
+        ia64_boot_param->domain_size = ia64_boot_param->initrd_size;
+        ia64_boot_param->initrd_start = 0;
+        ia64_boot_param->initrd_size = 0;
+    }
+
     /* xenheap should be in same TR-covered range with xen image */
     xenheap_phys_end = xen_pstart + xenheap_size;
     printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
-           xen_pstart, xenheap_phys_end);
-
-    /* Find next hole */
-    firsthole_start = 0;
-    efi_memmap_walk(xen_find_first_hole, &firsthole_start);
-
-    if (running_on_sim || ia64_boot_param->domain_start == 0
-       || ia64_boot_param->domain_size == 0) {
-           /* This is possible only with the old elilo, which does not support
-              a vmm.  Fix now, and continue without initrd.  */
-           printk ("Your elilo is not Xen-aware.  Bootparams fixed\n");
-           ia64_boot_param->domain_start = ia64_boot_param->initrd_start;
-           ia64_boot_param->domain_size = ia64_boot_param->initrd_size;
-           ia64_boot_param->initrd_start = 0;
-           ia64_boot_param->initrd_size = 0;
-    }
-
-    initial_images_start = xenheap_phys_end;
-    initial_images_end = initial_images_start +
-       PAGE_ALIGN(ia64_boot_param->domain_size);
-
-    /* also reserve space for initrd */
-    if (ia64_boot_param->initrd_start && ia64_boot_param->initrd_size)
-       initial_images_end += PAGE_ALIGN(ia64_boot_param->initrd_size);
-    else {
-       /* sanity cleanup */
-       ia64_boot_param->initrd_size = 0;
-       ia64_boot_param->initrd_start = 0;
-    }
-
-
-    /* Later may find another memory trunk, even away from xen image... */
-    if (initial_images_end > firsthole_start) {
-       printk("Not enough memory to stash the DOM0 kernel image.\n");
-       printk("First hole:0x%lx, relocation end: 0x%lx\n",
-               firsthole_start, initial_images_end);
-       for ( ; ; );
-    }
-
-    /* This copy is time consuming, but elilo may load Dom0 image
-     * within xenheap range */
-    printk("ready to move Dom0 to 0x%lx with len %lx...", initial_images_start,
-          ia64_boot_param->domain_size);
-
-    memmove(__va(initial_images_start),
-          __va(ia64_boot_param->domain_start),
-          ia64_boot_param->domain_size);
-    ia64_boot_param->domain_start = initial_images_start;
-
-    printk("ready to move initrd to 0x%lx with len %lx...",
-          initial_images_start+PAGE_ALIGN(ia64_boot_param->domain_size),
-          ia64_boot_param->initrd_size);
-    memmove(__va(initial_images_start+PAGE_ALIGN(ia64_boot_param->domain_size)),
-          __va(ia64_boot_param->initrd_start),
-          ia64_boot_param->initrd_size);
-    printk("Done\n");
-    ia64_boot_param->initrd_start = initial_images_start +
-       PAGE_ALIGN(ia64_boot_param->domain_size);
+           xen_pstart, xenheap_phys_end);
+
+    kern_md = md = efi_get_md(xen_pstart);
+    md_end = __pa(ia64_imva(&_end));
+    relo_start = xenheap_phys_end;
+
+    /*
+     * Scan through the memory descriptors after the kernel
+     * image to make sure we have enough room for the xenheap
+     * area, pushing out whatever may already be there.
+     */
+    while (relo_start + relo_size >= md_end) {
+        md = efi_get_md(md_end);
+
+        BUG_ON(!md);
+        BUG_ON(!is_xenheap_usable_memory(md));
+
+        md_end = md->phys_addr + MD_SIZE(md);
+        /*
+         * The dom0 kernel or initrd could overlap, reserve space
+         * at the end to relocate them later.
+         */
+        if (md->type == EFI_LOADER_DATA) {
+            /* Test for ranges we're not prepared to move */
+            BUG_ON(md_overlaps(md, __pa(ia64_boot_param)) ||
+                   md_overlaps(md, ia64_boot_param->efi_memmap) ||
+                   md_overlaps(md, ia64_boot_param->command_line));
+
+            relo_size += MD_SIZE(md);
+            /* If range overlaps the end, push out the relocation start */
+            if (md_end > relo_start)
+                relo_start = md_end;
+        }
+    }
+    last_md = md;
+    relo_end = relo_start + relo_size;
+
+    md_end = __pa(ia64_imva(&_end));
+ 
+    /*
+     * Move any relocated data out into the previously found relocation
+     * area.  Any extra memory descriptors are moved out to the end
+     * and set to zero pages.
+     */
+    for (md = efi_get_md(md_end) ;; md = efi_get_md(md_end)) {
+        md_end = md->phys_addr + MD_SIZE(md);
+
+        if (md->type == EFI_LOADER_DATA) {
+            unsigned long relo_offset;
+
+            if (md_overlaps(md, ia64_boot_param->domain_start)) {
+                relo_offset = ia64_boot_param->domain_start - md->phys_addr;
+                printk("Moving Dom0 kernel image: 0x%lx -> 0x%lx (%ld KiB)\n",
+                       ia64_boot_param->domain_start, relo_start + relo_offset,
+                       ia64_boot_param->domain_size >> 10);
+                ia64_boot_param->domain_start = relo_start + relo_offset;
+            }
+            if (ia64_boot_param->initrd_size &&
+                md_overlaps(md, ia64_boot_param->initrd_start)) {
+                relo_offset = ia64_boot_param->initrd_start - md->phys_addr;
+                printk("Moving Dom0 initrd image: 0x%lx -> 0x%lx (%ld KiB)\n",
+                       ia64_boot_param->initrd_start, relo_start + relo_offset,
+                       ia64_boot_param->initrd_size >> 10);
+                ia64_boot_param->initrd_start = relo_start + relo_offset;
+            }
+            memcpy(__va(relo_start), __va(md->phys_addr), MD_SIZE(md));
+            relo_start += MD_SIZE(md);
+        }
+
+        if (md == kern_md)
+            continue;
+        if (md == last_md)
+            break;
+
+        md->phys_addr = relo_end;
+        md->num_pages = 0;
+    }
+
+    /* Trim the last entry */
+    md->phys_addr = relo_end;
+    md->num_pages = (md_end - relo_end) >> EFI_PAGE_SHIFT;
+
+    /*
+     * Expand the new kernel/xenheap (and maybe dom0/initrd) out to
+     * the full size.  This range will already be type EFI_LOADER_DATA,
+     * therefore the xenheap area is now protected from being allocated
+     * for use by find_memmap_space() in efi.c.
+     */
+    kern_md->num_pages = (relo_end - kern_md->phys_addr) >> EFI_PAGE_SHIFT;
+
+    reserve_memory();
 
     /* first find highest page frame number */
     max_page = 0;
@@ -310,8 +389,6 @@ void start_kernel(void)
     heap_start = __va(init_boot_allocator(__pa(heap_start)));
     printf("After heap_start: %p\n", heap_start);
 
-    reserve_memory();
-
     efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
     efi_memmap_walk(xen_count_pages, &nr_pages);
 
@@ -417,7 +494,7 @@ printk("About to call domain_create()\n"
      * above our heap. The second module, if present, is an initrd ramdisk.
      */
     printk("About to call construct_dom0()\n");
-    dom0_memory_start = (unsigned long) __va(initial_images_start);
+    dom0_memory_start = (unsigned long) __va(ia64_boot_param->domain_start);
     dom0_memory_size = ia64_boot_param->domain_size;
     dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
     dom0_initrd_size = ia64_boot_param->initrd_size;
diff -r aa2298739112 -r 0a226de3fc37 xen/include/asm-ia64/linux-xen/asm/meminit.h
--- a/xen/include/asm-ia64/linux-xen/asm/meminit.h      Fri Jun 09 10:40:31 2006 -0600
+++ b/xen/include/asm-ia64/linux-xen/asm/meminit.h      Tue Jun 13 08:45:22 2006 -0600
@@ -22,13 +22,14 @@
  *     - dom0 code & data
  *     - initrd (optional)
 #endif
+ *     - Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
 #ifndef XEN
-#define IA64_MAX_RSVD_REGIONS 5
+#define IA64_MAX_RSVD_REGIONS 6
 #else
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
 #endif
 
 struct rsvd_region {
@@ -43,6 +44,7 @@ extern void reserve_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern void efi_memmap_init(unsigned long *, unsigned long *);
 
 /*
  * For rounding an address to the next IA64_GRANULE_SIZE or order

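Side note on the "Round ends inward to granule boundaries" step in
efi_memmap_init() above: the following is a small stand-alone sketch of
that arithmetic.  It is not from the patch; it assumes the common 16MB
IA64 granule size and the fixed 4KB EFI page size, and the descriptor
values are made up for the example.

#include <stdint.h>
#include <stdio.h>

#define IA64_GRANULE_SIZE   ((uint64_t)1 << 24)  /* assumed: 16MB granules */
#define EFI_PAGE_SHIFT      12                   /* EFI descriptors count 4KB pages */
#define GRANULEROUNDUP(n)   (((n) + IA64_GRANULE_SIZE - 1) & ~(IA64_GRANULE_SIZE - 1))
#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE - 1))

int main(void)
{
    /* A WB descriptor that starts and ends in the middle of a granule. */
    uint64_t phys_addr = 0x00c00000;             /* 12MB */
    uint64_t num_pages = 0x93ff;
    uint64_t md_end    = phys_addr + (num_pages << EFI_PAGE_SHIFT);

    /* Keep only the granule-aligned interior for the WB kern_memmap entry;
     * the trimmed head/tail may be handed to the uncached allocator. */
    uint64_t as = GRANULEROUNDUP(phys_addr);     /* rounds up to 0x01000000 */
    uint64_t ae = GRANULEROUNDDOWN(md_end);      /* rounds down to 0x09000000 */

    printf("WB entry: start=0x%llx num_pages=0x%llx\n",
           (unsigned long long)as,
           (unsigned long long)((ae - as) >> EFI_PAGE_SHIFT));
    return 0;
}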
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
