# HG changeset patch # Parent 031be098e227bc0dcceef880a1a4c8f9c81647df KEXEC: Clean up logic to choose a range for the crash kernel. This is a fairly large change but I can't see an easy way to split it up without things breaking. Before this patch, using "classic" crashdump= syntax (@) causes an exact size and location to be reserved in the e820 map before Xen and modules are relocated. However, using "new" syntax (-[]:) encounteres a myriad of problems, most notibly not considering the range when allocating its region. This patch tries to unify both codepaths, and fix the broken logic when using "newer" syntax. Changes addressed: 1) Remove use of kexec_crash_area from parse_crashkernel(). Use ranges[0] in preference, as there is no support for multiple ranges using classic syntax. This prevents some wierd logic in __setup_xen() depending on whether kexec_crash_area.size is set or not. 2) Change set_kexec_crash_area_size() to choose_kexec_range() The new method of choosing a range provides an entire rather than just setting kexec_crash_area.size. Also, fix the very broken logic which will only consider a range if its .end is greater than system_ram. Additionally, prefer range with the largest size if we have a choice of valid ones, and prefer ranges with more flexibility in their limits. 3) Move kexec_reserve_area() from setup.c to kexec.c It makes more sense here, Also, change its functionality a little to always work from the kexec range provided by choose_kexec_range() 4) Remove the kexec reservation code when considering modules This code only had any effect for people using the "newer" syntax on the command line, as people using the "classic" syntax would reserve a memory region before considering modules. Signed-off-by: Andrew Cooper diff -r 0a0c02a61676 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -482,32 +482,6 @@ static void __init parse_video_info(void } } -static void __init kexec_reserve_area(struct e820map *e820) -{ - unsigned long kdump_start = kexec_crash_area.start; - unsigned long kdump_size = kexec_crash_area.size; - static int is_reserved = 0; - - kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK; - - if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved ) - return; - - is_reserved = 1; - - if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) ) - { - printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)" - "\n", kdump_size >> 20, kdump_size >> 10, kdump_start); - kexec_crash_area.start = kexec_crash_area.size = 0; - } - else - { - printk("Kdump: %luMB (%lukB) at 0x%lx\n", - kdump_size >> 20, kdump_size >> 10, kdump_start); - } -} - void init_done(void) { /* Free (or page-protect) the init areas. */ @@ -759,13 +733,15 @@ void __init __start_xen(unsigned long mb /* Create a temporary copy of the E820 map. */ memcpy(&boot_e820, &e820, sizeof(e820)); - /* Early kexec reservation (explicit static start address). */ + /* Calculate nr_pages from the e820 map. */ nr_pages = 0; for ( i = 0; i < e820.nr_map; i++ ) if ( e820.map[i].type == E820_RAM ) nr_pages += e820.map[i].size >> PAGE_SHIFT; - set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT); - kexec_reserve_area(&boot_e820); + + /* Early kexec reservation. */ + if( choose_kexec_range((u64)nr_pages << PAGE_SHIFT) ) + kexec_reserve_area(&boot_e820); initial_images = mod; nr_initial_images = mbi->mods_count; @@ -939,19 +915,6 @@ void __init __start_xen(unsigned long mb mod[j].reserved = 1; } } - -#ifdef CONFIG_X86_32 - /* Confine the kexec area to below 4Gb. */ - e = min_t(uint64_t, e, 1ULL << 32); -#endif - /* Don't overlap with modules. */ - e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), - mod, mbi->mods_count, -1); - if ( !kexec_crash_area.start && (s < e) ) - { - e = (e - kexec_crash_area.size) & PAGE_MASK; - kexec_crash_area.start = e; - } } if ( modules_headroom && !mod->reserved ) diff -r 0a0c02a61676 xen/common/kexec.c --- a/xen/common/kexec.c +++ b/xen/common/kexec.c @@ -51,10 +51,22 @@ static unsigned char vmcoreinfo_data[VMC static size_t vmcoreinfo_size = 0; xen_kexec_reserve_t kexec_crash_area; -static struct { - u64 start, end; - unsigned long size; -} ranges[16] __initdata; + +/* Structure to contain kexec memory regions, as provided by the + crashkernel= command line parameter. The meaining is a region of + size byte anywhere in the range start to end. Therefore, + size <= (end - start) */ +struct kexec_boot_range { + u64 start, end, size; +}; + +/* Possible regions for the crashdump kernel region, parsed from the + * "crashkernel=foo" command line option. */ +static struct kexec_boot_range ranges[16] __initdata; + +/* Our chosen range to place the crashdump kernel, taking into account + * system memory. */ +static struct kexec_boot_range * kexec_range __initdata = NULL; /* * Parse command lines in the format @@ -126,32 +138,126 @@ static void __init parse_crashkernel(con ranges[idx].size = 0; } else - kexec_crash_area.size = parse_size_and_unit(cur = str, &str); + ranges[0].size = parse_size_and_unit(cur = str, &str); if ( cur != str && *str == '@' ) - kexec_crash_area.start = parse_size_and_unit(cur = str + 1, &str); + { + ranges[0].start = parse_size_and_unit(cur = str + 1, &str); + ranges[0].end = ranges[0].start + ranges[0].size; + } if ( cur == str ) + { printk(XENLOG_WARNING "crashkernel: memory value expected\n"); + ranges[0].size = 0; + } } custom_param("crashkernel", parse_crashkernel); -void __init set_kexec_crash_area_size(u64 system_ram) +/* Using system ram as a guide, choice one of the possible ranges to + use. Return boolean indicating whether a range has been chosen or + not. */ +bool_t __init choose_kexec_range(u64 system_ram) { unsigned int idx; + struct kexec_boot_range * best_choice = NULL; - for ( idx = 0; idx < ARRAY_SIZE(ranges) && !kexec_crash_area.size; ++idx ) + for ( idx = 0; idx < ARRAY_SIZE(ranges); ++idx ) { + /* size == 0 indicates invalid range. */ if ( !ranges[idx].size ) break; - if ( ranges[idx].size >= system_ram ) + /* PAGE_ALIGN each of the values. */ + ranges[idx].start = PAGE_ALIGN(ranges[idx].start); + ranges[idx].size = PAGE_ALIGN(ranges[idx].size); + if ( ranges[idx].end != -1 ) + ranges[idx].end = PAGE_ALIGN(ranges[idx].end); + + /* Check that the size can actually fit in the region provided. */ + if ( (ranges[idx].end - ranges[idx].start) < ranges[idx].size ) + continue; + + /* Check that the ranges size will fit inside its ends. */ + if ( (ranges[idx].end - ranges[idx].start) < ranges[idx].size ) { - printk(XENLOG_WARNING "crashkernel: invalid size\n"); + printk(XENLOG_WARNING "crashkernel: range %d size bigger than " + "defined range\n", idx); continue; } - if ( ranges[idx].start <= system_ram && ranges[idx].end > system_ram ) - kexec_crash_area.size = ranges[idx].size; + /* Check that the range will fit in memory. */ + if ( ranges[idx].start + ranges[idx].size <= system_ram ) + { + /* Prefer regions with a larger size if we have a choice, and + prefer regions with more flexibility in its range. */ + if ( !best_choice || (ranges[idx].size > best_choice->size) || + ((ranges[idx].end - ranges[idx].start) > + (best_choice->end - best_choice->start)) ) + best_choice = &ranges[idx]; + } + /* else complain. */ + else + printk(XENLOG_WARNING "crashkernel: range %d size bigger than " + "available ram\n", idx); } + + kexec_range = best_choice; + return best_choice != NULL; +} + +/* Reserve an area in the e820 map for the kexec region. */ +void __init kexec_reserve_area(struct e820map *e820) +{ + static bool_t is_reserved = FALSE; + unsigned long kexec_start, kexec_size; + int i; + + /* Do we have a chosen range, or have we already reserved an + area? */ + if ( ! kexec_range || is_reserved ) + return; + + for ( i = 0; i < e820->nr_map; ++i ) + { + /* We dont want to allocate in any non RAM regions. */ + if ( e820->map[i].type != E820_RAM ) + continue; + + /* Check that the e820 region is big enough to fit the kexec + region. */ + if ( (kexec_range->size >= e820->map[i].size) || + (kexec_range->start + kexec_range->size) > + (e820->map[i].addr + e820->map[i].size) ) + continue; + + kexec_start = max_t(u64, kexec_range->start, e820->map[i].addr); + + /* If we have to start the kexec region higher in memory because + of the start of the e820 region, check that we still respect + the specified end of kexec region. */ + if ( kexec_range->size > (kexec_range->end - kexec_start) ) + continue; + + is_reserved = TRUE; + kexec_size = kexec_range->size; + + /* Try to reserve this region in the e820. On failure, we just might + be able to successfully reserve a region in a later RAM block. */ + if ( reserve_e820_ram(e820, kexec_start, kexec_start + kexec_size) ) + { + printk("Kdump: %luMB (%lukB) at 0x%lx\n", + kexec_size >> 20, kexec_size >> 10, kexec_start); + kexec_crash_area.start = kexec_start; + kexec_crash_area.size = kexec_size; + return; + } + } + + /* If we fail to find a valid region, or fail ro reserve one, complain + to the console. */ + printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) in range 0x%lx " + "-> 0x%lx\n", kexec_range->size >> 20, kexec_range->size >> 10, + kexec_range->start, kexec_range->end ); + kexec_crash_area.start = kexec_crash_area.size = 0; } static void one_cpu_only(void) diff -r 0a0c02a61676 xen/include/xen/kexec.h --- a/xen/include/xen/kexec.h +++ b/xen/include/xen/kexec.h @@ -4,6 +4,7 @@ #include #include #include +#include typedef struct xen_kexec_reserve { unsigned long size; @@ -14,7 +15,8 @@ extern xen_kexec_reserve_t kexec_crash_a extern bool_t kexecing; -void set_kexec_crash_area_size(u64 system_ram); +bool_t choose_kexec_range(u64 system_ram); +void kexec_reserve_area(struct e820map *e820); /* We have space for 4 images to support atomic update * of images. This is important for CRASH images since