Xen PV kexec: relocate the magic pages (start info, xenstore, console) next
to the early page tables and rebuild the initial kernel mapping so that a
crashkernel region can be placed behind the running kernel.

Review notes for this copy of the patch (text before the first "diff" line
is ignored by patch(1)):

  * The line structure was lost in extraction. Lines, indentation and blank
    context lines are restored so that every hunk matches its @@ counts.
    Context indentation is a best guess; apply with "patch -l" or refresh
    against the real tree.
  * The header names in the init-xen.c "@@ -29,6" hunk were stripped and
    are NOT reconstructed here; the bare "#include" lines (and the position
    of the blank context line among them) must be filled in from the tree.
  * In the setup-xen.c "@@ -824,9" hunk the text between "(table_start<"
    and "pt_base)" was stripped. The removed reserve_bootmem() call and the
    start of the CONFIG_KEXEC branch are reconstructed by analogy with the
    free_bootmem() hunk below it and with the hunk line counts -- verify.
  * Fix: rebuild_init_mapping() looped forever when it met an empty PMD
    ("continue" without advancing va). The loop is now a for loop that
    always advances by PMD_SIZE. Hunk counts and the following new-file
    line numbers in init-xen.c are adjusted accordingly (-2 lines).
  * Fix: the size of the region above the crash kernel was computed as
    "pt_base - crashk_res.end + 1"; the region is [end + 1, pt_base), so
    the size is "pt_base - (crashk_res.end + 1)". Fixed in both the
    reserve_bootmem() and the free_bootmem() call.

diff -Npru kexec-kernel-only/arch/x86_64/kernel/machine_kexec.c kexec-kernel-only_20121119/arch/x86_64/kernel/machine_kexec.c
--- kexec-kernel-only/arch/x86_64/kernel/machine_kexec.c	2012-09-17 11:56:42.000000000 +0200
+++ kexec-kernel-only_20121119/arch/x86_64/kernel/machine_kexec.c	2012-11-07 13:09:47.000000000 +0100
@@ -833,7 +833,6 @@ NORET_TYPE void xen_pv_machine_kexec(str
 	}
 
 	xen_pgd_unpin(__pa_symbol(init_level4_user_pgt));
-	xen_pgd_unpin(__pa(xen_start_info->pt_base));
 	xen_pgd_unpin(__pa(init_mm.pgd));
 
 	/* Move NULL segment selector to %ds and %es register. */
diff -Npru kexec-kernel-only/arch/x86_64/kernel/setup-xen.c kexec-kernel-only_20121119/arch/x86_64/kernel/setup-xen.c
--- kexec-kernel-only/arch/x86_64/kernel/setup-xen.c	2012-09-17 11:56:42.000000000 +0200
+++ kexec-kernel-only_20121119/arch/x86_64/kernel/setup-xen.c	2012-11-17 22:33:25.000000000 +0100
@@ -588,8 +588,13 @@ static __init void parse_cmdline_early (
 			size = memparse(from+12, &from);
 			if (*from == '@') {
 				base = memparse(from+1, &from);
-				crashk_res.start = base;
-				crashk_res.end = base + size - 1;
+				if (base > __pa_symbol(&_end)) {
+					crashk_res.start = base;
+					crashk_res.end = base + size - 1;
+				} else
+					printk("Crashkernel region overlaps "
+						"with current kernel. Ignoring "
+						"crashkernel command line argument.\n");
 			}
 		}
 	}
@@ -813,9 +818,12 @@ void __init setup_arch(char **cmdline_p)
 	contig_initmem_init(0, end_pfn);
 #endif
 
-	/* Reserve direct mapping */
-	reserve_bootmem_generic(table_start << PAGE_SHIFT,
-				(table_end - table_start) << PAGE_SHIFT,
+	/*
+	 * Reserve magic pages (start info, xenstore and console)
+	 * direct mapping and initial page tables.
+	 */
+	reserve_bootmem_generic((table_start - mp_new_count()) << PAGE_SHIFT,
+				(table_end - table_start + mp_new_count()) << PAGE_SHIFT,
 				BOOTMEM_DEFAULT);
 
 	/* reserve kernel */
@@ -824,9 +832,22 @@ void __init setup_arch(char **cmdline_p)
 			BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_XEN
+#ifdef CONFIG_KEXEC
 	/* reserve physmap, start info and initial page tables */
-	reserve_bootmem(__pa_symbol(&_end), (table_start<<PAGE_SHIFT)-__pa_symbol(&_end),
-			BOOTMEM_DEFAULT);
+	if (!is_initial_xendomain() && crashk_res.start != crashk_res.end) {
+		if (__pa_symbol(&_end) < crashk_res.start)
+			reserve_bootmem(__pa_symbol(&_end), min(__pa(xen_start_info->pt_base), (unsigned long)crashk_res.start) -
+					__pa_symbol(&_end), BOOTMEM_DEFAULT);
+		if (__pa(xen_start_info->pt_base) > crashk_res.end + 1)
+			reserve_bootmem(crashk_res.end + 1,
+					__pa(xen_start_info->pt_base) -
+					(crashk_res.end + 1), BOOTMEM_DEFAULT);
+	} else
+#endif
+	{
+		reserve_bootmem(__pa_symbol(&_end), ((table_start - mp_new_count()) << PAGE_SHIFT) -
+				__pa_symbol(&_end), BOOTMEM_DEFAULT);
+	}
 #else
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -949,10 +970,25 @@ void __init setup_arch(char **cmdline_p)
 		       (unsigned long *)xen_start_info->mfn_list,
 		       min(xen_start_info->nr_pages, p2m_max_pfn)
 		       * sizeof(unsigned long));
-		free_bootmem(
-		     __pa(xen_start_info->mfn_list),
-		     PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
-				     sizeof(unsigned long))));
+
+#ifdef CONFIG_KEXEC
+		if (!is_initial_xendomain() && crashk_res.start != crashk_res.end) {
+			if (__pa(xen_start_info->mfn_list) < crashk_res.start)
+				free_bootmem(__pa(xen_start_info->mfn_list),
+					min(__pa(xen_start_info->pt_base),
+					(unsigned long)crashk_res.start) -
+					__pa(xen_start_info->mfn_list));
+			if (__pa(xen_start_info->pt_base) > crashk_res.end + 1)
+				free_bootmem(crashk_res.end + 1,
+					__pa(xen_start_info->pt_base) -
+					(crashk_res.end + 1));
+		} else
+#endif
+		{
+			free_bootmem(__pa(xen_start_info->mfn_list),
+				__pa(xen_start_info->pt_base) -
+				__pa(xen_start_info->mfn_list));
+		}
 
 		/*
 		 * Initialise the list of the frames that specify the
diff -Npru kexec-kernel-only/arch/x86_64/mm/init-xen.c kexec-kernel-only_20121119/arch/x86_64/mm/init-xen.c
--- kexec-kernel-only/arch/x86_64/mm/init-xen.c	2012-09-17 11:56:27.000000000 +0200
+++ kexec-kernel-only_20121119/arch/x86_64/mm/init-xen.c	2012-11-16 13:36:48.000000000 +0100
@@ -29,6 +29,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include
 #include
@@ -55,10 +57,15 @@ struct dma_mapping_ops* dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static unsigned long dma_reserve __initdata;
+static unsigned long extended_nr_pt_frames __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 extern unsigned long start_pfn;
 
+#define mp_new_start_info_pfn()	(table_start - 1)
+#define mp_new_console_pfn()	(table_start - 2)
+#define mp_new_xenstore_pfn()	(table_start - 3)
+
 /*
  * Use this until direct mapping is established, i.e. before __va() is
  * available in init_memory_mapping().
@@ -405,7 +412,8 @@ static inline int make_readonly(unsigned
 	/* Make old page tables read-only. */
 	if (!xen_feature(XENFEAT_writable_page_tables)
 	    && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
-	    && (paddr < (start_pfn << PAGE_SHIFT)))
+	    && (paddr < (xen_start_info->pt_base - __START_KERNEL_map +
+			 (extended_nr_pt_frames << PAGE_SHIFT))))
 		readonly = 1;
 
 	/*
@@ -585,9 +593,7 @@ void __init extend_init_mapping(unsigned
 	}
 
 	/* Ensure init mappings cover kernel text/data and initial tables. */
-	while (va < (__START_KERNEL_map
-		     + (start_pfn << PAGE_SHIFT)
-		     + tables_space)) {
+	while (va < (__START_KERNEL_map + PFN_PHYS(table_end))) {
 		pmd = (pmd_t *)&page[pmd_index(va)];
 		if (pmd_none(*pmd)) {
 			pte_page = alloc_static_page(&phys);
@@ -606,17 +612,89 @@ void __init extend_init_mapping(unsigned
 			xen_l1_entry_update(pte, new_pte);
 		}
 		va += PAGE_SIZE;
+		if (table_start < start_pfn + mp_new_count()) {
+			table_start = start_pfn + mp_new_count();
+			table_end = table_start + tables_space;
+		}
+	}
+
+	extended_nr_pt_frames = start_pfn - PFN_DOWN(__pa(xen_start_info->pt_base));
+
+	start_pfn = table_start;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init rebuild_init_mapping(void)
+{
+	pgd_t *pgd = init_level4_pgt;
+	pmd_t *pmd;
+	pud_t *pud;
+	pte_t *pte, pte_w;
+	unsigned long i, phys, va = round_down((unsigned long)&_text, PMD_SIZE);
+	void *pte_dst, *pte_src;
+
+	if (is_initial_xendomain() || crashk_res.start == crashk_res.end)
+		return;
+
+	pud = __va(pgd_val(pgd[pgd_index(va)]) & PHYSICAL_PAGE_MASK);
+	pmd = __va(pud_val(pud[pud_index(va)]) & PHYSICAL_PAGE_MASK);
+
+	/* Ensure init mappings cover kernel text/data. */
+	for (; va < (unsigned long)&_end; va += PMD_SIZE) {
+		if (pmd_none(pmd[pmd_index(va)]))
+			continue;
+
+		pte_src = __va(pmd_val(pmd[pmd_index(va)]) & PHYSICAL_PAGE_MASK);
+		pte_dst = alloc_static_page(&phys);
+		memcpy(pte_dst, pte_src, PAGE_SIZE);
+
+		early_make_page_readonly(pte_dst, XENFEAT_writable_page_tables);
+		set_pmd(&pmd[pmd_index(va)], __pmd(phys | _KERNPG_TABLE));
 	}
 
+	va = PAGE_ALIGN((unsigned long)&_end);
+
 	/* Finally, blow away any spurious initial mappings. */
-	while (1) {
-		pmd = (pmd_t *)&page[pmd_index(va)];
-		if (pmd_none(*pmd))
-			break;
+	while (va < round_up((unsigned long)&_end, PMD_SIZE)) {
 		HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
 		va += PAGE_SIZE;
 	}
+
+	while (va < round_up((unsigned long)&_end, PUD_SIZE)) {
+		if (pmd_none(pmd[pmd_index(va)]))
+			break;
+
+		pmd_clear(&pmd[pmd_index(va)]);
+
+		va += PMD_SIZE;
+	}
+
+	va = xen_start_info->pt_base;
+
+	/* Unpin initial page table. */
+	xen_pgd_unpin(__pa(va));
+
+	pud = __va(pgd_val(pgd[pgd_index(va)]) & PHYSICAL_PAGE_MASK);
+	pmd = __va(pud_val(pud[pud_index(va)]) & PHYSICAL_PAGE_MASK);
+
+	/* Mark initial page table pages as writable. */
+	for (i = 0; i < extended_nr_pt_frames; ++i) {
+		pte = __va(pmd_val(pmd[pmd_index(va)]) & PHYSICAL_PAGE_MASK);
+		pte = &pte[pte_index(va)];
+
+		pte_w.pte = pte->pte | _PAGE_RW;
+
+		if (HYPERVISOR_update_va_mapping(va, pte_w, 0))
+			BUG();
+
+		va += PAGE_SIZE;
+	}
 }
+#else
+static void __init rebuild_init_mapping(void)
+{
+}
+#endif
 
 static void __init find_early_table_space(unsigned long end)
 {
@@ -626,18 +704,80 @@ static void __init find_early_table_spac
 	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
 	ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
 
+#ifdef CONFIG_KEXEC
+	if (!is_initial_xendomain() && crashk_res.start != crashk_res.end)
+		ptes += (PAGE_ALIGN((unsigned long)_end) -
+			round_down((unsigned long)&_text, PMD_SIZE)) >> PAGE_SHIFT;
+#endif
+
 	tables = round_up(puds * 8, PAGE_SIZE) +
 		round_up(pmds * 8, PAGE_SIZE) +
 		round_up(ptes * 8, PAGE_SIZE);
 
-	extend_init_mapping(tables);
+	tables >>= PAGE_SHIFT;
 
 	table_start = start_pfn;
-	table_end = table_start + (tables>>PAGE_SHIFT);
+
+#ifdef CONFIG_KEXEC
+	if (!is_initial_xendomain())
+		table_start = max(table_start, PFN_UP((unsigned long)crashk_res.end));
+#endif
+
+	/* Reserve area for new magic pages (start info, xenstore and console). */
+	table_start += mp_new_count();
+
+	table_end = table_start + tables;
+
+	extend_init_mapping(tables);
 
 	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
-		end, table_start << PAGE_SHIFT,
-		(table_end << PAGE_SHIFT) + tables);
+		end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
+}
+
+static void __init *map_magic_page(unsigned long pfn_new, unsigned long pfn_old)
+{
+	struct mmu_update m2p_updates[2] = {};
+	unsigned long mfn_new, mfn_old, va;
+
+	mfn_new = pfn_to_mfn(pfn_new);
+	mfn_old = pfn_to_mfn(pfn_old);
+
+	m2p_updates[0].ptr = PFN_PHYS(mfn_old);
+	m2p_updates[0].ptr |= MMU_MACHPHYS_UPDATE;
+	m2p_updates[0].val = pfn_new;
+
+	m2p_updates[1].ptr = PFN_PHYS(mfn_new);
+	m2p_updates[1].ptr |= MMU_MACHPHYS_UPDATE;
+	m2p_updates[1].val = pfn_old;
+
+	if (HYPERVISOR_mmu_update(m2p_updates, 2, NULL, DOMID_SELF))
+		BUG();
+
+	phys_to_machine_mapping[pfn_new] = mfn_old;
+	phys_to_machine_mapping[pfn_old] = mfn_new;
+
+	va = __START_KERNEL_map + PFN_PHYS(pfn_new);
+
+	if (HYPERVISOR_update_va_mapping(va,
+					pfn_pte(pfn_new, PAGE_KERNEL_EXEC),
+					UVMF_INVLPG | UVMF_LOCAL))
+		BUG();
+
+	return (void *)va;
+}
+
+static void __init relocate_magic_pages(void)
+{
+	xen_start_info = map_magic_page(mp_new_start_info_pfn(),
+					PFN_DOWN(__pa(xen_start_info)));
+
+	if (is_initial_xendomain())
+		return;
+
+	map_magic_page(mp_new_xenstore_pfn(),
+			mfn_to_pfn(xen_start_info->store_mfn));
+	map_magic_page(mp_new_console_pfn(),
+			mfn_to_pfn(xen_start_info->console.domU.mfn));
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -657,6 +797,8 @@ void __init init_memory_mapping(unsigned
 	 */
 	find_early_table_space(end);
 
+	relocate_magic_pages();
+
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
 
@@ -675,8 +817,6 @@ void __init init_memory_mapping(unsigned
 		set_pgd(pgd, mk_kernel_pgd(pud_phys));
 	}
 
-	BUG_ON(start_pfn != table_end);
-
 	/* Re-vector virtual addresses pointing into the initial
 	   mapping to the just-established permanent ones. */
 	xen_start_info = __va(__pa(xen_start_info));
@@ -691,14 +831,25 @@ void __init init_memory_mapping(unsigned
 		xen_start_info->mod_start = (unsigned long)
 			__va(__pa(xen_start_info->mod_start));
 
-	/* Destroy the Xen-created mappings beyond the kernel image as
-	 * well as the temporary mappings created above. Prevents
-	 * overlap with modules area (if init mapping is very big).
-	 */
-	start = PAGE_ALIGN((unsigned long)_end);
-	end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
-	for (; start < end; start += PAGE_SIZE)
-		WARN_ON(HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0));
+	rebuild_init_mapping();
+
+	BUG_ON(start_pfn != table_end);
+
+#ifdef CONFIG_KEXEC
+	if (is_initial_xendomain() || crashk_res.start == crashk_res.end)
+#endif
+	{
+		/*
+		 * Destroy the Xen-created mappings beyond the kernel image as
+		 * well as the temporary mappings created above. Prevents
+		 * overlap with modules area (if init mapping is very big).
+		 */
+
+		start = PAGE_ALIGN((unsigned long)_end);
+		end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
+		for (; start < end; start += PAGE_SIZE)
+			WARN_ON(HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0));
+	}
 
 	__flush_tlb_all();
 }
diff -Npru kexec-kernel-only/include/asm-x86_64/mach-xen/asm/page.h kexec-kernel-only_20121119/include/asm-x86_64/mach-xen/asm/page.h
--- kexec-kernel-only/include/asm-x86_64/mach-xen/asm/page.h	2012-09-17 11:56:42.000000000 +0200
+++ kexec-kernel-only_20121119/include/asm-x86_64/mach-xen/asm/page.h	2012-11-07 15:36:42.000000000 +0100
@@ -169,7 +169,7 @@ static inline pgd_t __pgd(unsigned long
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
-#define KERNEL_TEXT_SIZE  (_AC(40,UL)*1024*1024)
+#define KERNEL_TEXT_SIZE  (_AC(100,UL)*1024*1024)
 #define KERNEL_TEXT_START _AC(0xffffffff80000000,UL)
 
 #define PAGE_OFFSET		__PAGE_OFFSET
diff -Npru kexec-kernel-only/include/asm-x86_64/page.h kexec-kernel-only_20121119/include/asm-x86_64/page.h
--- kexec-kernel-only/include/asm-x86_64/page.h	2012-09-17 11:56:40.000000000 +0200
+++ kexec-kernel-only_20121119/include/asm-x86_64/page.h	2012-11-07 15:36:58.000000000 +0100
@@ -91,7 +91,7 @@ extern unsigned long phys_base;
 #define __VIRTUAL_MASK_SHIFT	48
 #define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
 
-#define KERNEL_TEXT_SIZE  (_AC(40,UL)*1024*1024)
+#define KERNEL_TEXT_SIZE  (_AC(100,UL)*1024*1024)
 #define KERNEL_TEXT_START _AC(0xffffffff80000000,UL)
 
 #ifndef __ASSEMBLY__
diff -Npru kexec-kernel-only/include/asm-x86_64/proto.h kexec-kernel-only_20121119/include/asm-x86_64/proto.h
--- kexec-kernel-only/include/asm-x86_64/proto.h	2012-09-17 11:56:40.000000000 +0200
+++ kexec-kernel-only_20121119/include/asm-x86_64/proto.h	2012-11-07 14:25:09.000000000 +0100
@@ -153,6 +153,16 @@ extern int force_mwait;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
+#ifdef CONFIG_XEN
+static inline int mp_new_count(void)
+{
+	if (is_initial_xendomain())
+		return 1;
+	else
+		return 3;
+}
+#endif
+
 #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
 #define round_down(x,y) ((x) & ~((y)-1))
 