[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 03/04] Kexec / Kdump: x86_32 specific code
[PATCH 03/04] Kexec / Kdump: x86_32 specific code This patch contains the x86_32 implementation of Kexec / Kdump for Xen. Signed-Off-By: Magnus Damm <magnus@xxxxxxxxxxxxx> Signed-Off-By: Simon Horman <horms@xxxxxxxxxxxx> --- Applies on top of xen-unstable-12621. buildconfigs/linux-defconfig_xen_x86_32 | 2 linux-2.6-xen-sparse/arch/i386/Kconfig | 2 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 19 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 8 patches/linux-2.6.16.33/git-35...c9.patch | 401 +++++++ patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec...code-i386.patch | 169 ++++ patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch | 108 ++ patches/linux-2.6.16.33/series | 3 xen/arch/x86/x86_32/entry.S | 2 xen/include/asm-x86/x86_32/elf.h | 52 + xen/include/asm-x86/x86_32/kexec.h | 28 11 files changed, 783 insertions(+), 11 deletions(-) --- 0002/buildconfigs/linux-defconfig_xen_x86_32 +++ work/buildconfigs/linux-defconfig_xen_x86_32 @@ -183,6 +183,7 @@ CONFIG_MTRR=y CONFIG_REGPARM=y CONFIG_SECCOMP=y CONFIG_HZ_100=y +CONFIG_KEXEC=y # CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 @@ -1036,6 +1037,7 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +# CONFIG_PROC_VMCORE is not set CONFIG_SYSFS=y CONFIG_TMPFS=y # CONFIG_HUGETLB_PAGE is not set --- 0001/linux-2.6-xen-sparse/arch/i386/Kconfig +++ work/linux-2.6-xen-sparse/arch/i386/Kconfig @@ -726,7 +726,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL && !X86_XEN + depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot --- 0001/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c +++ work/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c @@ -69,6 +69,10 @@ #include "setup_arch_pre.h" #include <bios_ebda.h> +#ifdef CONFIG_XEN +#include <xen/interface/kexec.h> +#endif + /* Forward Declaration. */ void __init find_max_pfn(void); @@ -943,6 +947,7 @@ static void __init parse_cmdline_early ( * after a kernel panic. */ else if (!memcmp(from, "crashkernel=", 12)) { +#ifndef CONFIG_XEN unsigned long size, base; size = memparse(from+12, &from); if (*from == '@') { @@ -953,6 +958,10 @@ static void __init parse_cmdline_early ( crashk_res.start = base; crashk_res.end = base + size - 1; } +#else + printk("Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); +#endif } #endif #ifdef CONFIG_PROC_VMCORE @@ -1322,10 +1331,14 @@ void __init setup_bootmem_allocator(void } #endif #ifdef CONFIG_KEXEC +#ifdef CONFIG_XEN + xen_machine_kexec_setup_resources(); +#else if (crashk_res.start != crashk_res.end) reserve_bootmem(crashk_res.start, crashk_res.end - crashk_res.start + 1); #endif +#endif if (!xen_feature(XENFEAT_auto_translated_physmap)) phys_to_machine_mapping = @@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e request_resource(res, data_resource); #endif #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); +#ifdef CONFIG_XEN + xen_machine_kexec_register_resources(res); +#endif #endif } } --- 0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h +++ work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h @@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op( return _hypercall2(int, xenoprof_op, op, arg); } +static inline int +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + + #endif /* __HYPERCALL_H__ */ --- /dev/null +++ work/patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch @@ -0,0 +1,401 @@ +From: Magnus Damm <magnus@xxxxxxxxxxxxx> +Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200) +Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386) +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9 + +[PATCH] i386: Avoid overwriting the current pgd (V4, i386) + +kexec: Avoid overwriting the current pgd (V4, i386) + +This patch upgrades the i386-specific kexec code to avoid overwriting the +current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used +to start a secondary kernel that dumps the memory of the previous kernel. + +The code introduces a new set of page tables. These tables are used to provide +an executable identity mapping without overwriting the current pgd. + +Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx> +Signed-off-by: Andi Kleen <ak@xxxxxxx> +--- + +--- a/arch/i386/kernel/machine_kexec.c ++++ b/arch/i386/kernel/machine_kexec.c +@@ -21,70 +21,13 @@ + #include <asm/system.h> + + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +- +-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L2_ATTR (_PAGE_PRESENT) +- +-#define LEVEL0_SIZE (1UL << 12UL) +- +-#ifndef CONFIG_X86_PAE +-#define LEVEL1_SIZE (1UL << 22UL) +-static u32 pgtable_level1[1024] PAGE_ALIGNED; +- +-static void identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index; +- u32 *pgtable_level2; +- +- /* Find the current page table */ +- pgtable_level2 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = address / LEVEL1_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level2); +-} +- +-#else +-#define LEVEL1_SIZE (1UL << 21UL) +-#define LEVEL2_SIZE (1UL << 30UL) +-static u64 pgtable_level1[512] PAGE_ALIGNED; +-static u64 pgtable_level2[512] PAGE_ALIGNED; +- +-static void identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index, level3_index; +- u64 *pgtable_level3; +- +- /* Find the current page table */ +- pgtable_level3 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; +- level3_index = address / LEVEL2_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- set_64bit(&pgtable_level3[level3_index], +- __pa(pgtable_level2) | L2_ATTR); +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level3); +-} ++static u32 kexec_pgd[1024] PAGE_ALIGNED; ++#ifdef CONFIG_X86_PAE ++static u32 kexec_pmd0[1024] PAGE_ALIGNED; ++static u32 kexec_pmd1[1024] PAGE_ALIGNED; + #endif ++static u32 kexec_pte0[1024] PAGE_ALIGNED; ++static u32 kexec_pte1[1024] PAGE_ALIGNED; + + static void set_idt(void *newidt, __u16 limit) + { +@@ -128,16 +71,6 @@ static void load_segments(void) + #undef __STR + } + +-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( +- unsigned long indirection_page, +- unsigned long reboot_code_buffer, +- unsigned long start_address, +- unsigned int has_pae) ATTRIB_NORET; +- +-extern const unsigned char relocate_new_kernel[]; +-extern void relocate_new_kernel_end(void); +-extern const unsigned int relocate_new_kernel_size; +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage + */ + NORET_TYPE void machine_kexec(struct kimage *image) + { +- unsigned long page_list; +- unsigned long reboot_code_buffer; +- +- relocate_new_kernel_t rnk; ++ unsigned long page_list[PAGES_NR]; ++ void *control_page; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + +- /* Compute some offsets */ +- reboot_code_buffer = page_to_pfn(image->control_code_page) +- << PAGE_SHIFT; +- page_list = image->head; +- +- /* Set up an identity mapping for the reboot_code_buffer */ +- identity_map_page(reboot_code_buffer); +- +- /* copy it out */ +- memcpy((void *)reboot_code_buffer, relocate_new_kernel, +- relocate_new_kernel_size); ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ page_list[PA_CONTROL_PAGE] = __pa(control_page); ++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; ++ page_list[PA_PGD] = __pa(kexec_pgd); ++ page_list[VA_PGD] = (unsigned long)kexec_pgd; ++#ifdef CONFIG_X86_PAE ++ page_list[PA_PMD_0] = __pa(kexec_pmd0); ++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; ++ page_list[PA_PMD_1] = __pa(kexec_pmd1); ++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; ++#endif ++ page_list[PA_PTE_0] = __pa(kexec_pte0); ++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; ++ page_list[PA_PTE_1] = __pa(kexec_pte1); ++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is +@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim + set_idt(phys_to_virt(0),0); + + /* now call it */ +- rnk = (relocate_new_kernel_t) reboot_code_buffer; +- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); ++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, ++ image->start, cpu_has_pae); + } + + /* crashkernel=size@addr specifies the location to reserve for +--- a/arch/i386/kernel/relocate_kernel.S ++++ b/arch/i386/kernel/relocate_kernel.S +@@ -7,16 +7,138 @@ + */ + + #include <linux/linkage.h> ++#include <asm/page.h> ++#include <asm/kexec.h> ++ ++/* ++ * Must be relocatable PIC code callable as a C function ++ */ ++ ++#define PTR(x) (x << 2) ++#define PAGE_ALIGNED (1 << PAGE_SHIFT) ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ ++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ ++ ++ .text ++ .align PAGE_ALIGNED ++ .globl relocate_kernel ++relocate_kernel: ++ movl 8(%esp), %ebp /* list of pages */ ++ ++#ifdef CONFIG_X86_PAE ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_0)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_1)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#else ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#endif + +- /* +- * Must be relocatable PIC code callable as a C function, that once +- * it starts can not use the previous processes stack. +- */ +- .globl relocate_new_kernel + relocate_new_kernel: + /* read the arguments and say goodbye to the stack */ + movl 4(%esp), %ebx /* page_list */ +- movl 8(%esp), %ebp /* reboot_code_buffer */ ++ movl 8(%esp), %ebp /* list of pages */ + movl 12(%esp), %edx /* start address */ + movl 16(%esp), %ecx /* cpu_has_pae */ + +@@ -24,11 +146,26 @@ relocate_new_kernel: + pushl $0 + popfl + +- /* set a new stack at the bottom of our page... */ +- lea 4096(%ebp), %esp ++ /* get physical address of control page now */ ++ /* this is impossible after page table switch */ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi ++ ++ /* switch to new set of page tables */ ++ movl PTR(PA_PGD)(%ebp), %eax ++ movl %eax, %cr3 ++ ++ /* setup a new stack at the end of the physical control page */ ++ lea 4096(%edi), %esp + +- /* store the parameters back on the stack */ +- pushl %edx /* store the start address */ ++ /* jump to identity mapped page */ ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel), %eax ++ pushl %eax ++ ret ++ ++identity_mapped: ++ /* store the start address on the stack */ ++ pushl %edx + + /* Set cr0 to a known state: + * 31 0 == Paging disabled +@@ -113,8 +250,3 @@ relocate_new_kernel: + xorl %edi, %edi + xorl %ebp, %ebp + ret +-relocate_new_kernel_end: +- +- .globl relocate_new_kernel_size +-relocate_new_kernel_size: +- .long relocate_new_kernel_end - relocate_new_kernel +--- a/include/asm-i386/kexec.h ++++ b/include/asm-i386/kexec.h +@@ -1,6 +1,26 @@ + #ifndef _I386_KEXEC_H + #define _I386_KEXEC_H + ++#define PA_CONTROL_PAGE 0 ++#define VA_CONTROL_PAGE 1 ++#define PA_PGD 2 ++#define VA_PGD 3 ++#define PA_PTE_0 4 ++#define VA_PTE_0 5 ++#define PA_PTE_1 6 ++#define VA_PTE_1 7 ++#ifdef CONFIG_X86_PAE ++#define PA_PMD_0 8 ++#define VA_PMD_0 9 ++#define PA_PMD_1 10 ++#define VA_PMD_1 11 ++#define PAGES_NR 12 ++#else ++#define PAGES_NR 8 ++#endif ++ ++#ifndef __ASSEMBLY__ ++ + #include <asm/fixmap.h> + #include <asm/ptrace.h> + #include <asm/string.h> +@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru + newregs->eip = (unsigned long)current_text_addr(); + } + } ++asmlinkage NORET_TYPE void ++relocate_kernel(unsigned long indirection_page, ++ unsigned long control_page, ++ unsigned long start_address, ++ unsigned int has_pae) ATTRIB_NORET; ++ ++#endif /* __ASSEMBLY__ */ + + #endif /* _I386_KEXEC_H */ --- /dev/null +++ work/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch @@ -0,0 +1,169 @@ +kexec: Move asm segment handling code to the assembly file (i386) + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx> +--- + + Applies to 2.6.19-rc1. + + machine_kexec.c | 59 ----------------------------------------------------- + relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 53 insertions(+), 64 deletions(-) + +--- 0002/arch/i386/kernel/machine_kexec.c ++++ work/arch/i386/kernel/machine_kexec.c 2006-10-05 15:49:08.000000000 +0900 +@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) +-{ +- struct Xgt_desc_struct curidt; +- +- /* ia32 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- load_idt(&curidt); +-}; +- +- +-static void set_gdt(void *newgdt, __u16 limit) +-{ +- struct Xgt_desc_struct curgdt; +- +- /* ia32 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- load_gdt(&curgdt); +-}; +- +-static void load_segments(void) +-{ +-#define __STR(X) #X +-#define STR(X) __STR(X) +- +- __asm__ __volatile__ ( +- "\tljmp $"STR(__KERNEL_CS)",$1f\n" +- "\t1:\n" +- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" +- "\tmovl %%eax,%%ds\n" +- "\tmovl %%eax,%%es\n" +- "\tmovl %%eax,%%fs\n" +- "\tmovl %%eax,%%gs\n" +- "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); +-#undef STR +-#undef __STR +-} +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); + } +--- 0002/arch/i386/kernel/relocate_kernel.S ++++ work/arch/i386/kernel/relocate_kernel.S 2006-10-05 16:03:21.000000000 +0900 +@@ -154,14 +154,45 @@ relocate_new_kernel: + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + ++ /* setup idt */ ++ movl %edi, %eax ++ addl $(idt_48 - relocate_kernel), %eax ++ lidtl (%eax) ++ ++ /* setup gdt */ ++ movl %edi, %eax ++ addl $(gdt - relocate_kernel), %eax ++ movl %edi, %esi ++ addl $((gdt_48 - relocate_kernel) + 2), %esi ++ movl %eax, (%esi) ++ ++ movl %edi, %eax ++ addl $(gdt_48 - relocate_kernel), %eax ++ lgdtl (%eax) ++ ++ /* setup data segment registers */ ++ mov $(gdt_ds - gdt), %eax ++ mov %eax, %ds ++ mov %eax, %es ++ mov %eax, %fs ++ mov %eax, %gs ++ mov %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + +- /* jump to identity mapped page */ +- movl %edi, %eax +- addl $(identity_mapped - relocate_kernel), %eax +- pushl %eax +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movl %edi, %esi ++ xorl %eax, %eax ++ pushl %eax ++ pushl %esi ++ pushl %eax ++ movl $(gdt_cs - gdt), %eax ++ pushl %eax ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel),%eax ++ pushl %eax ++ iretl + + identity_mapped: + /* store the start address on the stack */ +@@ -250,3 +281,20 @@ identity_mapped: + xorl %edi, %edi + xorl %ebp, %ebp + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ ++gdt_ds: ++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++gdt_end: ++ ++gdt_48: ++ .word gdt_end - gdt - 1 /* limit */ ++ .long 0 /* base - filled in by code above */ ++ ++idt_48: ++ .word 0 /* limit */ ++ .long 0 /* base */ --- /dev/null +++ work/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch @@ -0,0 +1,108 @@ +--- 0001/arch/i386/kernel/crash.c ++++ work/arch/i386/kernel/crash.c +@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re + crash_save_this_cpu(regs, cpu); + } + ++#ifndef CONFIG_XEN + #ifdef CONFIG_SMP + static atomic_t waiting_for_crash_ipi; + +@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void) + /* There are no cpus to shootdown */ + } + #endif ++#endif /* CONFIG_XEN */ + + void machine_crash_shutdown(struct pt_regs *regs) + { +@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re + + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = smp_processor_id(); ++#ifndef CONFIG_XEN + nmi_shootdown_cpus(); + lapic_shutdown(); + #if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); + #endif ++#endif /* CONFIG_XEN */ + crash_save_self(regs); + } +--- 0007/arch/i386/kernel/machine_kexec.c ++++ work/arch/i386/kernel/machine_kexec.c +@@ -19,6 +19,10 @@ + #include <asm/desc.h> + #include <asm/system.h> + ++#ifdef CONFIG_XEN ++#include <xen/interface/kexec.h> ++#endif ++ + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) + static u32 kexec_pgd[1024] PAGE_ALIGNED; + #ifdef CONFIG_X86_PAE +@@ -28,6 +32,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + ++#ifdef CONFIG_XEN ++ ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT) ++ ++#if PAGES_NR > KEXEC_XEN_NO_PAGES ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break ++#endif ++ ++#if PA_CONTROL_PAGE != 0 ++#error PA_CONTROL_PAGE is non zero - Xen support will break ++#endif ++ ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) ++{ ++ void *control_page; ++ ++ memset(xki->page_list, 0, sizeof(xki->page_list)); ++ ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); ++ xki->page_list[PA_PGD] = __ma(kexec_pgd); ++#ifdef CONFIG_X86_PAE ++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); ++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); ++#endif ++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0); ++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++ ++} ++ ++#endif /* CONFIG_XEN */ ++ + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +--- 0006/include/asm-i386/kexec.h ++++ work/include/asm-i386/kexec.h +@@ -98,6 +98,20 @@ relocate_kernel(unsigned long indirectio + unsigned long start_address, + unsigned int has_pae) ATTRIB_NORET; + ++ ++/* Under Xen we need to work with machine addresses. These macros give the ++ * machine address of a certain page to the generic kexec code instead of ++ * the pseudo physical address which would be given by the default macros. ++ */ ++ ++#ifdef CONFIG_XEN ++#define KEXEC_ARCH_HAS_PAGE_MACROS ++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) ++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) ++#define kexec_virt_to_phys(addr) virt_to_machine(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #endif /* _I386_KEXEC_H */ --- 0004/patches/linux-2.6.16.33/series +++ work/patches/linux-2.6.16.33/series @@ -1,6 +1,9 @@ kexec-generic.patch git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch +git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch +linux-2.6.19-rc1-kexec-move_segment_code-i386.patch +linux-2.6.19-rc1-kexec-xen-i386.patch blktap-aio-16_03_06.patch device_bind.patch fix-hz-suspend.patch --- 0001/xen/arch/x86/x86_32/entry.S +++ work/xen/arch/x86/x86_32/entry.S @@ -659,6 +659,7 @@ ENTRY(hypercall_table) .long do_hvm_op .long do_sysctl /* 35 */ .long do_domctl + .long do_kexec_op .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr @@ -701,6 +702,7 @@ ENTRY(hypercall_args_table) .byte 2 /* do_hvm_op */ .byte 1 /* do_sysctl */ /* 35 */ .byte 1 /* do_domctl */ + .byte 2 /* do_kexec_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr --- 0004/xen/include/asm-x86/x86_32/elf.h +++ work/xen/include/asm-x86/x86_32/elf.h @@ -1,16 +1,62 @@ #ifndef __X86_32_ELF_H__ #define __X86_32_ELF_H__ -#include <xen/lib.h> /* for printk() used in stub */ +#include <asm/processor.h> typedef struct { - unsigned long dummy; + unsigned long ebx; + unsigned long ecx; + unsigned long edx; + unsigned long esi; + unsigned long edi; + unsigned long ebp; + unsigned long eax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_eax; + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; } ELF_Gregset; extern inline void elf_core_save_regs(ELF_Gregset *core_regs, crash_xen_core_t *xen_core_regs) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + unsigned long tmp; + + asm volatile("movl %%ebx,%0" : "=m"(core_regs->ebx)); + asm volatile("movl %%ecx,%0" : "=m"(core_regs->ecx)); + asm volatile("movl %%edx,%0" : "=m"(core_regs->edx)); + asm volatile("movl %%esi,%0" : "=m"(core_regs->esi)); + asm volatile("movl %%edi,%0" : "=m"(core_regs->edi)); + asm volatile("movl %%ebp,%0" : "=m"(core_regs->ebp)); + asm volatile("movl %%eax,%0" : "=m"(core_regs->eax)); + asm volatile("movw %%ds, %%ax;" :"=a"(core_regs->ds)); + asm volatile("movw %%es, %%ax;" :"=a"(core_regs->es)); + asm volatile("movw %%fs, %%ax;" :"=a"(core_regs->fs)); + asm volatile("movw %%gs, %%ax;" :"=a"(core_regs->gs)); + /* orig_eax not filled in for now */ + core_regs->eip = (unsigned long)current_text_addr(); + asm volatile("movw %%cs, %%ax;" :"=a"(core_regs->cs)); + asm volatile("pushfl; popl %0" :"=m"(core_regs->eflags)); + asm volatile("movl %%esp,%0" : "=m"(core_regs->esp)); + asm volatile("movw %%ss, %%ax;" :"=a"(core_regs->ss)); + + asm volatile("mov %%cr0, %0" : "=r" (tmp) : ); + xen_core_regs->cr0 = tmp; + + asm volatile("mov %%cr2, %0" : "=r" (tmp) : ); + xen_core_regs->cr2 = tmp; + + asm volatile("mov %%cr3, %0" : "=r" (tmp) : ); + xen_core_regs->cr3 = tmp; + + asm volatile("mov %%cr4, %0" : "=r" (tmp) : ); + xen_core_regs->cr4 = tmp; } #endif /* __X86_32_ELF_H__ */ --- 0004/xen/include/asm-x86/x86_32/kexec.h +++ work/xen/include/asm-x86/x86_32/kexec.h @@ -1,17 +1,33 @@ -#ifndef __X86_32_KEXEC_H__ -#define __X86_32_KEXEC_H__ +/****************************************************************************** + * kexec.h + * + * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1 + * + */ + +#ifndef __X86_KEXEC_X86_32_H__ +#define __X86_KEXEC_X86_32_H__ -#include <xen/lib.h> /* for printk() used in stub */ #include <xen/types.h> -#include <public/xen.h> #include <xen/kexec.h> +#include <asm/fixmap.h> + +typedef asmlinkage void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address, + unsigned int has_pae); static inline void machine_kexec(xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + relocate_new_kernel_t rnk; + + rnk = (relocate_new_kernel_t) image->page_list[1]; + (*rnk)(image->indirection_page, (unsigned long)image->page_list, + image->start_address, (unsigned long)cpu_has_pae); } -#endif /* __X86_32_KEXEC_H__ */ +#endif /* __X86_KEXEC_X86_32_H__ */ /* * Local variables: _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |