[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 03/04] Kexec / Kdump: x86_32 specific code



[PATCH 03/04] Kexec / Kdump: x86_32 specific code

This patch contains the x86_32 implementation of Kexec / Kdump for Xen.

Signed-Off-By: Magnus Damm <magnus@xxxxxxxxxxxxx>
---

 Applies on top of xen-unstable-12281.

 buildconfigs/linux-defconfig_xen_x86_32                          |   2
 linux-2.6-xen-sparse/arch/i386/Kconfig                           |   2
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c                |  19
 linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h                |  30
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h   |   8
 patches/linux-2.6.16.29/git-35...c9.patch                        | 401 ++++++
 patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-i386.patch     |  31
 patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec...code-i386.patch | 169 +++
 patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-i386.patch    |  54 +
 patches/linux-2.6.16.29/series                                   |   4
 xen/arch/x86/x86_32/entry.S                                      |   2
 xen/include/asm-x86/x86_32/elf.h                                 |  38
 xen/include/asm-x86/x86_32/kexec.h                               |  28
 13 files changed, 777 insertions(+), 11 deletions(-)

--- 0002/buildconfigs/linux-defconfig_xen_x86_32
+++ work/buildconfigs/linux-defconfig_xen_x86_32
@@ -183,6 +183,7 @@ CONFIG_MTRR=y
 CONFIG_REGPARM=y
 CONFIG_SECCOMP=y
 CONFIG_HZ_100=y
+CONFIG_KEXEC=y
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=100
@@ -1036,6 +1037,7 @@ CONFIG_DNOTIFY=y
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
+# CONFIG_PROC_VMCORE is not set
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 # CONFIG_HUGETLB_PAGE is not set
--- 0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ work/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && !X86_XEN
+       depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
--- 0001/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ work/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -69,6 +69,10 @@
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
 /* Forward Declaration. */
 void __init find_max_pfn(void);
 
@@ -943,6 +947,7 @@ static void __init parse_cmdline_early (
                 * after a kernel panic.
                 */
                else if (!memcmp(from, "crashkernel=", 12)) {
+#ifndef CONFIG_XEN
                        unsigned long size, base;
                        size = memparse(from+12, &from);
                        if (*from == '@') {
@@ -953,6 +958,10 @@ static void __init parse_cmdline_early (
                                crashk_res.start = base;
                                crashk_res.end   = base + size - 1;
                        }
+#else
+                       printk("Ignoring crashkernel command line, "
+                              "parameter will be supplied by xen\n");
+#endif
                }
 #endif
 #ifdef CONFIG_PROC_VMCORE
@@ -1322,10 +1331,14 @@ void __init setup_bootmem_allocator(void
        }
 #endif
 #ifdef CONFIG_KEXEC
+#ifdef CONFIG_XEN
+       xen_machine_kexec_setup_resources();
+#else
        if (crashk_res.start != crashk_res.end)
                reserve_bootmem(crashk_res.start,
                        crashk_res.end - crashk_res.start + 1);
 #endif
+#endif
 
        if (!xen_feature(XENFEAT_auto_translated_physmap))
                phys_to_machine_mapping =
@@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e
                        request_resource(res, data_resource);
 #endif
 #ifdef CONFIG_KEXEC
-                       request_resource(res, &crashk_res);
+                       if (crashk_res.start != crashk_res.end)
+                            request_resource(res, &crashk_res);
+#ifdef CONFIG_XEN
+                       xen_machine_kexec_register_resources(res);
+#endif
 #endif
                }
        }
--- /dev/null
+++ work/linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h
@@ -0,0 +1,30 @@
+#ifndef _I386_KEXEC_XEN_H
+#define _I386_KEXEC_XEN_H
+
+/* Kexec needs to know about the actual physical address.
+ * But in xen, on some architectures, a physical address is a
+ * pseudo-physical address. */
+
+#ifdef CONFIG_XEN
+#define kexec_page_to_pfn(page)  pfn_to_mfn(page_to_pfn(page))
+#define kexec_pfn_to_page(pfn)   pfn_to_page(mfn_to_pfn(pfn))
+#define kexec_virt_to_phys(addr) virt_to_machine(addr)
+#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
+#else
+#define kexec_page_to_pfn(page)  page_to_pfn(page)
+#define kexec_pfn_to_page(pfn)   pfn_to_page(pfn)
+#define kexec_virt_to_phys(addr) virt_to_phys(addr)
+#define kexec_phys_to_virt(addr) phys_to_virt(addr)
+#endif
+
+#endif /* _I386_KEXEC_XEN_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
--- 0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
@@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op(
        return _hypercall2(int, xenoprof_op, op, arg);
 }
 
+static inline int
+HYPERVISOR_kexec_op(
+       unsigned long op, void *args)
+{
+       return _hypercall2(int, kexec_op, op, args);
+}
+
+
 
 #endif /* __HYPERCALL_H__ */
--- /dev/null
+++ work/patches/linux-2.6.16.29/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
@@ -0,0 +1,401 @@
+From: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
+Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9
+
+[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+
+kexec: Avoid overwriting the current pgd (V4, i386)
+
+This patch upgrades the i386-specific kexec code to avoid overwriting the
+current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
+to start a secondary kernel that dumps the memory of the previous kernel.
+
+The code introduces a new set of page tables. These tables are used to provide
+an executable identity mapping without overwriting the current pgd.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+---
+
+--- a/arch/i386/kernel/machine_kexec.c
++++ b/arch/i386/kernel/machine_kexec.c
+@@ -21,70 +21,13 @@
+ #include <asm/system.h>
+ 
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+-
+-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L2_ATTR (_PAGE_PRESENT)
+-
+-#define LEVEL0_SIZE (1UL << 12UL)
+-
+-#ifndef CONFIG_X86_PAE
+-#define LEVEL1_SIZE (1UL << 22UL)
+-static u32 pgtable_level1[1024] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+-      unsigned long level1_index, level2_index;
+-      u32 *pgtable_level2;
+-
+-      /* Find the current page table */
+-      pgtable_level2 = __va(read_cr3());
+-
+-      /* Find the indexes of the physical address to identity map */
+-      level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+-      level2_index = address / LEVEL1_SIZE;
+-
+-      /* Identity map the page table entry */
+-      pgtable_level1[level1_index] = address | L0_ATTR;
+-      pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-
+-      /* Flush the tlb so the new mapping takes effect.
+-       * Global tlb entries are not flushed but that is not an issue.
+-       */
+-      load_cr3(pgtable_level2);
+-}
+-
+-#else
+-#define LEVEL1_SIZE (1UL << 21UL)
+-#define LEVEL2_SIZE (1UL << 30UL)
+-static u64 pgtable_level1[512] PAGE_ALIGNED;
+-static u64 pgtable_level2[512] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+-      unsigned long level1_index, level2_index, level3_index;
+-      u64 *pgtable_level3;
+-
+-      /* Find the current page table */
+-      pgtable_level3 = __va(read_cr3());
+-
+-      /* Find the indexes of the physical address to identity map */
+-      level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+-      level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+-      level3_index = address / LEVEL2_SIZE;
+-
+-      /* Identity map the page table entry */
+-      pgtable_level1[level1_index] = address | L0_ATTR;
+-      pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-      set_64bit(&pgtable_level3[level3_index],
+-                                             __pa(pgtable_level2) | L2_ATTR);
+-
+-      /* Flush the tlb so the new mapping takes effect.
+-       * Global tlb entries are not flushed but that is not an issue.
+-       */
+-      load_cr3(pgtable_level3);
+-}
++static u32 kexec_pgd[1024] PAGE_ALIGNED;
++#ifdef CONFIG_X86_PAE
++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+ #endif
++static u32 kexec_pte0[1024] PAGE_ALIGNED;
++static u32 kexec_pte1[1024] PAGE_ALIGNED;
+ 
+ static void set_idt(void *newidt, __u16 limit)
+ {
+@@ -128,16 +71,6 @@ static void load_segments(void)
+ #undef __STR
+ }
+ 
+-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+-                                      unsigned long indirection_page,
+-                                      unsigned long reboot_code_buffer,
+-                                      unsigned long start_address,
+-                                      unsigned int has_pae) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern void relocate_new_kernel_end(void);
+-extern const unsigned int relocate_new_kernel_size;
+-
+ /*
+  * A architecture hook called to validate the
+  * proposed image and prepare the control pages
+@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage
+  */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+-      unsigned long page_list;
+-      unsigned long reboot_code_buffer;
+-
+-      relocate_new_kernel_t rnk;
++      unsigned long page_list[PAGES_NR];
++      void *control_page;
+ 
+       /* Interrupts aren't acceptable while we reboot */
+       local_irq_disable();
+ 
+-      /* Compute some offsets */
+-      reboot_code_buffer = page_to_pfn(image->control_code_page)
+-                                                              << PAGE_SHIFT;
+-      page_list = image->head;
+-
+-      /* Set up an identity mapping for the reboot_code_buffer */
+-      identity_map_page(reboot_code_buffer);
+-
+-      /* copy it out */
+-      memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+-                                              relocate_new_kernel_size);
++      control_page = page_address(image->control_code_page);
++      memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++      page_list[PA_CONTROL_PAGE] = __pa(control_page);
++      page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++      page_list[PA_PGD] = __pa(kexec_pgd);
++      page_list[VA_PGD] = (unsigned long)kexec_pgd;
++#ifdef CONFIG_X86_PAE
++      page_list[PA_PMD_0] = __pa(kexec_pmd0);
++      page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++      page_list[PA_PMD_1] = __pa(kexec_pmd1);
++      page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++#endif
++      page_list[PA_PTE_0] = __pa(kexec_pte0);
++      page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++      page_list[PA_PTE_1] = __pa(kexec_pte1);
++      page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ 
+       /* The segment registers are funny things, they have both a
+        * visible and an invisible part.  Whenever the visible part is
+@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim
+       set_idt(phys_to_virt(0),0);
+ 
+       /* now call it */
+-      rnk = (relocate_new_kernel_t) reboot_code_buffer;
+-      (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
++      relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++                      image->start, cpu_has_pae);
+ }
+ 
+ /* crashkernel=size@addr specifies the location to reserve for
+--- a/arch/i386/kernel/relocate_kernel.S
++++ b/arch/i386/kernel/relocate_kernel.S
+@@ -7,16 +7,138 @@
+  */
+ 
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
++
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 2)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
++
++      .text
++      .align PAGE_ALIGNED
++      .globl relocate_kernel
++relocate_kernel:
++      movl    8(%esp), %ebp /* list of pages */
++
++#ifdef CONFIG_X86_PAE
++      /* map the control page at its virtual address */
++
++      movl    PTR(VA_PGD)(%ebp), %edi
++      movl    PTR(VA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0xc0000000, %eax
++      shrl    $27, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PMD_0)(%ebp), %edx
++      orl     $PAE_PGD_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PMD_0)(%ebp), %edi
++      movl    PTR(VA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x3fe00000, %eax
++      shrl    $18, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PTE_0)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PTE_0)(%ebp), %edi
++      movl    PTR(VA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x001ff000, %eax
++      shrl    $9, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      /* identity map the control page at its physical address */
++
++      movl    PTR(VA_PGD)(%ebp), %edi
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0xc0000000, %eax
++      shrl    $27, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PMD_1)(%ebp), %edx
++      orl     $PAE_PGD_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PMD_1)(%ebp), %edi
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x3fe00000, %eax
++      shrl    $18, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PTE_1)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PTE_1)(%ebp), %edi
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x001ff000, %eax
++      shrl    $9, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++#else
++      /* map the control page at its virtual address */
++
++      movl    PTR(VA_PGD)(%ebp), %edi
++      movl    PTR(VA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0xffc00000, %eax
++      shrl    $20, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PTE_0)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PTE_0)(%ebp), %edi
++      movl    PTR(VA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x003ff000, %eax
++      shrl    $10, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      /* identity map the control page at its physical address */
++
++      movl    PTR(VA_PGD)(%ebp), %edi
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0xffc00000, %eax
++      shrl    $20, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_PTE_1)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++
++      movl    PTR(VA_PTE_1)(%ebp), %edi
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %eax
++      andl    $0x003ff000, %eax
++      shrl    $10, %eax
++      addl    %edi, %eax
++
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %edx
++      orl     $PAGE_ATTR, %edx
++      movl    %edx, (%eax)
++#endif
+ 
+-      /*
+-       * Must be relocatable PIC code callable as a C function, that once
+-       * it starts can not use the previous processes stack.
+-       */
+-      .globl relocate_new_kernel
+ relocate_new_kernel:
+       /* read the arguments and say goodbye to the stack */
+       movl  4(%esp), %ebx /* page_list */
+-      movl  8(%esp), %ebp /* reboot_code_buffer */
++      movl  8(%esp), %ebp /* list of pages */
+       movl  12(%esp), %edx /* start address */
+       movl  16(%esp), %ecx /* cpu_has_pae */
+ 
+@@ -24,11 +146,26 @@ relocate_new_kernel:
+       pushl $0
+       popfl
+ 
+-      /* set a new stack at the bottom of our page... */
+-      lea   4096(%ebp), %esp
++      /* get physical address of control page now */
++      /* this is impossible after page table switch */
++      movl    PTR(PA_CONTROL_PAGE)(%ebp), %edi
++
++      /* switch to new set of page tables */
++      movl    PTR(PA_PGD)(%ebp), %eax
++      movl    %eax, %cr3
++
++      /* setup a new stack at the end of the physical control page */
++      lea     4096(%edi), %esp
+ 
+-      /* store the parameters back on the stack */
+-      pushl   %edx /* store the start address */
++      /* jump to identity mapped page */
++      movl    %edi, %eax
++      addl    $(identity_mapped - relocate_kernel), %eax
++      pushl   %eax
++      ret
++
++identity_mapped:
++      /* store the start address on the stack */
++      pushl   %edx
+ 
+       /* Set cr0 to a known state:
+        * 31 0 == Paging disabled
+@@ -113,8 +250,3 @@ relocate_new_kernel:
+       xorl    %edi, %edi
+       xorl    %ebp, %ebp
+       ret
+-relocate_new_kernel_end:
+-
+-      .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+-      .long relocate_new_kernel_end - relocate_new_kernel
+--- a/include/asm-i386/kexec.h
++++ b/include/asm-i386/kexec.h
+@@ -1,6 +1,26 @@
+ #ifndef _I386_KEXEC_H
+ #define _I386_KEXEC_H
+ 
++#define PA_CONTROL_PAGE  0
++#define VA_CONTROL_PAGE  1
++#define PA_PGD           2
++#define VA_PGD           3
++#define PA_PTE_0         4
++#define VA_PTE_0         5
++#define PA_PTE_1         6
++#define VA_PTE_1         7
++#ifdef CONFIG_X86_PAE
++#define PA_PMD_0         8
++#define VA_PMD_0         9
++#define PA_PMD_1         10
++#define VA_PMD_1         11
++#define PAGES_NR         12
++#else
++#define PAGES_NR         8
++#endif
++
++#ifndef __ASSEMBLY__
++
+ #include <asm/fixmap.h>
+ #include <asm/ptrace.h>
+ #include <asm/string.h>
+@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
+                newregs->eip = (unsigned long)current_text_addr();
+        }
+ }
++asmlinkage NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++              unsigned long control_page,
++              unsigned long start_address,
++              unsigned int has_pae) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
+ 
+ #endif /* _I386_KEXEC_H */
--- /dev/null
+++ work/patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-i386.patch
@@ -0,0 +1,31 @@
+--- 0001/arch/i386/kernel/crash.c
++++ work/arch/i386/kernel/crash.c      2006-10-26 15:45:35.000000000 +0900
+@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
+       crash_save_this_cpu(regs, cpu);
+ }
+ 
++#ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+ static atomic_t waiting_for_crash_ipi;
+ 
+@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void)
+       /* There are no cpus to shootdown */
+ }
+ #endif
++#endif /* CONFIG_XEN */
+ 
+ void machine_crash_shutdown(struct pt_regs *regs)
+ {
+@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re
+ 
+       /* Make a note of crashing cpu. Will be used in NMI callback.*/
+       crashing_cpu = smp_processor_id();
++#ifndef CONFIG_XEN
+       nmi_shootdown_cpus();
+       lapic_shutdown();
+ #if defined(CONFIG_X86_IO_APIC)
+       disable_IO_APIC();
+ #endif
++#endif /* CONFIG_XEN */
+       crash_save_self(regs);
+ }
--- /dev/null
+++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
@@ -0,0 +1,169 @@
+kexec: Move asm segment handling code to the assembly file (i386)
+
+This patch moves the idt, gdt, and segment handling code from machine_kexec.c
+to relocate_kernel.S. The main reason behind this move is to avoid code 
+duplication in the Xen hypervisor. With this patch all code required to kexec
+is put on the control page.
+
+On top of that this patch also counts as a cleanup - I think it is much
+nicer to write assembly directly in assembly files than wrap inline assembly
+in C functions for no apparent reason.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+---
+
+ Applies to 2.6.19-rc1.
+
+ machine_kexec.c   |   59 -----------------------------------------------------
+ relocate_kernel.S |   58 +++++++++++++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 53 insertions(+), 64 deletions(-)
+
+--- 0002/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c      2006-10-05 15:49:08.000000000 +0900
+@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+ 
+-static void set_idt(void *newidt, __u16 limit)
+-{
+-      struct Xgt_desc_struct curidt;
+-
+-      /* ia32 supports unaliged loads & stores */
+-      curidt.size    = limit;
+-      curidt.address = (unsigned long)newidt;
+-
+-      load_idt(&curidt);
+-};
+-
+-
+-static void set_gdt(void *newgdt, __u16 limit)
+-{
+-      struct Xgt_desc_struct curgdt;
+-
+-      /* ia32 supports unaligned loads & stores */
+-      curgdt.size    = limit;
+-      curgdt.address = (unsigned long)newgdt;
+-
+-      load_gdt(&curgdt);
+-};
+-
+-static void load_segments(void)
+-{
+-#define __STR(X) #X
+-#define STR(X) __STR(X)
+-
+-      __asm__ __volatile__ (
+-              "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+-              "\t1:\n"
+-              "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
+-              "\tmovl %%eax,%%ds\n"
+-              "\tmovl %%eax,%%es\n"
+-              "\tmovl %%eax,%%fs\n"
+-              "\tmovl %%eax,%%gs\n"
+-              "\tmovl %%eax,%%ss\n"
+-              ::: "eax", "memory");
+-#undef STR
+-#undef __STR
+-}
+-
+ /*
+  * A architecture hook called to validate the
+  * proposed image and prepare the control pages
+@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim
+       page_list[PA_PTE_1] = __pa(kexec_pte1);
+       page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ 
+-      /* The segment registers are funny things, they have both a
+-       * visible and an invisible part.  Whenever the visible part is
+-       * set to a specific selector, the invisible part is loaded
+-       * with from a table in memory.  At no other time is the
+-       * descriptor table in memory accessed.
+-       *
+-       * I take advantage of this here by force loading the
+-       * segments, before I zap the gdt with an invalid value.
+-       */
+-      load_segments();
+-      /* The gdt & idt are now invalid.
+-       * If you want to load them you must set up your own idt & gdt.
+-       */
+-      set_gdt(phys_to_virt(0),0);
+-      set_idt(phys_to_virt(0),0);
+-
+-      /* now call it */
+       relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+                       image->start, cpu_has_pae);
+ }
+--- 0002/arch/i386/kernel/relocate_kernel.S
++++ work/arch/i386/kernel/relocate_kernel.S    2006-10-05 16:03:21.000000000 +0900
+@@ -154,14 +154,45 @@ relocate_new_kernel:
+       movl    PTR(PA_PGD)(%ebp), %eax
+       movl    %eax, %cr3
+ 
++      /* setup idt */
++      movl    %edi, %eax
++      addl    $(idt_48 - relocate_kernel), %eax
++      lidtl   (%eax)
++
++      /* setup gdt */
++      movl    %edi, %eax
++      addl    $(gdt - relocate_kernel), %eax
++      movl    %edi, %esi
++      addl    $((gdt_48 - relocate_kernel) + 2), %esi
++      movl    %eax, (%esi)
++      
++      movl    %edi, %eax
++      addl    $(gdt_48 - relocate_kernel), %eax
++      lgdtl   (%eax)
++
++      /* setup data segment registers */
++      mov     $(gdt_ds - gdt), %eax
++      mov     %eax, %ds
++      mov     %eax, %es
++      mov     %eax, %fs
++      mov     %eax, %gs
++      mov     %eax, %ss
++      
+       /* setup a new stack at the end of the physical control page */
+       lea     4096(%edi), %esp
+ 
+-      /* jump to identity mapped page */
+-      movl    %edi, %eax
+-      addl    $(identity_mapped - relocate_kernel), %eax
+-      pushl   %eax
+-      ret
++      /* load new code segment and jump to identity mapped page */
++      movl    %edi, %esi
++      xorl    %eax, %eax
++      pushl   %eax
++      pushl   %esi
++      pushl   %eax
++      movl    $(gdt_cs - gdt), %eax
++      pushl   %eax    
++      movl    %edi, %eax
++      addl    $(identity_mapped - relocate_kernel),%eax
++      pushl   %eax
++      iretl
+ 
+ identity_mapped:
+       /* store the start address on the stack */
+@@ -250,3 +281,20 @@ identity_mapped:
+       xorl    %edi, %edi
+       xorl    %ebp, %ebp
+       ret
++
++      .align  16
++gdt:
++      .quad   0x0000000000000000      /* NULL descriptor */
++gdt_cs:       
++      .quad   0x00cf9a000000ffff      /* kernel 4GB code at 0x00000000 */
++gdt_ds:
++      .quad   0x00cf92000000ffff      /* kernel 4GB data at 0x00000000 */
++gdt_end:
++      
++gdt_48:
++      .word   gdt_end - gdt - 1       /* limit */
++      .long   0                       /* base - filled in by code above */
++
++idt_48:
++      .word   0                       /* limit */
++      .long   0                       /* base */
--- /dev/null
+++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-i386.patch
@@ -0,0 +1,54 @@
+--- 0004/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c      2006-10-11 18:34:06.000000000 +0900
+@@ -20,6 +20,10 @@
+ #include <asm/desc.h>
+ #include <asm/system.h>
+ 
++#ifdef CONFIG_XEN
++#include <xen/interface/kexec.h>
++#endif
++
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+ static u32 kexec_pgd[1024] PAGE_ALIGNED;
+ #ifdef CONFIG_X86_PAE
+@@ -29,6 +33,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+ 
++#ifdef CONFIG_XEN
++
++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
++
++#if PAGES_NR > KEXEC_XEN_NO_PAGES
++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
++#endif
++
++#if PA_CONTROL_PAGE != 0
++#error PA_CONTROL_PAGE is non zero - Xen support will break
++#endif
++
+void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
++{
++      void *control_page;
++
++      memset(xki->page_list, 0, sizeof(xki->page_list));
++
++      control_page = page_address(image->control_code_page);
++      memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++      xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
++      xki->page_list[PA_PGD] = __ma(kexec_pgd);
++#ifdef CONFIG_X86_PAE
++      xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
++      xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
++#endif
++      xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
++      xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
++
++}
++
++#endif /* CONFIG_XEN */
++
+ /*
+  * A architecture hook called to validate the
+  * proposed image and prepare the control pages
--- 0004/patches/linux-2.6.16.29/series
+++ work/patches/linux-2.6.16.29/series
@@ -1,6 +1,10 @@
 kexec-generic.patch
 git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
 git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
+git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
+linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
+linux-2.6.19-rc1-kexec-xen-i386.patch
+linux-2.6.16.29-crash-xen-i386.patch
 blktap-aio-16_03_06.patch
 device_bind.patch
 fix-hz-suspend.patch
--- 0001/xen/arch/x86/x86_32/entry.S
+++ work/xen/arch/x86/x86_32/entry.S
@@ -672,6 +672,7 @@ ENTRY(hypercall_table)
         .long do_hvm_op
         .long do_sysctl             /* 35 */
         .long do_domctl
+        .long do_kexec_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -714,6 +715,7 @@ ENTRY(hypercall_args_table)
         .byte 2 /* do_hvm_op            */
         .byte 1 /* do_sysctl            */  /* 35 */
         .byte 1 /* do_domctl            */
+        .byte 2 /* do_kexec_op          */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
--- 0004/xen/include/asm-x86/x86_32/elf.h
+++ work/xen/include/asm-x86/x86_32/elf.h
@@ -1,15 +1,47 @@
 #ifndef __X86_32_ELF_H__
 #define __X86_32_ELF_H__
 
-#include <xen/lib.h>       /* for printk() used in stub */
+#include <asm/processor.h>
 
 typedef struct {
-    unsigned long dummy;
+    unsigned long ebx;
+    unsigned long ecx;
+    unsigned long edx;
+    unsigned long esi;
+    unsigned long edi;
+    unsigned long ebp;
+    unsigned long eax;
+    unsigned long ds;
+    unsigned long es;
+    unsigned long fs;
+    unsigned long gs;
+    unsigned long orig_eax;
+    unsigned long eip;
+    unsigned long cs;
+    unsigned long eflags;
+    unsigned long esp;
+    unsigned long ss;
 } ELF_Gregset;
 
 extern inline void elf_core_save_regs(ELF_Gregset *dst)
 {
-    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+    asm volatile("movl %%ebx,%0" : "=m"(dst->ebx));
+    asm volatile("movl %%ecx,%0" : "=m"(dst->ecx));
+    asm volatile("movl %%edx,%0" : "=m"(dst->edx));
+    asm volatile("movl %%esi,%0" : "=m"(dst->esi));
+    asm volatile("movl %%edi,%0" : "=m"(dst->edi));
+    asm volatile("movl %%ebp,%0" : "=m"(dst->ebp));
+    asm volatile("movl %%eax,%0" : "=m"(dst->eax));
+    asm volatile("movw %%ds, %%ax;" :"=a"(dst->ds));
+    asm volatile("movw %%es, %%ax;" :"=a"(dst->es));
+    asm volatile("movw %%fs, %%ax;" :"=a"(dst->fs));
+    asm volatile("movw %%gs, %%ax;" :"=a"(dst->gs));
+    /* orig_eax not filled in for now */
+    dst->eip = (unsigned long)current_text_addr();
+    asm volatile("movw %%cs, %%ax;" :"=a"(dst->cs));
+    asm volatile("pushfl; popl %0" :"=m"(dst->eflags));
+    asm volatile("movl %%esp,%0" : "=m"(dst->esp));
+    asm volatile("movw %%ss, %%ax;" :"=a"(dst->ss));
 }
 
 #endif /* __X86_32_ELF_H__ */
--- 0004/xen/include/asm-x86/x86_32/kexec.h
+++ work/xen/include/asm-x86/x86_32/kexec.h
@@ -1,17 +1,33 @@
-#ifndef __X86_32_KEXEC_H__
-#define __X86_32_KEXEC_H__
+/******************************************************************************
+ * kexec.h
+ * 
+ * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
+ *
+ */
+  
+#ifndef __X86_KEXEC_X86_32_H__
+#define __X86_KEXEC_X86_32_H__
 
-#include <xen/lib.h>       /* for printk() used in stub */
 #include <xen/types.h>
-#include <public/xen.h>
 #include <xen/kexec.h>
+#include <asm/fixmap.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+               unsigned long indirection_page,
+               unsigned long page_list,
+               unsigned long start_address,
+               unsigned int has_pae);
 
 static inline void machine_kexec(xen_kexec_image_t *image)
 {
-    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+    relocate_new_kernel_t rnk;
+
+    rnk = (relocate_new_kernel_t) image->page_list[1];
+    (*rnk)(image->indirection_page, (unsigned long)image->page_list, 
+           image->start_address, (unsigned long)cpu_has_pae);
 }
 
-#endif /* __X86_32_KEXEC_H__ */
+#endif /* __X86_KEXEC_X86_32_H__ */
 
 /*
  * Local variables:

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.