diff -Npru kexec-tools-2.0.3.orig/configure.ac kexec-tools-2.0.3/configure.ac --- kexec-tools-2.0.3.orig/configure.ac 2012-01-15 23:17:28.000000000 +0100 +++ kexec-tools-2.0.3/configure.ac 2012-03-02 22:15:00.000000000 +0100 @@ -157,15 +157,23 @@ if test "$with_lzma" = yes ; then AC_MSG_NOTICE([lzma support disabled]))) fi -dnl find Xen control stack libraries -if test "$with_xen" = yes ; then - AC_CHECK_HEADER(xenctrl.h, - AC_CHECK_LIB(xenctrl, xc_version, , - AC_MSG_NOTICE([Xen support disabled]))) - if test "$ac_cv_lib_xenctrl_xc_version" = yes ; then - AC_CHECK_FUNCS(xc_get_machine_memory_map) - fi -fi +dnl Check for Xen support +case $ARCH in + i386|x86_64 ) + if test "$with_xen" = yes ; then + AC_CHECK_HEADER(xenctrl.h, + AC_CHECK_LIB(xenctrl, xc_version, , + AC_MSG_NOTICE([Xen support disabled]))) + if test "$ac_cv_lib_xenctrl_xc_version" = yes ; then + AC_CHECK_FUNCS(xc_get_machine_memory_map) + AC_CHECK_FUNCS(xc_get_memory_map) + fi + fi + ;; + * ) + AC_MSG_NOTICE([Xen is not supported on this architecture]) + ;; +esac dnl ---Sanity checks if test "$CC" = "no"; then AC_MSG_ERROR([cc not found]); fi diff -Npru kexec-tools-2.0.3.orig/kexec/Makefile kexec-tools-2.0.3/kexec/Makefile --- kexec-tools-2.0.3.orig/kexec/Makefile 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/Makefile 2012-03-27 09:27:33.000000000 +0200 @@ -20,7 +20,6 @@ KEXEC_SRCS += kexec/kexec-elf-boot.c KEXEC_SRCS += kexec/kexec-iomem.c KEXEC_SRCS += kexec/firmware_memmap.c KEXEC_SRCS += kexec/crashdump.c -KEXEC_SRCS += kexec/crashdump-xen.c KEXEC_SRCS += kexec/phys_arch.c KEXEC_SRCS += kexec/kernel_version.c KEXEC_SRCS += kexec/lzma.c diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/Makefile kexec-tools-2.0.3/kexec/arch/i386/Makefile --- kexec-tools-2.0.3.orig/kexec/arch/i386/Makefile 2010-07-29 11:22:16.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/i386/Makefile 2012-05-22 11:12:53.000000000 +0200 @@ -9,8 +9,13 @@ i386_KEXEC_SRCS += kexec/arch/i386/kexec i386_KEXEC_SRCS += 
kexec/arch/i386/kexec-multiboot-x86.c i386_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c i386_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-x86-xen-common.c +i386_KEXEC_SRCS += kexec/arch/i386/kexec-xen-pv.c +i386_KEXEC_SRCS += kexec/arch/i386/i386-xen-pv.c +i386_KEXEC_SRCS += kexec/arch/i386/i386-xen-pv-kernel-bootstrap.S i386_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c i386_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c +i386_KEXEC_SRCS += kexec/arch/i386/crashdump-x86-xen.c dist += kexec/arch/i386/Makefile $(i386_KEXEC_SRCS) \ kexec/arch/i386/kexec-x86.h kexec/arch/i386/crashdump-x86.h \
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/crashdump-x86-xen.c kexec-tools-2.0.3/kexec/arch/i386/crashdump-x86-xen.c
--- kexec-tools-2.0.3.orig/kexec/arch/i386/crashdump-x86-xen.c	1970-01-01 01:00:00.000000000 +0100
+++ kexec-tools-2.0.3/kexec/arch/i386/crashdump-x86-xen.c	2012-04-27 15:04:51.000000000 +0200
@@ -0,0 +1,154 @@
+#include "config.h"
+
+#ifdef HAVE_LIBXENCTRL
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../../kexec.h"
+#include "../../kexec-xen.h"
+#include "../../crashdump.h"
+
+/* Base and length of one region matched by the "Crash note" scan. */
+struct crash_note_info {
+	unsigned long base;
+	unsigned long length;
+};
+
+/* Number of entries in xen_phys_notes; stays 0 until the scan has run. */
+static int xen_phys_cpus;
+static struct crash_note_info *xen_phys_notes;
+
+/*
+ * Return the ELF machine type to use for the crash dump.
+ *
+ * Outside of dom0, or when the Xen control interface cannot be queried,
+ * elf_info->machine is returned unchanged. Otherwise the hypervisor
+ * capabilities string selects EM_X86_64 or EM_386.
+ */
+unsigned long xen_architecture(struct crash_elf_info *elf_info)
+{
+	unsigned long machine = elf_info->machine;
+	int rc;
+	xen_capabilities_info_t capabilities;
+#ifdef XENCTRL_HAS_XC_INTERFACE
+	xc_interface *xc;
+#else
+	int xc;
+#endif
+
+	if (!(xen_detect() & XEN_DOM0))
+		goto out;
+
+	/* Zero-fill (0, not ASCII '0') so strstr() always finds a terminator. */
+	memset(capabilities, 0, XEN_CAPABILITIES_INFO_LEN);
+
+#ifdef XENCTRL_HAS_XC_INTERFACE
+	xc = xc_interface_open(NULL, NULL, 0);
+	if ( !xc ) {
+		fprintf(stderr, "failed to open xen control interface.\n");
+		goto out;
+	}
+#else
+	xc = xc_interface_open();
+	if ( xc == -1 ) {
+		fprintf(stderr, "failed to open xen control interface.\n");
+		goto out;
+	}
+#endif
+
+	rc = xc_version(xc, XENVER_capabilities, &capabilities[0]);
+	if ( rc == -1 ) {
+		fprintf(stderr, "failed to make Xen version hypercall.\n");
+		goto out_close;
+	}
+
+	/* Do not rely on the hypervisor to terminate the string. */
+	capabilities[XEN_CAPABILITIES_INFO_LEN - 1] = '\0';
+
+	if (strstr(capabilities, "xen-3.0-x86_64"))
+		machine = EM_X86_64;
+	else if (strstr(capabilities, "xen-3.0-x86_32"))
+		machine = EM_386;
+
+ out_close:
+	xc_interface_close(xc);
+
+ out:
+	return machine;
+}
+
+/*
+ * Callback handed to kexec_iomem_for_each_line(): records base and length
+ * in xen_phys_notes[nr]. The table must already hold more than nr entries.
+ */
+static int xen_crash_note_callback(void *UNUSED(data), int nr,
+				   char *UNUSED(str),
+				   unsigned long base,
+				   unsigned long length)
+{
+	struct crash_note_info *note = xen_phys_notes + nr;
+
+	note->base = base;
+	note->length = length;
+
+	return 0;
+}
+
+/*
+ * Scan for "Crash note" lines and record base and length of each match.
+ *
+ * The count is cached in xen_phys_cpus. Returns the value reported by
+ * kexec_iomem_for_each_line(), or -1 if the table cannot be allocated.
+ */
+int xen_get_nr_phys_cpus(void)
+{
+	char *match = "Crash note\n";
+	int cpus;
+
+	if (xen_phys_cpus)
+		return xen_phys_cpus;
+
+	cpus = kexec_iomem_for_each_line(match, NULL, NULL);
+
+	/* Never turn a non-positive count into an allocation size. */
+	if (cpus > 0) {
+		xen_phys_notes = calloc(cpus, sizeof(*xen_phys_notes));
+		if (!xen_phys_notes) {
+			fprintf(stderr, "failed to allocate xen_phys_notes.\n");
+			return -1;
+		}
+		kexec_iomem_for_each_line(match,
+					  xen_crash_note_callback, NULL);
+		xen_phys_cpus = cpus;
+	}
+
+	return cpus;
+}
+
+/*
+ * Look up the crash note region recorded for cpu.
+ *
+ * Returns 0 and fills *addr and *len on success, -1 if no notes have
+ * been collected yet or cpu is outside the recorded table.
+ */
+int xen_get_note(int cpu, uint64_t *addr, uint64_t *len)
+{
+	struct crash_note_info *note;
+
+	/* Also rejects every cpu while xen_phys_cpus <= 0 (nothing collected). */
+	if (cpu < 0 || cpu >= xen_phys_cpus)
+		return -1;
+
+	note = xen_phys_notes + cpu;
+
+	*addr = note->base;
+	*len = note->length;
+
+	return 0;
+}
+#endif /* HAVE_LIBXENCTRL */
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/crashdump-x86.c kexec-tools-2.0.3/kexec/arch/i386/crashdump-x86.c --- kexec-tools-2.0.3.orig/kexec/arch/i386/crashdump-x86.c 2011-11-21 09:48:53.000000000 +0100 +++ kexec-tools-2.0.3/kexec/arch/i386/crashdump-x86.c 2012-04-27 14:47:04.000000000 +0200 @@ -30,6 +30,7 @@ #include "../../kexec.h" #include "../../kexec-elf.h" #include "../../kexec-syscall.h" +#include "../../kexec-xen.h" #include "../../crashdump.h" #include "kexec-x86.h" #include "crashdump-x86.h" @@ -71,7 +72,7 @@ static int get_kernel_paddr(struct kexec if
(elf_info->machine != EM_X86_64) return 0; - if (xen_present()) /* Kernel not entity mapped under Xen */ + if (xen_detect() & XEN_DOM0) /* Kernel not entity mapped under Xen dom0 */ return 0; if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { @@ -108,7 +109,7 @@ static int get_kernel_vaddr_and_size(str if (elf_info->machine != EM_X86_64) return 0; - if (xen_present()) /* Kernel not entity mapped under Xen */ + if (xen_detect() & XEN_DOM0) /* Kernel not entity mapped under Xen dom0 */ return 0; align = getpagesize(); diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/i386-xen-pv-kernel-bootstrap.S kexec-tools-2.0.3/kexec/arch/i386/i386-xen-pv-kernel-bootstrap.S --- kexec-tools-2.0.3.orig/kexec/arch/i386/i386-xen-pv-kernel-bootstrap.S 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/kexec/arch/i386/i386-xen-pv-kernel-bootstrap.S 2012-05-22 12:44:37.000000000 +0200 @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL + +#define __ASSEMBLY__ + +#include + +#include "kexec-x86-xen.h" + +#ifdef UVMF_INVLPG +#undef UVMF_INVLPG +#endif + +#define DOMID_SELF 0x7ff0 + +#define UVMF_INVLPG 2 + +#define VCPUOP_down 2 +#define VCPUOP_is_up 3 + +#define XPKB_TRANSITION 1 +#define XPKB_BOOTSTRAP 2 + + /* + * This code must be in .data section because it is updated + * by xen-pv loader (.text section is read only). However, + * it is never executed in place. It is copied by xen-pv loader + * to its destination and later called after purgatory code. + */ + + .data + .globl transition_pgtable_uvm, transition_pgtable_mfn, bootstrap_pgtable_mfn + .globl bootstrap_stack_vaddr, xen_pv_kernel_entry_vaddr, start_info_vaddr + .globl xen_pv_kernel_bootstrap, xen_pv_kernel_bootstrap_size + +xen_pv_kernel_bootstrap: + +transition_pgtable_uvm: + .rept TRANSITION_PGTABLE_SIZE + .quad __HYPERVISOR_update_va_mapping + .fill 3, 8, 0 + .quad UVMF_INVLPG + .fill 3, 8, 0 + .endr + +transition_pgtable_mfn: + .quad 0 /* MFN of transition page table directory. */ + +bootstrap_pgtable_mfn: + .quad 0 /* MFN of bootstrap page table directory. */ + +bootstrap_stack_vaddr: + .quad 0 /* VIRTUAL address of bootstrap stack. */ + +xen_pv_kernel_entry_vaddr: + .quad 0 /* VIRTUAL address of kernel entry point. */ + +start_info_vaddr: + .quad 0 /* VIRTUAL address of start info. */ + +mmuext_args: + .long MMUEXT_NEW_BASEPTR /* Operation. */ + .long 0 /* PAD. */ + +mmuext_new_baseptr: + .quad 0 /* MFN of target page table directory. */ + .quad 0 /* UNUSED. */ + + .long MMUEXT_NEW_USER_BASEPTR /* Operation. */ + .long 0 /* PAD. */ + +mmuext_new_user_baseptr: + .quad 0 /* MFN of user target page table directory. */ + .quad 0 /* UNUSED. */ + + .long MMUEXT_PIN_L4_TABLE /* Operation. */ + .long 0 /* PAD. */ + +mmuext_pin_l4_table: + .quad 0 /* MFN of page table directory to pin. */ + .quad 0 /* UNUSED. */ + +xen_pv_kernel_bootstrap_size: + .quad . 
- xen_pv_kernel_bootstrap /* Bootstrap size. */ +#endif /* HAVE_LIBXENCTRL */ diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/i386-xen-pv.c kexec-tools-2.0.3/kexec/arch/i386/i386-xen-pv.c --- kexec-tools-2.0.3.orig/kexec/arch/i386/i386-xen-pv.c 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/kexec/arch/i386/i386-xen-pv.c 2012-05-22 12:44:47.000000000 +0200 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL + +#include + +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "kexec-x86-xen.h" + +unsigned long build_bootstrap_pgtable(struct kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, int p2m_seg) +{ +} + +void build_transition_pgtable(struct kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + int p2m_seg, int bs_seg) +{ +} +#endif /* HAVE_LIBXENCTRL */ diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/include/arch/options.h kexec-tools-2.0.3/kexec/arch/i386/include/arch/options.h --- kexec-tools-2.0.3.orig/kexec/arch/i386/include/arch/options.h 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/i386/include/arch/options.h 2012-05-12 17:04:22.000000000 +0200 @@ -29,6 +29,7 @@ #define OPT_MOD (OPT_ARCH_MAX+7) #define OPT_VGA (OPT_ARCH_MAX+8) #define OPT_REAL_MODE (OPT_ARCH_MAX+9) +#define OPT_CONSOLE_XEN_PV (OPT_ARCH_MAX+10) /* Options relevant to the architecture (excluding loader-specific ones): */ #define KEXEC_ARCH_OPTIONS \ @@ -69,7 +70,8 @@ { "args-none", 0, NULL, OPT_ARGS_NONE }, \ { "debug", 0, NULL, OPT_DEBUG }, \ { "module", 1, 0, OPT_MOD }, \ - { "real-mode", 0, NULL, OPT_REAL_MODE }, + { "real-mode", 0, NULL, OPT_REAL_MODE }, \ + { "console-xen-pv", 0, NULL, OPT_CONSOLE_XEN_PV }, #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-common.c kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-common.c --- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-common.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-common.c 2012-03-27 10:06:17.000000000 +0200 @@ -17,41 +17,19 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#define _XOPEN_SOURCE 600 -#define _BSD_SOURCE - -#include -#include -#include #include #include -#include -#include -#include #include -#include -#include -#include -#include +#include + +#include "../../firmware_memmap.h" #include "../../kexec.h" #include "../../kexec-syscall.h" -#include "../../firmware_memmap.h" -#include "../../crashdump.h" +#include "../../kexec-xen.h" #include "kexec-x86.h" +#include "kexec-x86-xen.h" -#ifdef HAVE_LIBXENCTRL -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP -#include -#else -#define __XEN_TOOLS__ 1 -#include -#include -#include -#include -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ -#endif /* HAVE_LIBXENCTRL */ - -static struct memory_range memory_range[MAX_MEMORY_RANGES]; +struct memory_range memory_range[MAX_MEMORY_RANGES]; /** * The old /proc/iomem parsing code. @@ -150,172 +128,6 @@ static int get_memory_ranges_sysfs(struc return 0; } -#ifdef HAVE_LIBXENCTRL -static unsigned e820_to_kexec_type(uint32_t type) -{ - switch (type) { - case E820_RAM: - return RANGE_RAM; - case E820_ACPI: - return RANGE_ACPI; - case E820_NVS: - return RANGE_ACPI_NVS; - case E820_RESERVED: - default: - return RANGE_RESERVED; - } -} - -/** - * Memory map detection for Xen. - * - * @param[out] range pointer that will be set to an array that holds the - * memory ranges - * @param[out] ranges number of ranges valid in @p range - * - * @return 0 on success, any other value on failure. 
- */ -#ifdef HAVE_XC_GET_MACHINE_MEMORY_MAP -static int get_memory_ranges_xen(struct memory_range **range, int *ranges) -{ - int rc, ret = -1; - struct e820entry e820entries[MAX_MEMORY_RANGES]; - unsigned int i; -#ifdef XENCTRL_HAS_XC_INTERFACE - xc_interface *xc; -#else - int xc; -#endif - -#ifdef XENCTRL_HAS_XC_INTERFACE - xc = xc_interface_open(NULL, NULL, 0); - - if (!xc) { - fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; - } -#else - xc = xc_interface_open(); - - if (xc == -1) { - fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__); - goto err; - } -#endif - - rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES); - - if (rc < 0) { - fprintf(stderr, "%s: xc_get_machine_memory_map: %s\n", __func__, strerror(rc)); - goto err; - } - - for (i = 0; i < rc; ++i) { - memory_range[i].start = e820entries[i].addr; - memory_range[i].end = e820entries[i].addr + e820entries[i].size; - memory_range[i].type = e820_to_kexec_type(e820entries[i].type); - } - - qsort(memory_range, rc, sizeof(struct memory_range), compare_ranges); - - *range = memory_range; - *ranges = rc; - - ret = 0; - -err: - xc_interface_close(xc); - - return ret; -} -#else -static int get_memory_ranges_xen(struct memory_range **range, int *ranges) -{ - int fd, rc, ret = -1; - privcmd_hypercall_t hypercall; - struct e820entry *e820entries = NULL; - struct xen_memory_map *xen_memory_map = NULL; - unsigned int i; - - fd = open("/proc/xen/privcmd", O_RDWR); - - if (fd == -1) { - fprintf(stderr, "%s: open(/proc/xen/privcmd): %m\n", __func__); - goto err; - } - - rc = posix_memalign((void **)&e820entries, sysconf(_SC_PAGESIZE), - sizeof(struct e820entry) * MAX_MEMORY_RANGES); - - if (rc) { - fprintf(stderr, "%s: posix_memalign(e820entries): %s\n", __func__, strerror(rc)); - e820entries = NULL; - goto err; - } - - rc = posix_memalign((void **)&xen_memory_map, sysconf(_SC_PAGESIZE), - sizeof(struct xen_memory_map)); - - if (rc) { - 
fprintf(stderr, "%s: posix_memalign(xen_memory_map): %s\n", __func__, strerror(rc)); - xen_memory_map = NULL; - goto err; - } - - if (mlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES) == -1) { - fprintf(stderr, "%s: mlock(e820entries): %m\n", __func__); - goto err; - } - - if (mlock(xen_memory_map, sizeof(struct xen_memory_map)) == -1) { - fprintf(stderr, "%s: mlock(xen_memory_map): %m\n", __func__); - goto err; - } - - xen_memory_map->nr_entries = MAX_MEMORY_RANGES; - set_xen_guest_handle(xen_memory_map->buffer, e820entries); - - hypercall.op = __HYPERVISOR_memory_op; - hypercall.arg[0] = XENMEM_machine_memory_map; - hypercall.arg[1] = (__u64)xen_memory_map; - - rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hypercall); - - if (rc == -1) { - fprintf(stderr, "%s: ioctl(IOCTL_PRIVCMD_HYPERCALL): %m\n", __func__); - goto err; - } - - for (i = 0; i < xen_memory_map->nr_entries; ++i) { - memory_range[i].start = e820entries[i].addr; - memory_range[i].end = e820entries[i].addr + e820entries[i].size; - memory_range[i].type = e820_to_kexec_type(e820entries[i].type); - } - - qsort(memory_range, xen_memory_map->nr_entries, sizeof(struct memory_range), compare_ranges); - - *range = memory_range; - *ranges = xen_memory_map->nr_entries; - - ret = 0; - -err: - munlock(xen_memory_map, sizeof(struct xen_memory_map)); - munlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES); - free(xen_memory_map); - free(e820entries); - close(fd); - - return ret; -} -#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP */ -#else -static int get_memory_ranges_xen(struct memory_range **range, int *ranges) -{ - return 0; -} -#endif /* HAVE_LIBXENCTRL */ - static void remove_range(struct memory_range *range, int nr_ranges, int index) { int i, j; @@ -429,11 +241,11 @@ int get_memory_ranges(struct memory_rang { int ret, i; - if (!efi_map_added() && !xen_present() && have_sys_firmware_memmap()) { + if (!efi_map_added() && !(xen_detect() & XEN_PV) && have_sys_firmware_memmap()) { ret = 
get_memory_ranges_sysfs(range, ranges); if (!ret) ret = fixup_memory_ranges(range, ranges); - } else if (xen_present()) { + } else if (xen_detect() & XEN_PV) { ret = get_memory_ranges_xen(range, ranges); if (!ret) ret = fixup_memory_ranges(range, ranges); @@ -493,5 +305,3 @@ int get_memory_ranges(struct memory_rang return ret; } - -
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-xen-common.c kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-xen-common.c
--- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-xen-common.c	1970-01-01 01:00:00.000000000 +0100
+++ kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-xen-common.c	2012-05-22 12:34:14.000000000 +0200
@@ -0,0 +1,347 @@
+#include "config.h"
+
+#ifdef HAVE_LIBXENCTRL
+
+#define _XOPEN_SOURCE 600
+#define _BSD_SOURCE
+#define _GNU_SOURCE
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../../firmware_memmap.h"
+#include "../../kexec.h"
+#include "../../kexec-xen.h"
+#include "kexec-x86.h"
+
+#if defined(HAVE_XC_GET_MACHINE_MEMORY_MAP) && defined(HAVE_XC_GET_MEMORY_MAP)
+#include
+#else
+#define __XEN_TOOLS__ 1
+#include
+#include
+#include
+#include
+#endif
+
+#define XEN_CHECK_HVM 0
+#define XEN_CHECK_PV 1
+
+#ifdef __i386__
+#define R(x) "%%e"#x"x"
+#else
+#define R(x) "%%r"#x"x"
+#endif
+
+/* Saved by xen_detect(); target of the jump out of xen_sigill_handler(). */
+static sigjmp_buf xen_sigill_jmp;
+
+/* Based on code from xen-detect.c. */
+
+/* SIGILL handler: unwind to the sigsetjmp() call in xen_detect(). */
+static void xen_sigill_handler(int sig)
+{
+	/* siglongjmp() also restores the signal mask saved by sigsetjmp(). */
+	siglongjmp(xen_sigill_jmp, 1);
+}
+
+/* Based on code from xen-detect.c. */
+
+/*
+ * Execute CPUID leaf idx and store eax, ebx, ecx, edx in regs[0..3].
+ *
+ * With pv_context == 0 the plain instruction is used. Otherwise it is
+ * preceded by ud2a and the bytes "xen"; xen_detect() installs a SIGILL
+ * handler around such calls.
+ */
+static void xen_cpuid(uint32_t idx, uint32_t *regs, int pv_context)
+{
+	asm volatile (
+		"push "R(a)"; push "R(b)"; push "R(c)"; push "R(d)"\n\t"
+		"test %1,%1 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t"
+		"mov %%eax,(%2); mov %%ebx,4(%2)\n\t"
+		"mov %%ecx,8(%2); mov %%edx,12(%2)\n\t"
+		"pop "R(d)"; pop "R(c)"; pop "R(b)"; pop "R(a)"\n\t"
+		: : "a" (idx), "c" (pv_context), "S" (regs) : "memory" );
+}
+
+/* Based on code from xen-detect.c. */
+
+/*
+ * Scan CPUID leaves 0x40000000..0x4000ff00 (step 0x100) for the
+ * "XenVMMXenVMM" signature.
+ *
+ * Returns 1 if a leaf carries the signature and reports at least
+ * base + 2 in eax, 0 otherwise.
+ */
+static int check_for_xen(int pv_context)
+{
+	uint32_t regs[4];
+	char signature[13];
+	uint32_t base;
+
+	for (base = 0x40000000; base < 0x40010000; base += 0x100)
+	{
+		xen_cpuid(base, regs, pv_context);
+
+		/* regs[1..3] hold the 12 signature bytes; copy without type punning. */
+		memcpy(signature, &regs[1], 12);
+		signature[12] = '\0';
+
+		if (strcmp("XenVMMXenVMM", signature) == 0 && regs[0] >= (base + 2))
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Based on code from xen-detect.c. */
+
+/*
+ * Detect the kind of Xen domain this process runs in.
+ *
+ * The answer is computed once and cached. Returns XEN_HVM, XEN_NONE,
+ * XEN_PV or XEN_PV | XEN_DOM0.
+ */
+int xen_detect(void)
+{
+	char buf[32] = {0};
+	int fd;
+	/* volatile: written after sigsetjmp() and read again after siglongjmp(). */
+	volatile sighandler_t sig = SIG_DFL;
+	ssize_t rc;
+	static int domain_type = XEN_NOT_YET_DETECTED;
+
+	/* Run this weird code only once. */
+	if (domain_type != XEN_NOT_YET_DETECTED)
+		return domain_type;
+
+	/* Check for execution in HVM context. */
+	if (check_for_xen(XEN_CHECK_HVM))
+		return domain_type = XEN_HVM;
+
+	/* Non-zero return: SIGILL was raised by the PV check below. */
+	if (sigsetjmp(xen_sigill_jmp, 1)) {
+		if (signal(SIGILL, sig) == SIG_ERR)
+			fprintf(stderr, "%s: signal(SIGILL): Original signal handler not restored\n", __func__);
+		return domain_type = XEN_NONE;
+	}
+
+	sig = signal(SIGILL, xen_sigill_handler);
+
+	if (sig == SIG_ERR) {
+		fprintf(stderr, "%s: signal(SIGILL): New signal handler not installed\n", __func__);
+		return domain_type = XEN_NONE;
+	}
+
+	/*
+	 * Check for execution in PV context.
+	 * If this function returns it means that we are in PV context.
+	 */
+	check_for_xen(XEN_CHECK_PV);
+
+	if (signal(SIGILL, sig) == SIG_ERR)
+		fprintf(stderr, "%s: signal(SIGILL): Original signal handler not restored\n", __func__);
+
+	fd = open("/proc/xen/capabilities", O_RDONLY);
+
+	if (fd == -1)
+		return domain_type = XEN_PV;
+
+	rc = read(fd, buf, sizeof(buf));
+
+	close(fd);
+
+	if (rc == -1)
+		return domain_type = XEN_PV;
+
+	buf[sizeof(buf) - 1] = '\0';
+
+	if (!strstr(buf, "control_d"))
+		return domain_type = XEN_PV;
+
+	return domain_type = XEN_PV | XEN_DOM0;
+}
+
+/* Map an E820 region type to the matching kexec RANGE_* type. */
+static unsigned e820_to_kexec_type(uint32_t type)
+{
+	switch (type) {
+		case E820_RAM:
+			return RANGE_RAM;
+		case E820_ACPI:
+			return RANGE_ACPI;
+		case E820_NVS:
+			return RANGE_ACPI_NVS;
+		case E820_RESERVED:
+		default:
+			return RANGE_RESERVED;
+	}
+}
+
+/**
+ * Memory map detection for Xen.
+ *
+ * @param[out] range pointer that will be set to an array that holds the
+ *             memory ranges
+ * @param[out] ranges number of ranges valid in @p range
+ *
+ * @return 0 on success, any other value on failure.
+ */
+#if defined(HAVE_XC_GET_MACHINE_MEMORY_MAP) && defined(HAVE_XC_GET_MEMORY_MAP)
+int get_memory_ranges_xen(struct memory_range **range, int *ranges)
+{
+	int rc, ret = -1;
+	struct e820entry e820entries[MAX_MEMORY_RANGES];
+	unsigned int i;
+#ifdef XENCTRL_HAS_XC_INTERFACE
+	xc_interface *xc;
+#else
+	int xc;
+#endif
+
+	/* On open failure there is no handle to close, so return directly. */
+#ifdef XENCTRL_HAS_XC_INTERFACE
+	xc = xc_interface_open(NULL, NULL, 0);
+
+	if (!xc) {
+		fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__);
+		return ret;
+	}
+#else
+	xc = xc_interface_open();
+
+	if (xc == -1) {
+		fprintf(stderr, "%s: Failed to open Xen control interface\n", __func__);
+		return ret;
+	}
+#endif
+
+	/* dom0 reads the machine memory map; otherwise xc_get_memory_map() is used. */
+	if (xen_detect() & XEN_DOM0)
+		rc = xc_get_machine_memory_map(xc, e820entries, MAX_MEMORY_RANGES);
+	else
+		rc = xc_get_memory_map(xc, e820entries, MAX_MEMORY_RANGES);
+
+	if (rc < 0) {
+		fprintf(stderr, "%s: %s: %s\n", __func__,
+			(xen_detect() & XEN_DOM0) ? "xc_get_machine_memory_map" : "xc_get_memory_map",
+			strerror(-rc));
+		goto err;
+	}
+
+	/* rc is non-negative here and is used as the number of entries. */
+	for (i = 0; i < (unsigned int)rc; ++i) {
+		memory_range[i].start = e820entries[i].addr;
+		memory_range[i].end = e820entries[i].addr + e820entries[i].size;
+		memory_range[i].type = e820_to_kexec_type(e820entries[i].type);
+	}
+
+	qsort(memory_range, rc, sizeof(struct memory_range), compare_ranges);
+
+	*range = memory_range;
+	*ranges = rc;
+
+	ret = 0;
+
+err:
+	xc_interface_close(xc);
+
+	return ret;
+}
+#else
+int get_memory_ranges_xen(struct memory_range **range, int *ranges)
+{
+	int fd, rc, ret = -1;
+	privcmd_hypercall_t hypercall;
+	struct e820entry *e820entries = NULL;
+	struct xen_memory_map *xen_memory_map = NULL;
+	unsigned int i;
+
+	fd = open("/proc/xen/privcmd", O_RDWR);
+
+	/* Nothing has been acquired yet, so skip the cleanup path. */
+	if (fd == -1) {
+		fprintf(stderr, "%s: open(/proc/xen/privcmd): %m\n", __func__);
+		return ret;
+	}
+
+	rc = posix_memalign((void **)&e820entries, getpagesize(),
+			    sizeof(struct e820entry) * MAX_MEMORY_RANGES);
+
+	if (rc) {
+		fprintf(stderr, "%s: posix_memalign(e820entries): %s\n", __func__, strerror(rc));
+		e820entries = NULL;
+		goto err;
+	}
+
+	rc = posix_memalign((void **)&xen_memory_map, getpagesize(),
+			    sizeof(struct xen_memory_map));
+
+	if (rc) {
+		fprintf(stderr, "%s: posix_memalign(xen_memory_map): %s\n", __func__, strerror(rc));
+		xen_memory_map = NULL;
+		goto err;
+	}
+
+	if (mlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES) == -1) {
+		fprintf(stderr, "%s: mlock(e820entries): %m\n", __func__);
+		goto err;
+	}
+
+	if (mlock(xen_memory_map, sizeof(struct xen_memory_map)) == -1) {
+		fprintf(stderr, "%s: mlock(xen_memory_map): %m\n", __func__);
+		goto err;
+	}
+
+	xen_memory_map->nr_entries = MAX_MEMORY_RANGES;
+	set_xen_guest_handle(xen_memory_map->buffer, e820entries);
+
+	hypercall.op = __HYPERVISOR_memory_op;
+	hypercall.arg[0] = (xen_detect() & XEN_DOM0) ? XENMEM_machine_memory_map : XENMEM_memory_map;
+	/* Go through unsigned long so the cast is also clean on 32-bit builds. */
+	hypercall.arg[1] = (__u64)(unsigned long)xen_memory_map;
+
+	rc = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hypercall);
+
+	if (rc == -1) {
+		fprintf(stderr, "%s: ioctl(IOCTL_PRIVCMD_HYPERCALL): %m\n", __func__);
+		goto err;
+	}
+
+	for (i = 0; i < xen_memory_map->nr_entries; ++i) {
+		memory_range[i].start = e820entries[i].addr;
+		memory_range[i].end = e820entries[i].addr + e820entries[i].size;
+		memory_range[i].type = e820_to_kexec_type(e820entries[i].type);
+	}
+
+	qsort(memory_range, xen_memory_map->nr_entries, sizeof(struct memory_range), compare_ranges);
+
+	*range = memory_range;
+	*ranges = xen_memory_map->nr_entries;
+
+	ret = 0;
+
+err:
+	/* The buffers are still NULL if their allocation failed. */
+	if (xen_memory_map)
+		munlock(xen_memory_map, sizeof(struct xen_memory_map));
+	if (e820entries)
+		munlock(e820entries, sizeof(struct e820entry) * MAX_MEMORY_RANGES);
+	free(xen_memory_map);
+	free(e820entries);
+	close(fd);
+
+	return ret;
+}
+#endif /* HAVE_XC_GET_MACHINE_MEMORY_MAP && HAVE_XC_GET_MEMORY_MAP */
+#endif /* HAVE_LIBXENCTRL */
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-xen.h kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-xen.h --- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86-xen.h 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/kexec/arch/i386/kexec-x86-xen.h 2012-05-22 12:45:02.000000000 +0200 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef __KEXEC_X86_XEN_H__ +#define __KEXEC_X86_XEN_H__ + +#include "config.h" + +#ifndef __ASSEMBLY__ +#include "../../kexec.h" +#endif + +#ifdef HAVE_LIBXENCTRL + +#ifndef __ASSEMBLY__ +#include +#include "../../kexec-elf.h" +#endif + +#define XP_PAGE_SHIFT 12 +#define XP_PAGE_SIZE (1 << 12) + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 + +#ifdef __i386__ +#define TRANSITION_PGTABLE_SIZE 5 +#else +#define TRANSITION_PGTABLE_SIZE 7 +#endif + +#define XP_PFN_DOWN(x) ((x) >> XP_PAGE_SHIFT) +#define XP_PFN_PHYS(x) ((x) << XP_PAGE_SHIFT) + +#ifndef __ASSEMBLY__ +struct xen_elf_notes { + unsigned long entry; + unsigned long hypercall_page; + unsigned long virt_base; +}; + +extern struct multicall_entry transition_pgtable_uvm[TRANSITION_PGTABLE_SIZE]; +extern unsigned long transition_pgtable_mfn; +extern unsigned long bootstrap_pgtable_mfn; +extern unsigned long bootstrap_stack_vaddr; +extern unsigned long xen_pv_kernel_entry_vaddr; +extern unsigned long start_info_vaddr; +extern const unsigned long xen_pv_kernel_bootstrap_size; + +extern void xen_pv_usage(void); +extern int xen_pv_probe(const char *kernel_buf, off_t kernel_size); +extern int xen_pv_load(int argc, char **argv, const char *kernel_buf, + off_t kernel_size, struct kexec_info *info); + +extern int get_memory_ranges_xen(struct memory_range **range, int *ranges); + +extern unsigned long get_next_paddr(struct kexec_info *info); + +extern unsigned 
long build_bootstrap_pgtable(struct kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, int p2m_seg); +extern void build_transition_pgtable(struct kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + int p2m_seg, int bs_seg); + +extern void xen_pv_kernel_bootstrap(void); +#endif /* __ASSEMBLY__ */ +#else +static inline int get_memory_ranges_xen(struct memory_range **range, int *ranges) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ +#endif /* __KEXEC_X86_XEN_H__ */ diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86.c kexec-tools-2.0.3/kexec/arch/i386/kexec-x86.c --- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/i386/kexec-x86.c 2012-05-21 20:20:59.000000000 +0200 @@ -30,10 +30,14 @@ #include "../../kexec-syscall.h" #include "../../firmware_memmap.h" #include "kexec-x86.h" +#include "kexec-x86-xen.h" #include "crashdump-x86.h" #include struct file_type file_type[] = { +#ifdef HAVE_LIBXENCTRL + { "xen-pv", xen_pv_probe, xen_pv_load, xen_pv_usage }, +#endif { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, multiboot_x86_usage }, { "elf-x86", elf_x86_probe, elf_x86_load, elf_x86_usage }, diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86.h kexec-tools-2.0.3/kexec/arch/i386/kexec-x86.h --- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-x86.h 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/i386/kexec-x86.h 2012-03-03 22:34:17.000000000 +0100 @@ -11,6 +11,7 @@ enum coretype { extern unsigned char compat_x86_64[]; extern uint32_t compat_x86_64_size, compat_x86_64_entry32; +extern struct memory_range memory_range[MAX_MEMORY_RANGES]; struct entry32_regs { uint32_t eax; diff -Npru kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-xen-pv.c kexec-tools-2.0.3/kexec/arch/i386/kexec-xen-pv.c --- kexec-tools-2.0.3.orig/kexec/arch/i386/kexec-xen-pv.c 1970-01-01 01:00:00.000000000 +0100 +++ 
kexec-tools-2.0.3/kexec/arch/i386/kexec-xen-pv.c 2012-05-22 12:46:11.000000000 +0200 @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */
+
+#include "config.h"
+
+#ifdef HAVE_LIBXENCTRL
+
+#define _GNU_SOURCE
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "../../kexec-xen.h"
+#include "kexec-x86-xen.h"
+#include "crashdump-x86.h"
+
+#define SYSFS_HYPERCALL_PAGE	"/sys/kernel/hypercall_page"
+#define SYSFS_P2M		"/sys/kernel/p2m"
+#define SYSFS_START_INFO	"/sys/kernel/start_info"
+
+static const char optstring[] = KEXEC_ARCH_OPT_STR "";
+
+static const struct option longopts[] = {
+	KEXEC_ARCH_OPTIONS
+	{"command-line", 1, NULL, OPT_APPEND},
+	{"append", 1, NULL, OPT_APPEND},
+	{"reuse-cmdline", 0, NULL, OPT_REUSE_CMDLINE},
+	{"initrd", 1, NULL, OPT_RAMDISK},
+	{"ramdisk", 1, NULL, OPT_RAMDISK},
+	{"console-xen-pv", 0, NULL, OPT_CONSOLE_XEN_PV},
+	{NULL, 0, NULL, 0}
+};
+
+/*
+ * Return the first physical address behind the last segment registered
+ * in info (start of that segment plus its memsz).
+ *
+ * At least one segment must already exist: segment[nr_segments - 1]
+ * is read unconditionally.
+ */
+unsigned long get_next_paddr(struct kexec_info *info)
+{
+	unsigned long next_paddr;
+
+	next_paddr = (unsigned long)info->segment[info->nr_segments - 1].mem;
+	next_paddr += info->segment[info->nr_segments - 1].memsz;
+
+	return next_paddr;
+}
+
+/*
+ * Make PFN pfn point to MFN mfn in the P2M copy stored in segment p2m_seg.
+ *
+ * The entry which currently holds mfn receives the old content of
+ * p2m[pfn], i.e. both entries are exchanged. Dies if pfn lies outside
+ * of the table or if mfn cannot be found in it.
+ */
+static void xchg_mfns(struct kexec_info *info, int p2m_seg,
+			unsigned long pfn, unsigned long mfn)
+{
+	unsigned long i, nr_pages, *p2m;
+
+	p2m = (unsigned long *)info->segment[p2m_seg].buf;
+	nr_pages = info->segment[p2m_seg].bufsz / sizeof(unsigned long);
+
+	/* p2m[pfn] is read and written below; never touch memory behind the table. */
+	if (pfn >= nr_pages)
+		die("xen-pv loader: %s: Invalid PFN: PFN: 0x%lx MFN: 0x%lx\n",
+			__func__, pfn, mfn);
+
+	/* Linear search for the entry which currently holds mfn. */
+	for (i = 0; i < nr_pages && p2m[i] != mfn; ++i)
+		;
+
+	if (i == nr_pages)
+		die("xen-pv loader: %s: Invalid MFN: PFN: 0x%lx MFN: 0x%lx\n",
+			__func__, pfn, mfn);
+
+	p2m[i] = p2m[pfn];
+	p2m[pfn] = mfn;
+}
+
+/*
+ * Return the descriptor of ELF note idx as a number. Only descriptors
+ * of 4 and 8 bytes are accepted; any other size is fatal.
+ */
+static unsigned long read_note(struct mem_ehdr *ehdr, int idx)
+{
+	if (ehdr->e_note[idx].n_descsz == 4)
+		return elf32_to_cpu(ehdr, *(uint32_t *)ehdr->e_note[idx].n_desc);
+
+	if (ehdr->e_note[idx].n_descsz == 8)
+		return elf64_to_cpu(ehdr, *(uint64_t *)ehdr->e_note[idx].n_desc);
+
+	die("xen-pv loader: %s: Invalid Xen ELF note: 
Type: 0x%x Data size: 0x%x\n",
+		__func__, ehdr->e_note[idx].n_type, ehdr->e_note[idx].n_descsz);
+
+	/* Do not emit "control reaches end of non-void function" warning. */
+	return 0;
+}
+
+/*
+ * Walk all ELF notes of the new kernel and store the values of the "Xen"
+ * notes XEN_ELFNOTE_ENTRY, XEN_ELFNOTE_HYPERCALL_PAGE and
+ * XEN_ELFNOTE_VIRT_BASE in xen_elf_notes. Every other note is ignored,
+ * so members without a matching note keep the value set by the caller.
+ */
+static void read_xen_elf_notes(struct mem_ehdr *ehdr,
+				struct xen_elf_notes *xen_elf_notes)
+{
+	int i;
+
+	for (i = 0; i < ehdr->e_notenum; ++i) {
+		/* Skip notes which are not owned by "Xen". */
+		if (strcmp(ehdr->e_note[i].n_name, "Xen"))
+			continue;
+
+		switch (ehdr->e_note[i].n_type) {
+		case XEN_ELFNOTE_ENTRY:
+			xen_elf_notes->entry = read_note(ehdr, i);
+			break;
+
+		case XEN_ELFNOTE_HYPERCALL_PAGE:
+			xen_elf_notes->hypercall_page = read_note(ehdr, i);
+			break;
+
+		case XEN_ELFNOTE_VIRT_BASE:
+			xen_elf_notes->virt_base = read_note(ehdr, i);
+			break;
+
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Copy one page read from SYSFS_HYPERCALL_PAGE into the already loaded
+ * kernel image, at the place announced by the hypercall page ELF note.
+ *
+ * The target must be fully covered by the file backed part (bufsz) of
+ * one of the existing segments; otherwise the loader dies.
+ */
+static void load_hypercall_page(struct kexec_info *info,
+				struct xen_elf_notes *xen_elf_notes)
+{
+	int fd, i;
+	unsigned long hp_paddr;
+	void *hp_dst, *hp_src;
+
+	/* The note holds a virtual address; segments use physical ones. */
+	hp_paddr = xen_elf_notes->hypercall_page - xen_elf_notes->virt_base;
+
+	/* Find the segment which contains the whole hypercall page. */
+	for (i = 0; i < info->nr_segments; ++i)
+		if (hp_paddr >= (unsigned long)info->segment[i].mem &&
+				hp_paddr + XP_PAGE_SIZE <=
+				(unsigned long)info->segment[i].mem + info->segment[i].bufsz)
+			break;
+
+	if (i == info->nr_segments)
+		die("There is no place for hypercall page !!!\n");
+
+	fd = open(SYSFS_HYPERCALL_PAGE, O_RDONLY);
+
+	if (fd == -1)
+		die("xen-pv loader: %s: open(%s): %m\n", __func__, SYSFS_HYPERCALL_PAGE);
+
+	hp_src = mmap(NULL, XP_PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+
+	if (hp_src == MAP_FAILED)
+		die("xen-pv loader: %s: mmap: %m\n", __func__);
+
+	/* Offset of the page inside the segment is the same in buf and mem. */
+	hp_dst = (void *)info->segment[i].buf;
+	hp_dst += hp_paddr - (unsigned long)info->segment[i].mem;
+
+	memcpy(hp_dst, hp_src, XP_PAGE_SIZE);
+
+	munmap(hp_src, XP_PAGE_SIZE);
+	close(fd);
+}
+
+/*
+ * Optionally load the file named by ramdisk as a new segment behind the
+ * last one and store its virtual address and length in si_new.
+ * Nothing is done if ramdisk is NULL.
+ */
+static void load_ramdisk(struct kexec_info *info,
+				struct xen_elf_notes *xen_elf_notes,
+				const char *ramdisk,
+				struct start_info *si_new)
+{
+	char *ramdisk_buf;
+	off_t ramdisk_size;
+	unsigned long ramdisk_paddr;
+
+	if (!ramdisk)
+		return;
+
+	ramdisk_buf = slurp_file(ramdisk, &ramdisk_size);
+
+	ramdisk_paddr = get_next_paddr(info);
+
+	add_buffer(info, ramdisk_buf, ramdisk_size, ramdisk_size, XP_PAGE_SIZE,
+			ramdisk_paddr, round_up(ramdisk_paddr + ramdisk_size,
+			XP_PAGE_SIZE), 1);
+
+	si_new->mod_start = xen_elf_notes->virt_base + ramdisk_paddr;
+	si_new->mod_len = ramdisk_size;
+}
+
+/*
+ * Map SYSFS_P2M privately (writable, so that entries can be exchanged
+ * later without touching the file) and add it as a new segment behind
+ * the last one.
+ *
+ * Stores the virtual address of the list and the number of entries in
+ * si_new. Returns the index of the new segment.
+ */
+static int load_p2m(struct kexec_info *info,
+			struct xen_elf_notes *xen_elf_notes,
+			struct start_info *si_new)
+{
+	int fd;
+	struct stat st;
+	unsigned long p2m_paddr;
+	void *p2m;
+
+	fd = open(SYSFS_P2M, O_RDONLY);
+
+	if (fd == -1)
+		die("xen-pv loader: %s: open(%s): %m\n", __func__, SYSFS_P2M);
+
+	if (fstat(fd, &st) == -1)
+		die("xen-pv loader: %s: fstat: %m\n", __func__);
+
+	p2m = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+	if (p2m == MAP_FAILED)
+		die("xen-pv loader: %s: mmap: %m\n", __func__);
+
+	/*
+	 * The mapping stays valid after the descriptor is closed,
+	 * so do not leak it (same as in load_hypercall_page()).
+	 */
+	close(fd);
+
+	p2m_paddr = get_next_paddr(info);
+
+	add_buffer(info, p2m, st.st_size, st.st_size, XP_PAGE_SIZE, p2m_paddr,
+			round_up(p2m_paddr + st.st_size, XP_PAGE_SIZE), 1);
+
+	si_new->mfn_list = xen_elf_notes->virt_base + p2m_paddr;
+	si_new->nr_pages = st.st_size / sizeof(unsigned long);
+
+	return info->nr_segments - 1;
+}
+
+/*
+ * Read start info of the running system from SYSFS_START_INFO
+ * into si_sys. Any failure is fatal.
+ */
+static void load_sys_start_info(struct start_info *si_sys)
+{
+	int fd;
+	ssize_t rc;
+
+	fd = open(SYSFS_START_INFO, O_RDONLY);
+
+	if (fd == -1)
+		die("xen-pv loader: %s: open(%s): %m\n", __func__, SYSFS_START_INFO);
+
+	rc = read(fd, si_sys, sizeof(struct start_info));
+
+	if (rc == -1)
+		die("xen-pv loader: %s: read: %m\n", __func__);
+
+	/*
+	 * Warning: Linux Kernel start_info struct may not contain
+	 * first_p2m_pfn and nr_p2m_frames members. 
+	 */
+	if (rc < sizeof(struct start_info) - sizeof(unsigned long) * 2)
+		die("xen-pv loader: %s: read: File was truncated\n", __func__);
+
+	close(fd);
+}
+
+/*
+ * Add a zero filled segment of one page behind the last segment.
+ * It is filled in later (see users of the returned segment index).
+ *
+ * Returns the index of the new segment.
+ */
+static int alloc_start_info(struct kexec_info *info)
+{
+	unsigned long si_dst_paddr;
+	void *si_dst;
+
+	si_dst = xmalloc(XP_PAGE_SIZE);
+	memset(si_dst, 0, XP_PAGE_SIZE);
+
+	si_dst_paddr = get_next_paddr(info);
+
+	add_buffer(info, si_dst, XP_PAGE_SIZE, XP_PAGE_SIZE, XP_PAGE_SIZE,
+			si_dst_paddr, si_dst_paddr + XP_PAGE_SIZE, 1);
+
+	return info->nr_segments - 1;
+}
+
+/*
+ * Reserve xenstore and console pages (in this order).
+ * Magic pages are behind start info.
+ *
+ * WARNING: Do not change xenstore and console pages order nor their location.
+ * Linux Kernel and some code in kexec-tools depend on it.
+ */
+
+static void reserve_magic_pages(struct kexec_info *info,
+				struct start_info *si_sys, int p2m_seg)
+{
+	unsigned long magic_paddr, magic_pfn, magic_size;
+
+	magic_paddr = get_next_paddr(info);
+	magic_pfn = XP_PFN_DOWN(magic_paddr);
+	magic_size = 2 * XP_PAGE_SIZE; /* One page for xenstore, one for console. */
+
+	/* Move xenstore MFN to new place. */
+	xchg_mfns(info, p2m_seg, magic_pfn, si_sys->store_mfn);
+
+	/* Move console MFN to new place. 
*/
+	xchg_mfns(info, p2m_seg, ++magic_pfn, si_sys->console.domU.mfn);
+
+	add_buffer(info, NULL, 0, magic_size, XP_PAGE_SIZE,
+			magic_paddr, magic_paddr + magic_size, 1);
+}
+
+/*
+ * Add xen_pv_kernel_bootstrap code (xen_pv_kernel_bootstrap_size bytes)
+ * as a new segment behind the last one. The segment must fit in one page.
+ *
+ * Returns the index of the new segment.
+ */
+static int alloc_bootstrap_stack(struct kexec_info *info)
+{
+	unsigned long bs_paddr;
+
+	bs_paddr = get_next_paddr(info);
+
+	add_buffer(info, xen_pv_kernel_bootstrap, xen_pv_kernel_bootstrap_size,
+			xen_pv_kernel_bootstrap_size, XP_PAGE_SIZE,
+			bs_paddr, bs_paddr + XP_PAGE_SIZE, 1);
+
+	return info->nr_segments - 1;
+}
+
+/*
+ * Add an empty (bufsz == 0) segment which fills the gap between the end
+ * of the last segment and end_paddr. Nothing is added if there is no gap.
+ *
+ * Dies if the segments already reach behind end_paddr; without that check
+ * the unsigned subtraction below would wrap around and request a padding
+ * segment of nearly the whole address space.
+ */
+static void reserve_padding(struct kexec_info *info, unsigned long end_paddr)
+{
+	unsigned long padding_paddr, padding_size;
+
+	padding_paddr = get_next_paddr(info);
+
+	if (end_paddr < padding_paddr)
+		die("xen-pv loader: %s: Segments end at 0x%lx which is behind 0x%lx\n",
+			__func__, padding_paddr, end_paddr);
+
+	padding_size = end_paddr - padding_paddr;
+
+	if (!padding_size)
+		return;
+
+	add_buffer(info, NULL, 0, padding_size, XP_PAGE_SIZE,
+			padding_paddr, end_paddr, 1);
+}
+
+/*
+ * Load crash dump segments behind the last segment.
+ *
+ * *command_line is grown to COMMAND_LINE_SIZE bytes and handed over to
+ * load_crashdump_segments() as the command line it may extend.
+ * *command_line may be NULL on entry; it then becomes an empty string.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int load_crashdump(struct kexec_info *info, struct mem_ehdr *ehdr,
+				char **command_line)
+{
+	int no_cmdline = !*command_line;
+
+	*command_line = xrealloc(*command_line, COMMAND_LINE_SIZE);
+
+	/*
+	 * Memory returned for a NULL pointer is not initialized. Terminate
+	 * it, otherwise garbage is interpreted as command line.
+	 */
+	if (no_cmdline)
+		(*command_line)[0] = '\0';
+
+	if (load_crashdump_segments(info, *command_line, elf_max_addr(ehdr),
+					get_next_paddr(info)) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Fill start info of the new kernel (buffer of segment si_seg).
+ *
+ * Members describing the domain (magic, shared info, flags, xenstore and
+ * console) are taken from si_sys. Members describing the new image
+ * (command line, page tables, P2M list, ramdisk) are taken from si_new.
+ */
+static void init_start_info(struct kexec_info *info, struct start_info *si_sys,
+				struct start_info *si_new, int si_seg)
+{
+	struct start_info *si_dst;
+
+	si_dst = (struct start_info *)info->segment[si_seg].buf;
+
+	memcpy(si_dst->magic, si_sys->magic, sizeof(si_dst->magic));
+	si_dst->shared_info = si_sys->shared_info;
+	si_dst->flags = si_sys->flags;
+	si_dst->store_mfn = si_sys->store_mfn;
+	si_dst->store_evtchn = si_sys->store_evtchn;
+	si_dst->console.domU.mfn = si_sys->console.domU.mfn;
+	si_dst->console.domU.evtchn = si_sys->console.domU.evtchn;
+
+	memcpy(si_dst->cmd_line, si_new->cmd_line, sizeof(si_dst->cmd_line));
+	si_dst->pt_base = si_new->pt_base;
+	si_dst->nr_pt_frames = si_new->nr_pt_frames;
+	si_dst->mfn_list = si_new->mfn_list;
+	si_dst->nr_pages = si_new->nr_pages;
+	si_dst->mod_start = si_new->mod_start;
+	si_dst->mod_len = 
si_new->mod_len;
+}
+
+/*
+ * Initialize variables exported by the bootstrap code
+ * (transition_pgtable_uvm, transition_pgtable_mfn, bootstrap_pgtable_mfn,
+ * bootstrap_stack_vaddr, xen_pv_kernel_entry_vaddr, start_info_vaddr).
+ *
+ * Machine frame numbers are looked up in the P2M copy (segment p2m_seg).
+ * end_paddr is used as the physical address of the transition page table.
+ */
+static void init_bootstrap(struct kexec_info *info,
+				struct xen_elf_notes *xen_elf_notes,
+				int p2m_seg, int si_seg, int bs_seg,
+				unsigned long end_paddr)
+{
+	int i;
+	struct start_info *si;
+	unsigned long *p2m;
+
+	p2m = (unsigned long *)info->segment[p2m_seg].buf;
+	si = (struct start_info *)info->segment[si_seg].buf;
+
+	/* One entry for every page of the transition page table. */
+	for (i = 0; i < TRANSITION_PGTABLE_SIZE; ++i)
+		transition_pgtable_uvm[i].args[0] = end_paddr + i * XP_PAGE_SIZE;
+
+	transition_pgtable_mfn = p2m[XP_PFN_DOWN(end_paddr)];
+	bootstrap_pgtable_mfn = p2m[XP_PFN_DOWN(si->pt_base - xen_elf_notes->virt_base)];
+	bootstrap_stack_vaddr = xen_elf_notes->virt_base;
+	bootstrap_stack_vaddr += (unsigned long)info->segment[bs_seg].mem;
+	xen_pv_kernel_entry_vaddr = xen_elf_notes->entry;
+	start_info_vaddr = xen_elf_notes->virt_base;
+	start_info_vaddr += (unsigned long)info->segment[si_seg].mem;
+}
+
+/*
+ * Load purgatory, use its xen_pv_purgatory_start symbol as entry point
+ * and tell it the physical address of the bootstrap segment.
+ *
+ * If console_xen_pv is not zero then the purgatory symbols console_xen_pv,
+ * console_xen_pv_if and console_xen_pv_evtchn are set too.
+ */
+static void load_purgatory(struct kexec_info *info, struct start_info *si_sys,
+				int si_seg, int bs_seg, uint8_t console_xen_pv)
+{
+	const void *console_xen_pv_if;
+
+	elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
+				0, ULONG_MAX, 1, 0);
+
+	info->entry = (void *)elf_rel_get_addr(&info->rhdr, "xen_pv_purgatory_start");
+
+	elf_rel_set_symbol(&info->rhdr, "bootstrap_stack_paddr",
+				&info->segment[bs_seg].mem,
+				sizeof(info->segment[bs_seg].mem));
+
+	if (!console_xen_pv)
+		return;
+
+	/* This depends on assumptions made in reserve_magic_pages(). */
+	/* Console page is the second page behind start info page. */
+	console_xen_pv_if = info->segment[si_seg].mem + 2 * XP_PAGE_SIZE;
+
+	elf_rel_set_symbol(&info->rhdr, "console_xen_pv",
+				&console_xen_pv, sizeof(console_xen_pv));
+	elf_rel_set_symbol(&info->rhdr, "console_xen_pv_if",
+				&console_xen_pv_if, sizeof(console_xen_pv_if));
+	elf_rel_set_symbol(&info->rhdr, "console_xen_pv_evtchn",
+				&si_sys->console.domU.evtchn,
+				sizeof(si_sys->console.domU.evtchn));
+}
+
+/* Print options understood by xen-pv loader. */
+void xen_pv_usage(void)
+{
+	printf(" --command-line=STRING Set the kernel command line to STRING\n"
+		" --append=STRING Set the kernel command line to STRING\n"
+		" --reuse-cmdline Use kernel command line from running system.\n"
+		" --initrd=FILE Use FILE as the kernel's initial ramdisk.\n"
+		" --ramdisk=FILE Use FILE as the kernel's initial ramdisk.\n"
+		" --console-xen-pv Enable the Xen PV console.\n");
+}
+
+/*
+ * Check that xen-pv loader is able to load given kernel.
+ *
+ * Returns -ENOSYS if we do not run in Xen PV domain or we run in dom0,
+ * the result of build_elf_exec_info() if it fails, -ENOEXEC if the kernel
+ * does not contain any "Xen" ELF note.
+ */
+int xen_pv_probe(const char *kernel_buf, off_t kernel_size)
+{
+	struct mem_ehdr ehdr;
+	int i, rc;
+
+	/* Are we in Xen PV domain ??? */
+	if (!(xen_detect() & XEN_PV) || (xen_detect() & XEN_DOM0))
+		return -ENOSYS;
+
+	rc = build_elf_exec_info(kernel_buf, kernel_size, &ehdr, 0);
+
+	/* It does not look like ELF file... */
+	if (rc < 0)
+		goto err;
+
+	/* Look for Xen notes. */
+	for (i = 0; i < ehdr.e_notenum; ++i)
+		if (!strcmp(ehdr.e_note[i].n_name, "Xen"))
+			break;
+
+	/* This is not Xen compatible kernel. 
*/
+	if (i == ehdr.e_notenum) {
+		rc = -ENOEXEC;
+		goto err;
+	}
+
+err:
+	/* Reached on success too; rc is then the build_elf_exec_info() result. */
+	free_elf_info(&ehdr);
+
+	return rc;
+}
+
+/*
+ * Load Xen PV kernel.
+ *
+ * Parses loader options, loads the ELF image and then appends all other
+ * pieces. Most helpers place their data at get_next_paddr(), so the order
+ * of calls below defines the memory layout seen by the new kernel.
+ *
+ * Returns 0 on success and -1 on failure (wrong option or crash dump
+ * segments could not be loaded). Many other errors are fatal (die()).
+ */
+int xen_pv_load(int argc, char **argv, const char *kernel_buf,
+			off_t kernel_size, struct kexec_info *info)
+{
+	char *command_line = NULL;
+	const char *append = NULL, *ramdisk = NULL;
+	int bs_seg, c, p2m_seg, si_seg;
+	struct mem_ehdr ehdr;
+	struct start_info si_new = {}, si_sys;
+	struct xen_elf_notes xen_elf_notes = {};
+	uint8_t console_xen_pv = 0;
+	unsigned long end_paddr;
+
+	while (1) {
+		c = getopt_long(argc, argv, optstring, longopts, NULL);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case OPT_APPEND:
+			append = optarg;
+			break;
+
+		case OPT_CONSOLE_XEN_PV:
+			console_xen_pv = 1;
+			break;
+
+		case OPT_RAMDISK:
+			ramdisk = optarg;
+			break;
+
+		case OPT_REUSE_CMDLINE:
+			command_line = get_command_line();
+			break;
+
+		default:
+			/* Values below OPT_ARCH_MAX are skipped silently. */
+			if (c >= OPT_ARCH_MAX)
+				fprintf(stderr, "Unknown option: opt: %d\n", c);
+			break;
+
+		case '?':
+			usage();
+			return -1;
+		}
+	}
+
+	command_line = concat_cmdline(command_line, append);
+
+	if (command_line && strlen(command_line) > COMMAND_LINE_SIZE - 1)
+		die("Command line overflow\n");
+
+	/* Load the ELF executable. */
+	elf_exec_build_load(info, &ehdr, kernel_buf, kernel_size, 0);
+
+	read_xen_elf_notes(&ehdr, &xen_elf_notes);
+
+	/* get_next_paddr() looks at the last segment only. */
+	sort_segments(info);
+
+	load_hypercall_page(info, &xen_elf_notes);
+	load_ramdisk(info, &xen_elf_notes, ramdisk, &si_new);
+	p2m_seg = load_p2m(info, &xen_elf_notes, &si_new);
+	load_sys_start_info(&si_sys);
+	si_seg = alloc_start_info(info);
+	reserve_magic_pages(info, &si_sys, p2m_seg);
+	end_paddr = build_bootstrap_pgtable(info, &xen_elf_notes, &si_new, p2m_seg);
+	bs_seg = alloc_bootstrap_stack(info);
+	reserve_padding(info, end_paddr);
+	build_transition_pgtable(info, &xen_elf_notes, p2m_seg, bs_seg);
+
+	if (info->kexec_flags & KEXEC_ON_CRASH)
+		if (load_crashdump(info, &ehdr, &command_line) < 0)
+			return -1;
+
+	if (command_line) {
+		/* Second check: start info command line has its own limit. */
+		if (strlen(command_line) > MAX_GUEST_CMDLINE - 1)
+			die("Command line overflow\n");
+
+		strcpy((char *)si_new.cmd_line, command_line);
+	}
+
+	init_start_info(info, &si_sys, &si_new, si_seg);
+	init_bootstrap(info, &xen_elf_notes, p2m_seg, si_seg, bs_seg, end_paddr);
+
+	load_purgatory(info, &si_sys, si_seg, bs_seg, console_xen_pv);
+
+	return 0;
+}
+#endif /* HAVE_LIBXENCTRL */
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/ia64/kexec-iomem.c kexec-tools-2.0.3/kexec/arch/ia64/kexec-iomem.c --- kexec-tools-2.0.3.orig/kexec/arch/ia64/kexec-iomem.c 2010-07-29 11:22:16.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/ia64/kexec-iomem.c 2012-03-02 13:30:15.000000000 +0100 @@ -4,20 +4,8 @@ #include "../../crashdump.h" static const char proc_iomem_str[]= "/proc/iomem"; -static const char proc_iomem_machine_str[]= "/proc/iomem_machine"; -/* - * On IA64 XEN the EFI tables are virtualised. - * For this reason on such systems /proc/iomem_machine is provided, - * which is based on the hypervisor's (machine's) EFI tables. 
- * If Xen is in use, then /proc/iomem is used for memory regions relating - * to the currently running dom0 kernel, and /proc/iomem_machine is used - * for regions relating to the machine itself or the hypervisor. - * If Xen is not in used, then /proc/iomem used. - */ const char *proc_iomem(void) { - if (xen_present()) - return proc_iomem_machine_str; return proc_iomem_str; } diff -Npru kexec-tools-2.0.3.orig/kexec/arch/mips/crashdump-mips.c kexec-tools-2.0.3/kexec/arch/mips/crashdump-mips.c --- kexec-tools-2.0.3.orig/kexec/arch/mips/crashdump-mips.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/mips/crashdump-mips.c 2012-04-27 14:48:59.000000000 +0200 @@ -31,6 +31,7 @@ #include "../../kexec-syscall.h" #include "../../crashdump.h" #include "kexec-mips.h" +#include "kexec-xen.h" #include "crashdump-mips.h" #include "unused.h" @@ -55,7 +56,7 @@ static int get_kernel_paddr(struct crash { uint64_t start; - if (xen_present()) /* Kernel not entity mapped under Xen */ + if (xen_detect() & XEN_DOM0) /* Kernel not entity mapped under Xen dom0 */ return 0; if (parse_iomem_single("Kernel code\n", &start, NULL) == 0) { diff -Npru kexec-tools-2.0.3.orig/kexec/arch/x86_64/Makefile kexec-tools-2.0.3/kexec/arch/x86_64/Makefile --- kexec-tools-2.0.3.orig/kexec/arch/x86_64/Makefile 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/x86_64/Makefile 2012-05-22 11:13:18.000000000 +0200 @@ -6,12 +6,17 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/kex x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-multiboot-x86.c x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-beoboot-x86.c x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-nbi.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-x86-xen-common.c +x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-xen-pv.c x86_64_KEXEC_SRCS += kexec/arch/i386/x86-linux-setup.c x86_64_KEXEC_SRCS += kexec/arch/i386/kexec-x86-common.c x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c +x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86-xen.c 
x86_64_KEXEC_SRCS += kexec/arch/x86_64/kexec-x86_64.c x86_64_KEXEC_SRCS += kexec/arch/x86_64/kexec-elf-x86_64.c x86_64_KEXEC_SRCS += kexec/arch/x86_64/kexec-elf-rel-x86_64.c +x86_64_KEXEC_SRCS += kexec/arch/x86_64/x86_64-xen-pv.c +x86_64_KEXEC_SRCS += kexec/arch/x86_64/x86_64-xen-pv-kernel-bootstrap.S dist += kexec/arch/x86_64/Makefile $(x86_64_KEXEC_SRCS) \ kexec/arch/x86_64/kexec-x86_64.h \ diff -Npru kexec-tools-2.0.3.orig/kexec/arch/x86_64/kexec-x86_64.c kexec-tools-2.0.3/kexec/arch/x86_64/kexec-x86_64.c --- kexec-tools-2.0.3.orig/kexec/arch/x86_64/kexec-x86_64.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/arch/x86_64/kexec-x86_64.c 2012-05-21 20:15:06.000000000 +0200 @@ -29,10 +29,14 @@ #include "../../kexec-elf.h" #include "../../kexec-syscall.h" #include "kexec-x86_64.h" +#include "../i386/kexec-x86-xen.h" #include "../i386/crashdump-x86.h" #include struct file_type file_type[] = { +#ifdef HAVE_LIBXENCTRL + { "xen-pv", xen_pv_probe, xen_pv_load, xen_pv_usage }, +#endif { "elf-x86_64", elf_x86_64_probe, elf_x86_64_load, elf_x86_64_usage }, { "multiboot-x86", multiboot_x86_probe, multiboot_x86_load, multiboot_x86_usage }, diff -Npru kexec-tools-2.0.3.orig/kexec/arch/x86_64/x86_64-xen-pv-kernel-bootstrap.S kexec-tools-2.0.3/kexec/arch/x86_64/x86_64-xen-pv-kernel-bootstrap.S --- kexec-tools-2.0.3.orig/kexec/arch/x86_64/x86_64-xen-pv-kernel-bootstrap.S 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/kexec/arch/x86_64/x86_64-xen-pv-kernel-bootstrap.S 2012-05-22 12:45:18.000000000 +0200 @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#ifdef HAVE_LIBXENCTRL
+
+#define __ASSEMBLY__
+
+#include
+
+#include "../i386/kexec-x86-xen.h"
+
+#ifdef UVMF_INVLPG
+#undef UVMF_INVLPG
+#endif
+
+#define DOMID_SELF	0x7ff0
+
+#define UVMF_INVLPG	2
+
+#define VCPUOP_down	2
+#define VCPUOP_is_up	3
+
+/* Values of xpkb_stage; it is incremented once per finished stage. */
+#define XPKB_TRANSITION	1
+#define XPKB_BOOTSTRAP	2
+
+	/*
+	 * This code must be in .data section because it is updated
+	 * by xen-pv loader (.text section is read only). However,
+	 * it is never executed in place. It is copied by xen-pv loader
+	 * to its destination and later called after purgatory code.
+	 */
+
+	/*
+	 * NOTE(review): register usage at entry is inferred from the code
+	 * below only and should be confirmed against the purgatory code:
+	 *   - %rax != 0 selects the bootstrap path; a CPU entering with
+	 *     %rax == 0 is diverted to xen_pv_kexec_halt,
+	 *   - %r14 is passed as VCPU id to VCPUOP_down, so it presumably
+	 *     holds the id of the executing VCPU,
+	 *   - %r15 is compared with xpkh_stage_cpus and later counted down
+	 *     to zero as VCPU id for VCPUOP_is_up, so it presumably holds
+	 *     the highest VCPU id (i.e. number of other CPUs).
+	 */
+
+	.data
+	.code64
+	.globl	transition_pgtable_uvm, transition_pgtable_mfn, bootstrap_pgtable_mfn
+	.globl	bootstrap_stack_vaddr, xen_pv_kernel_entry_vaddr, start_info_vaddr
+	.globl	xen_pv_kernel_bootstrap, xen_pv_kernel_bootstrap_size
+
+xen_pv_kernel_bootstrap:
+	testq	%rax, %rax
+	jnz	0f
+
+	leaq	xen_pv_kexec_halt(%rip), %rax
+	jmpq	*%rax
+
+0:
+	/* Is everybody at entry stage? */
+	cmpl	%r15d, xpkh_stage_cpus(%rip)
+	jne	0b
+
+	/* Reset stage counter. */
+	movl	$0, xpkh_stage_cpus(%rip)
+
+	/* Unmap transition page table pages. */
+	leaq	transition_pgtable_uvm(%rip), %rdi
+	movq	$TRANSITION_PGTABLE_SIZE, %rsi
+	movq	$__HYPERVISOR_multicall, %rax
+	syscall
+	testq	%rax, %rax
+	jz	0f
+	ud2a	/* Hypercall failed; there is nothing to return to. */
+
+0:
+	/* Store transition page table MFN. */
+	movq	transition_pgtable_mfn(%rip), %rax
+	movq	%rax, mmuext_new_baseptr(%rip)
+	movq	%rax, mmuext_new_user_baseptr(%rip)
+
+	/* Switch to transition page table. */
+	/* Only first 2 operations of mmuext_args are used here (no pin). */
+	leaq	mmuext_args(%rip), %rdi
+	movq	$2, %rsi
+	xorq	%rdx, %rdx
+	movq	$DOMID_SELF, %r10
+	movq	$__HYPERVISOR_mmuext_op, %rax
+	syscall
+	testq	%rax, %rax
+	jz	0f
+	ud2a
+
+0:
+	/* Go to virtual address. */
+	movq	bootstrap_stack_vaddr(%rip), %rax
+	addq	$(0f - xen_pv_kernel_bootstrap), %rax
+	jmpq	*%rax
+
+0:
+	sfence
+
+	/* Signal that we are at transition stage. */
+	lock	incb xpkb_stage(%rip)
+
+0:
+	/* Is everybody at transition stage? */
+	cmpl	%r15d, xpkh_stage_cpus(%rip)
+	jne	0b
+
+	/* Reset stage counter. */
+	movl	$0, xpkh_stage_cpus(%rip)
+
+	/* Setup bootstrap stack. */
+	movq	bootstrap_stack_vaddr(%rip), %rsp
+	addq	$XP_PAGE_SIZE, %rsp
+
+	/* Store bootstrap page table MFN. */
+	movq	bootstrap_pgtable_mfn(%rip), %rax
+	movq	%rax, mmuext_new_baseptr(%rip)
+	movq	%rax, mmuext_new_user_baseptr(%rip)
+	movq	%rax, mmuext_pin_l4_table(%rip)
+
+	/* Switch to bootstrap page table. */
+	/* All 3 operations of mmuext_args are used here (pin included). */
+	leaq	mmuext_args(%rip), %rdi
+	movq	$3, %rsi
+	xorq	%rdx, %rdx
+	movq	$DOMID_SELF, %r10
+	movq	$__HYPERVISOR_mmuext_op, %rax
+	syscall
+	testq	%rax, %rax
+	jz	0f
+	ud2a
+
+0:
+	sfence
+
+	/* Signal that we are at bootstrap stage. */
+	lock	incb xpkb_stage(%rip)
+
+0:
+	/* Do not poll VCPU whose id is equal to %r14d. */
+	cmpl	%r14d, %r15d
+	je	2f
+
+1:
+	/* CPU is up? */
+	movq	$VCPUOP_is_up, %rdi
+	movq	%r15, %rsi
+	xorq	%rdx, %rdx
+	movq	$__HYPERVISOR_vcpu_op, %rax
+	syscall
+	testq	%rax, %rax
+	jnz	1b
+
+2:
+	/* Go to next (lower) VCPU id until id 0 is processed. */
+	testl	%r15d, %r15d
+	jz	3f
+
+	decl	%r15d
+	jmp	0b
+
+3:
+	/* Set unused registers to zero. */
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq	%rcx, %rcx
+	xorq	%rdx, %rdx
+	xorq	%rdi, %rdi
+	xorq	%rbp, %rbp
+	xorq	%r8, %r8
+	xorq	%r9, %r9
+	xorq	%r10, %r10
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	/* Load start info address into %rsi. */
+	movq	start_info_vaddr(%rip), %rsi
+
+	/* Jump into new kernel... */
+	pushq	xen_pv_kernel_entry_vaddr(%rip)
+	retq
+
+xen_pv_kexec_halt:
+	/* Signal that we are at entry stage. */
+	lock	incl xpkh_stage_cpus(%rip)
+
+0:
+	/* Is xen_pv_kernel_bootstrap() at transition stage? */
+	cmpb	$XPKB_TRANSITION, xpkb_stage(%rip)
+	jne	0b
+
+	/* Switch to transition page table. */
+	leaq	mmuext_args(%rip), %rdi
+	movq	$2, %rsi
+	xorq	%rdx, %rdx
+	movq	$DOMID_SELF, %r10
+	movq	$__HYPERVISOR_mmuext_op, %rax
+	syscall
+	testq	%rax, %rax
+	jz	0f
+	ud2a
+
+0:
+	/* Go to virtual address. */
+	movq	bootstrap_stack_vaddr(%rip), %rax
+	addq	$(0f - xen_pv_kernel_bootstrap), %rax
+	jmpq	*%rax
+
+0:
+	/* Signal that we are at transition stage. */
+	lock	incl xpkh_stage_cpus(%rip)
+
+0:
+	/* Is xen_pv_kernel_bootstrap() at bootstrap stage? */
+	cmpb	$XPKB_BOOTSTRAP, xpkb_stage(%rip)
+	jne	0b
+
+	/* Switch to bootstrap page table. */
+	leaq	mmuext_args(%rip), %rdi
+	movq	$2, %rsi
+	xorq	%rdx, %rdx
+	movq	$DOMID_SELF, %r10
+	movq	$__HYPERVISOR_mmuext_op, %rax
+	syscall
+	testq	%rax, %rax
+	jz	0f
+	ud2a
+
+0:
+	/* Stop this CPU. */
+	movq	$VCPUOP_down, %rdi
+	movq	%r14, %rsi
+	xorq	%rdx, %rdx
+	movq	$__HYPERVISOR_vcpu_op, %rax
+	syscall
+	ud2a
+
+	/*
+	 * One entry of 8 quadwords for every page of transition page table:
+	 * hypercall number, 3 zeroed quadwords, UVMF_INVLPG, 3 zeroed
+	 * quadwords. NOTE(review): presumably Xen multicall entry layout
+	 * (op, result, args[6]) - confirm against Xen public headers.
+	 */
+transition_pgtable_uvm:
+	.rept	TRANSITION_PGTABLE_SIZE
+	.quad	__HYPERVISOR_update_va_mapping
+	.fill	3, 8, 0
+	.quad	UVMF_INVLPG
+	.fill	3, 8, 0
+	.endr
+
+transition_pgtable_mfn:
+	.quad	0	/* MFN of transition page table directory. */
+
+bootstrap_pgtable_mfn:
+	.quad	0	/* MFN of bootstrap page table directory. */
+
+bootstrap_stack_vaddr:
+	.quad	0	/* VIRTUAL address of bootstrap stack. */
+
+xen_pv_kernel_entry_vaddr:
+	.quad	0	/* VIRTUAL address of kernel entry point. */
+
+start_info_vaddr:
+	.quad	0	/* VIRTUAL address of start info. */
+
+	/* Array of 3 operations passed to __HYPERVISOR_mmuext_op. */
+mmuext_args:
+	.long	MMUEXT_NEW_BASEPTR	/* Operation. */
+	.long	0			/* PAD. */
+
+mmuext_new_baseptr:
+	.quad	0			/* MFN of target page table directory. */
+	.quad	0			/* UNUSED. */
+
+	.long	MMUEXT_NEW_USER_BASEPTR	/* Operation. */
+	.long	0			/* PAD. */
+
+mmuext_new_user_baseptr:
+	.quad	0			/* MFN of user target page table directory. */
+	.quad	0			/* UNUSED. */
+
+	.long	MMUEXT_PIN_L4_TABLE	/* Operation. */
+	.long	0			/* PAD. */
+
+mmuext_pin_l4_table:
+	.quad	0			/* MFN of page table directory to pin. */
+	.quad	0			/* UNUSED. */
+
+	.align	4
+
+xpkh_stage_cpus:
+	.long	0	/* Number of CPUs at given stage. */
+
+xpkb_stage:
+	.byte	0	/* xen_pv_kernel_bootstrap() stage. */
+
+xen_pv_kernel_bootstrap_size:
+	.quad	. - xen_pv_kernel_bootstrap	/* Bootstrap size. */
+#endif /* HAVE_LIBXENCTRL */
diff -Npru kexec-tools-2.0.3.orig/kexec/arch/x86_64/x86_64-xen-pv.c kexec-tools-2.0.3/kexec/arch/x86_64/x86_64-xen-pv.c
--- kexec-tools-2.0.3.orig/kexec/arch/x86_64/x86_64-xen-pv.c	1970-01-01 01:00:00.000000000 +0100
+++ kexec-tools-2.0.3/kexec/arch/x86_64/x86_64-xen-pv.c	2012-05-22 12:45:25.000000000 +0200
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2011-2012 Acunu Limited
+ *
+ * kexec/kdump implementation for Xen domU guests was written by Daniel Kiper.
+ *
+ * Some ideas are taken from:
+ *   - native kexec/kdump implementation,
+ *   - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18,
+ *   - PV-GRUB.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL + +#include +#include +#include + +#include "../../kexec.h" +#include "../../kexec-elf.h" +#include "../i386/kexec-x86-xen.h" + +#define PGDIR_SHIFT 39 +#define PUD_SHIFT 30 +#define PMD_SHIFT 21 + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PMD_SIZE (1UL << PMD_SHIFT) + +#define PTRS_PER_PGD 512 +#define PTRS_PER_PUD 512 +#define PTRS_PER_PMD 512 +#define PTRS_PER_PTE 512 + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_rw (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED) +#define _PAGE_ro (_PAGE_PRESENT | _PAGE_ACCESSED) + +#define pgd_index(vaddr) (((vaddr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) +#define pud_index(vaddr) (((vaddr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pmd_index(vaddr) (((vaddr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index(vaddr) (((vaddr) >> XP_PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +static void init_level1_page(struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, + unsigned long *ptp, + unsigned long *p2m, + unsigned long vaddr, + unsigned long end_vaddr) +{ + unsigned long pfn, pgprot, pt_end, pt_start, *pte; + + pfn = XP_PFN_DOWN(vaddr - xen_elf_notes->virt_base); + + pt_start = si_new->pt_base; + pt_end = pt_start + si_new->nr_pt_frames * XP_PAGE_SIZE; + + pte = &ptp[pte_index(vaddr)]; + + while (vaddr < end_vaddr) { + pgprot = (vaddr >= pt_start && vaddr < pt_end) ? 
_PAGE_ro : _PAGE_rw; + *pte++ = XP_PFN_PHYS(p2m[pfn++]) | pgprot; + + if (pte_index(vaddr) == PTRS_PER_PTE - 1) + break; + + vaddr += XP_PAGE_SIZE; + } +} + +static void init_level2_page(struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, + unsigned long **ptp, + unsigned long *ptp_pfn, + unsigned long *p2m, + unsigned long vaddr, + unsigned long end_vaddr) +{ + unsigned long *pmd; + + pmd = &(*ptp)[pmd_index(vaddr)]; + + while (vaddr < end_vaddr) { + *ptp += PTRS_PER_PMD; + ++*ptp_pfn; + + *pmd++ = XP_PFN_PHYS(p2m[*ptp_pfn]) | _PAGE_TABLE; + + init_level1_page(xen_elf_notes, si_new, *ptp, + p2m, vaddr, end_vaddr); + + if (pmd_index(vaddr) == PTRS_PER_PMD - 1) + break; + + vaddr += PMD_SIZE; + } +} + +static void init_level3_page(struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, + unsigned long **ptp, + unsigned long *ptp_pfn, + unsigned long *p2m, + unsigned long vaddr, + unsigned long end_vaddr) +{ + unsigned long *pud; + + pud = &(*ptp)[pud_index(vaddr)]; + + while (vaddr < end_vaddr) { + *ptp += PTRS_PER_PUD; + ++*ptp_pfn; + + *pud++ = XP_PFN_PHYS(p2m[*ptp_pfn]) | _PAGE_TABLE; + + init_level2_page(xen_elf_notes, si_new, ptp, + ptp_pfn, p2m, vaddr, end_vaddr); + + if (pud_index(vaddr) == PTRS_PER_PUD - 1) + break; + + vaddr += PUD_SIZE; + } +} + +static void init_level4_page(struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, + unsigned long *ptp, + unsigned long *p2m, + unsigned long end_vaddr) +{ + unsigned long *pgd, ptp_pfn, vaddr; + + vaddr = xen_elf_notes->virt_base; + + ptp_pfn = XP_PFN_DOWN(si_new->pt_base - xen_elf_notes->virt_base); + pgd = &ptp[pgd_index(vaddr)]; + + while (vaddr < end_vaddr) { + ptp += PTRS_PER_PGD; + ++ptp_pfn; + + *pgd++ = XP_PFN_PHYS(p2m[ptp_pfn]) | _PAGE_TABLE; + + init_level3_page(xen_elf_notes, si_new, &ptp, + &ptp_pfn, p2m, vaddr, end_vaddr); + + if (pgd_index(vaddr) == PTRS_PER_PGD - 1) + break; + + vaddr += PGDIR_SIZE; + } +} + +unsigned long build_bootstrap_pgtable(struct 
kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + struct start_info *si_new, int p2m_seg) +{ + unsigned long end_vaddr, *p2m, pt_paddr, pt_size, *ptp, try_vaddr = 0; + + pt_paddr = get_next_paddr(info); + + si_new->pt_base = xen_elf_notes->virt_base + pt_paddr; + + /* Minimal number of frames required to establish valid page table for x86_64. */ + si_new->nr_pt_frames = 4; + + do { + end_vaddr = try_vaddr; + + /* + * try_vaddr = round_up(pt_base + nr_pt_frames * XP_PAGE_SIZE + + * size_of_bootstrap_stack + 512 KiB, 4 MiB); + */ + try_vaddr = round_up(si_new->pt_base + + si_new->nr_pt_frames * XP_PAGE_SIZE + + XP_PAGE_SIZE + 0x80000, 0x400000); + try_vaddr -= XP_PAGE_SIZE; + + /* 1 frame for PGD. */ + si_new->nr_pt_frames = 1; + + /* X frames for PUDs. */ + si_new->nr_pt_frames += + ((try_vaddr - xen_elf_notes->virt_base) >> + PGDIR_SHIFT) + 1; + + /* Y frames for PMDs. */ + si_new->nr_pt_frames += + ((try_vaddr - xen_elf_notes->virt_base) >> + PUD_SHIFT) + 1; + + /* Z frames for PTEs. 
*/ + si_new->nr_pt_frames += + ((try_vaddr - xen_elf_notes->virt_base) >> + PMD_SHIFT) + 1; + } while (end_vaddr != try_vaddr); + + end_vaddr += XP_PAGE_SIZE; + + p2m = (unsigned long *)info->segment[p2m_seg].buf; + + pt_size = si_new->nr_pt_frames * XP_PAGE_SIZE; + ptp = xmalloc(pt_size); + memset(ptp, 0, pt_size); + + init_level4_page(xen_elf_notes, si_new, ptp, p2m, end_vaddr); + + add_buffer(info, ptp, pt_size, pt_size, XP_PAGE_SIZE, + pt_paddr, pt_paddr + pt_size, 1); + + return end_vaddr - xen_elf_notes->virt_base; +} + +void build_transition_pgtable(struct kexec_info *info, + struct xen_elf_notes *xen_elf_notes, + int p2m_seg, int bs_seg) +{ + unsigned long bs_addr, bs_maddr, *p2m, *pgd, pt_paddr; + unsigned long pt_size, *ptp, ptp_pfn; + + p2m = (unsigned long *)info->segment[p2m_seg].buf; + + bs_addr = (unsigned long)info->segment[bs_seg].mem; + bs_maddr = XP_PFN_PHYS(p2m[XP_PFN_DOWN(bs_addr)]); + + pt_paddr = get_next_paddr(info); + + /* + * The bs_seg page is mapped twice below, at bs_addr and at + * virt_base + bs_addr, so the transition page table needs: + * - 1 page for 1 PGD, + * - 2 pages for 2 PUDs, + * - 2 pages for 2 PMDs, + * - 2 pages for 2 PTEs. + * + * Sum of above equals 7 pages. 
+ */ + + pt_size = TRANSITION_PGTABLE_SIZE * XP_PAGE_SIZE; + ptp = xmalloc(pt_size); + memset(ptp, 0, pt_size); + + pgd = ptp; + ptp_pfn = XP_PFN_DOWN(pt_paddr); + + pgd[pgd_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + ptp += PTRS_PER_PGD; + + ptp[pud_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + ptp += PTRS_PER_PUD; + + ptp[pmd_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + ptp += PTRS_PER_PMD; + + ptp[pte_index(bs_addr)] = bs_maddr | _PAGE_rw; + ptp += PTRS_PER_PTE; + + bs_addr += xen_elf_notes->virt_base; + + pgd[pgd_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + + ptp[pud_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + ptp += PTRS_PER_PUD; + + ptp[pmd_index(bs_addr)] = XP_PFN_PHYS(p2m[++ptp_pfn]) | _PAGE_TABLE; + ptp += PTRS_PER_PMD; + + ptp[pte_index(bs_addr)] = bs_maddr | _PAGE_rw; + + add_buffer(info, pgd, pt_size, pt_size, XP_PAGE_SIZE, + pt_paddr, pt_paddr + pt_size, 1); +} +#endif /* HAVE_LIBXENCTRL */ diff -Npru kexec-tools-2.0.3.orig/kexec/crashdump-elf.c kexec-tools-2.0.3/kexec/crashdump-elf.c --- kexec-tools-2.0.3.orig/kexec/crashdump-elf.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/crashdump-elf.c 2012-04-27 14:57:47.000000000 +0200 @@ -44,7 +44,7 @@ int FUNC(struct kexec_info *info, int has_vmcoreinfo_xen = 0; int (*get_note_info)(int cpu, uint64_t *addr, uint64_t *len); - if (xen_present()) + if (xen_detect() & XEN_DOM0) nr_cpus = xen_get_nr_phys_cpus(); else nr_cpus = sysconf(_SC_NPROCESSORS_CONF); @@ -57,10 +57,9 @@ int FUNC(struct kexec_info *info, has_vmcoreinfo = 1; } - if (xen_present() && - get_xen_vmcoreinfo(&vmcoreinfo_addr_xen, &vmcoreinfo_len_xen) == 0) { + if ((xen_detect() & XEN_DOM0) && + get_xen_vmcoreinfo(&vmcoreinfo_addr_xen, &vmcoreinfo_len_xen) == 0) has_vmcoreinfo_xen = 1; - } sz = sizeof(EHDR) + (nr_cpus + has_vmcoreinfo + has_vmcoreinfo_xen) * sizeof(PHDR) + ranges * sizeof(PHDR); @@ -85,9 +84,8 @@ int FUNC(struct kexec_info 
*info, * PT_LOAD program header and in the physical RAM program headers. */ - if (elf_info->kern_size && !xen_present()) { + if (elf_info->kern_size && !(xen_detect() & XEN_DOM0)) sz += sizeof(PHDR); - } /* * Make sure the ELF core header is aligned to at least 1024. @@ -138,7 +136,7 @@ int FUNC(struct kexec_info *info, if (!get_note_info) get_note_info = get_crash_notes_per_cpu; - if (xen_present()) + if (xen_detect() & XEN_DOM0) get_note_info = xen_get_note; /* PT_NOTE program headers. One per cpu */ @@ -198,7 +196,7 @@ int FUNC(struct kexec_info *info, * Kernel is mapped if elf_info->kern_size is non-zero. */ - if (elf_info->kern_size && !xen_present()) { + if (elf_info->kern_size && !(xen_detect() & XEN_DOM0)) { phdr = (PHDR *) bufp; bufp += sizeof(PHDR); phdr->p_type = PT_LOAD; diff -Npru kexec-tools-2.0.3.orig/kexec/crashdump-xen.c kexec-tools-2.0.3/kexec/crashdump-xen.c --- kexec-tools-2.0.3.orig/kexec/crashdump-xen.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/crashdump-xen.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,225 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "kexec.h" -#include "crashdump.h" -#include "kexec-syscall.h" - -#include "config.h" - -#ifdef HAVE_LIBXENCTRL -#include -#endif - -struct crash_note_info { - unsigned long base; - unsigned long length; -}; - -static int xen_phys_cpus; -static struct crash_note_info *xen_phys_notes; - -/* based on code from xen-detect.c */ -static int is_dom0; -#if defined(__i386__) || defined(__x86_64__) -static jmp_buf xen_sigill_jmp; -void xen_sigill_handler(int sig) -{ - longjmp(xen_sigill_jmp, 1); -} - -static void xen_cpuid(uint32_t idx, uint32_t *regs, int pv_context) -{ - asm volatile ( -#ifdef __i386__ -#define R(x) "%%e"#x"x" -#else -#define R(x) "%%r"#x"x" -#endif - "push "R(a)"; push "R(b)"; push "R(c)"; push "R(d)"\n\t" - "test %1,%1 ; jz 1f ; ud2a ; .ascii 
\"xen\" ; 1: cpuid\n\t" - "mov %%eax,(%2); mov %%ebx,4(%2)\n\t" - "mov %%ecx,8(%2); mov %%edx,12(%2)\n\t" - "pop "R(d)"; pop "R(c)"; pop "R(b)"; pop "R(a)"\n\t" - : : "a" (idx), "c" (pv_context), "S" (regs) : "memory" ); -} - -static int check_for_xen(int pv_context) -{ - uint32_t regs[4]; - char signature[13]; - uint32_t base; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) - { - xen_cpuid(base, regs, pv_context); - - *(uint32_t *)(signature + 0) = regs[1]; - *(uint32_t *)(signature + 4) = regs[2]; - *(uint32_t *)(signature + 8) = regs[3]; - signature[12] = '\0'; - - if (strcmp("XenVMMXenVMM", signature) == 0 && regs[0] >= (base + 2)) - goto found; - } - - return 0; - -found: - xen_cpuid(base + 1, regs, pv_context); - return regs[0]; -} - -static int xen_detect_pv_guest(void) -{ - struct sigaction act, oldact; - int is_pv = -1; - - if (setjmp(xen_sigill_jmp)) - return is_pv; - - memset(&act, 0, sizeof(act)); - act.sa_handler = xen_sigill_handler; - sigemptyset (&act.sa_mask); - if (sigaction(SIGILL, &act, &oldact)) - return is_pv; - if (check_for_xen(1)) - is_pv = 1; - sigaction(SIGILL, &oldact, NULL); - return is_pv; -} -#else -static int xen_detect_pv_guest(void) -{ - return 1; -} -#endif - -/* - * Return 1 if its a PV guest. - * This includes dom0, which is the only PV guest where kexec/kdump works. - * HVM guests have to be handled as native hardware. 
- */ -int xen_present(void) -{ - if (!is_dom0) { - if (access("/proc/xen", F_OK) == 0) - is_dom0 = xen_detect_pv_guest(); - else - is_dom0 = -1; - } - return is_dom0 > 0; -} - -unsigned long xen_architecture(struct crash_elf_info *elf_info) -{ - unsigned long machine = elf_info->machine; -#ifdef HAVE_LIBXENCTRL - int rc; - xen_capabilities_info_t capabilities; -#ifdef XENCTRL_HAS_XC_INTERFACE - xc_interface *xc; -#else - int xc; -#endif - - if (!xen_present()) - goto out; - - memset(capabilities, '0', XEN_CAPABILITIES_INFO_LEN); - -#ifdef XENCTRL_HAS_XC_INTERFACE - xc = xc_interface_open(NULL, NULL, 0); - if ( !xc ) { - fprintf(stderr, "failed to open xen control interface.\n"); - goto out; - } -#else - xc = xc_interface_open(); - if ( xc == -1 ) { - fprintf(stderr, "failed to open xen control interface.\n"); - goto out; - } -#endif - - rc = xc_version(xc, XENVER_capabilities, &capabilities[0]); - if ( rc == -1 ) { - fprintf(stderr, "failed to make Xen version hypercall.\n"); - goto out_close; - } - - if (strstr(capabilities, "xen-3.0-x86_64")) - machine = EM_X86_64; - else if (strstr(capabilities, "xen-3.0-x86_32")) - machine = EM_386; - - out_close: - xc_interface_close(xc); - - out: -#endif - return machine; -} - -static int xen_crash_note_callback(void *UNUSED(data), int nr, - char *UNUSED(str), - unsigned long base, - unsigned long length) -{ - struct crash_note_info *note = xen_phys_notes + nr; - - note->base = base; - note->length = length; - - return 0; -} - -int xen_get_nr_phys_cpus(void) -{ - char *match = "Crash note\n"; - int cpus, n; - - if (xen_phys_cpus) - return xen_phys_cpus; - - if ((cpus = kexec_iomem_for_each_line(match, NULL, NULL))) { - n = sizeof(struct crash_note_info) * cpus; - xen_phys_notes = malloc(n); - if (!xen_phys_notes) { - fprintf(stderr, "failed to allocate xen_phys_notes.\n"); - return -1; - } - memset(xen_phys_notes, 0, n); - kexec_iomem_for_each_line(match, - xen_crash_note_callback, NULL); - xen_phys_cpus = cpus; - } - - 
return cpus; -} - -int xen_get_note(int cpu, uint64_t *addr, uint64_t *len) -{ - struct crash_note_info *note; - - if (xen_phys_cpus <= 0) - return -1; - - note = xen_phys_notes + cpu; - - *addr = note->base; - *len = note->length; - - return 0; -} diff -Npru kexec-tools-2.0.3.orig/kexec/crashdump.c kexec-tools-2.0.3/kexec/crashdump.c --- kexec-tools-2.0.3.orig/kexec/crashdump.c 2012-01-09 23:39:39.000000000 +0100 +++ kexec-tools-2.0.3/kexec/crashdump.c 2012-04-27 15:01:53.000000000 +0200 @@ -30,6 +30,7 @@ #include "kexec.h" #include "crashdump.h" #include "kexec-syscall.h" +#include "kexec-xen.h" /* include "crashdump-elf.c" twice to create two functions from one */ @@ -55,7 +56,7 @@ unsigned long crash_architecture(struct crash_elf_info *elf_info) { - if (xen_present()) + if (xen_detect() & XEN_DOM0) return xen_architecture(elf_info); else return elf_info->machine; diff -Npru kexec-tools-2.0.3.orig/kexec/crashdump.h kexec-tools-2.0.3/kexec/crashdump.h --- kexec-tools-2.0.3.orig/kexec/crashdump.h 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/crashdump.h 2012-03-02 22:49:30.000000000 +0100 @@ -56,9 +56,25 @@ unsigned long crash_architecture(struct unsigned long phys_to_virt(struct crash_elf_info *elf_info, unsigned long paddr); -int xen_present(void); -unsigned long xen_architecture(struct crash_elf_info *elf_info); -int xen_get_nr_phys_cpus(void); -int xen_get_note(int cpu, uint64_t *addr, uint64_t *len); +#ifdef HAVE_LIBXENCTRL +extern unsigned long xen_architecture(struct crash_elf_info *elf_info); +extern int xen_get_nr_phys_cpus(void); +extern int xen_get_note(int cpu, uint64_t *addr, uint64_t *len); +#else +static inline unsigned long xen_architecture(struct crash_elf_info *elf_info) +{ + return 0; +} + +static inline int xen_get_nr_phys_cpus(void) +{ + return 0; +} + +static inline int xen_get_note(int cpu, uint64_t *addr, uint64_t *len) +{ + return 0; +} +#endif /* HAVE_LIBXENCTRL */ #endif /* CRASHDUMP_H */ diff -Npru 
kexec-tools-2.0.3.orig/kexec/kexec-elf.c kexec-tools-2.0.3/kexec/kexec-elf.c --- kexec-tools-2.0.3.orig/kexec/kexec-elf.c 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/kexec/kexec-elf.c 2012-03-10 15:28:04.000000000 +0100 @@ -667,50 +667,27 @@ static void read_nhdr(const struct mem_e hdr->n_type = elf32_to_cpu(ehdr, hdr->n_type); } -static int build_mem_notes(struct mem_ehdr *ehdr) + +static void read_notes(struct mem_ehdr *ehdr, const unsigned char *note_start, + const unsigned char *note_end) { - const unsigned char *note_start, *note_end, *note; - size_t note_size, i; - /* First find the note segment or section */ - note_start = note_end = NULL; - for(i = 0; !note_start && (i < ehdr->e_phnum); i++) { - struct mem_phdr *phdr = &ehdr->e_phdr[i]; - /* - * binutils <= 2.17 has a bug where it can create the - * PT_NOTE segment with an offset of 0. Therefore - * check p_offset > 0. - * - * See: http://sourceware.org/bugzilla/show_bug.cgi?id=594 - */ - if (phdr->p_type == PT_NOTE && phdr->p_offset) { - note_start = (unsigned char *)phdr->p_data; - note_end = note_start + phdr->p_filesz; - } - } - for(i = 0; !note_start && (i < ehdr->e_shnum); i++) { - struct mem_shdr *shdr = &ehdr->e_shdr[i]; - if (shdr->sh_type == SHT_NOTE) { - note_start = shdr->sh_data; - note_end = note_start + shdr->sh_size; - } - } - if (!note_start) { - return 0; - } + const unsigned char *note; + size_t i = ehdr->e_notenum, note_size; /* Walk through and count the notes */ - ehdr->e_notenum = 0; for(note = note_start; note < note_end; note+= note_size) { ElfNN_Nhdr hdr; read_nhdr(ehdr, &hdr, note); note_size = sizeof(hdr); note_size += (hdr.n_namesz + 3) & ~3; note_size += (hdr.n_descsz + 3) & ~3; - ehdr->e_notenum += 1; + ++ehdr->e_notenum; } + + ehdr->e_note = xrealloc(ehdr->e_note, sizeof(*ehdr->e_note) * ehdr->e_notenum); + /* Now walk and normalize the notes */ - ehdr->e_note = xmalloc(sizeof(*ehdr->e_note) * ehdr->e_notenum); - for(i = 0, note = note_start; note < note_end; 
note+= note_size, i++) { + for(note = note_start; note < note_end; note+= note_size, i++) { const unsigned char *name, *desc; ElfNN_Nhdr hdr; read_nhdr(ehdr, &hdr, note); @@ -734,8 +711,46 @@ static int build_mem_notes(struct mem_eh ehdr->e_note[i].n_name = (char *)name; ehdr->e_note[i].n_desc = desc; ehdr->e_note[i].n_descsz = hdr.n_descsz; + } +} +static int build_mem_notes(struct mem_ehdr *ehdr) +{ + const unsigned char *note_start = NULL, *note_end; + size_t i; + + ehdr->e_note = NULL; + ehdr->e_notenum = 0; + + /* Find the note segment or section */ + for(i = 0; i < ehdr->e_phnum; i++) { + struct mem_phdr *phdr = &ehdr->e_phdr[i]; + /* + * binutils <= 2.17 has a bug where it can create the + * PT_NOTE segment with an offset of 0. Therefore + * check p_offset > 0. + * + * See: http://sourceware.org/bugzilla/show_bug.cgi?id=594 + */ + if (phdr->p_type == PT_NOTE && phdr->p_offset) { + note_start = (unsigned char *)phdr->p_data; + note_end = note_start + phdr->p_filesz; + read_notes(ehdr, note_start, note_end); + } } + + if (note_start) + return 0; + + for(i = 0; i < ehdr->e_shnum; i++) { + struct mem_shdr *shdr = &ehdr->e_shdr[i]; + if (shdr->sh_type == SHT_NOTE) { + note_start = shdr->sh_data; + note_end = note_start + shdr->sh_size; + read_notes(ehdr, note_start, note_end); + } + } + return 0; } diff -Npru kexec-tools-2.0.3.orig/kexec/kexec-xen.h kexec-tools-2.0.3/kexec/kexec-xen.h --- kexec-tools-2.0.3.orig/kexec/kexec-xen.h 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/kexec/kexec-xen.h 2012-05-22 12:43:10.000000000 +0200 @@ -0,0 +1,20 @@ +#ifndef __KEXEC_XEN_H__ +#define __KEXEC_XEN_H__ + +#include "config.h" + +#define XEN_NOT_YET_DETECTED -1 +#define XEN_NONE 0 +#define XEN_DOM0 (1 << 0) +#define XEN_PV (1 << 1) +#define XEN_HVM (1 << 2) + +#ifdef HAVE_LIBXENCTRL +extern int xen_detect(void); +#else +static inline int xen_detect(void) +{ + return XEN_NONE; +} +#endif /* HAVE_LIBXENCTRL */ +#endif /* __KEXEC_XEN_H__ */ diff -Npru 
kexec-tools-2.0.3.orig/kexec/kexec.c kexec-tools-2.0.3/kexec/kexec.c --- kexec-tools-2.0.3.orig/kexec/kexec.c 2011-11-09 01:34:30.000000000 +0100 +++ kexec-tools-2.0.3/kexec/kexec.c 2012-05-12 18:20:51.000000000 +0200 @@ -614,6 +614,12 @@ static void update_purgatory(struct kexe if (info->segment[i].mem == (void *)info->rhdr.rel_addr) { continue; } + /* + * We do not care about contents of reserved + * but not initialized segments. + */ + if (!info->segment[i].buf) + continue; sha256_update(&ctx, info->segment[i].buf, info->segment[i].bufsz); nullsz = info->segment[i].memsz - info->segment[i].bufsz; @@ -747,7 +753,7 @@ static int my_load(const char *type, int update_purgatory(&info); if (entry) info.entry = entry; -#if 0 +#if DEBUG fprintf(stderr, "kexec_load: entry = %p flags = %lx\n", info.entry, info.kexec_flags); print_segments(stderr, &info); diff -Npru kexec-tools-2.0.3.orig/kexec/kexec.h kexec-tools-2.0.3/kexec/kexec.h --- kexec-tools-2.0.3.orig/kexec/kexec.h 2011-10-21 09:46:10.000000000 +0200 +++ kexec-tools-2.0.3/kexec/kexec.h 2012-03-27 20:33:51.000000000 +0200 @@ -94,6 +94,16 @@ do { \ } \ } while(0) +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. 
+ */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + extern unsigned long long mem_min, mem_max; struct kexec_segment { diff -Npru kexec-tools-2.0.3.orig/purgatory/arch/i386/console-x86.c kexec-tools-2.0.3/purgatory/arch/i386/console-x86.c --- kexec-tools-2.0.3.orig/purgatory/arch/i386/console-x86.c 2010-07-29 11:22:16.000000000 +0200 +++ kexec-tools-2.0.3/purgatory/arch/i386/console-x86.c 2012-05-22 09:42:10.000000000 +0200 @@ -1,7 +1,14 @@ +#include "config.h" + #include #include #include +#ifdef HAVE_LIBXENCTRL +#include +#include +#endif + /* * VGA * ============================================================================= @@ -124,12 +131,79 @@ static void putchar_serial(int ch) serial_tx_byte(ch); } +#ifdef HAVE_LIBXENCTRL + +/* This code is based on Xen Mini-OS console implementation. */ + +uint8_t console_xen_pv = 0; +struct xencons_interface *console_xen_pv_if = NULL; +uint32_t console_xen_pv_evtchn = 0; + +#ifdef __i386__ +#define mb() asm volatile("lock addl $0, 0(%%esp)" : : : "memory") +#define wmb() asm volatile("" : : : "memory") +#else +#define mb() asm volatile("mfence" : : : "memory") +#define wmb() asm volatile("sfence" : : : "memory") +#endif + +/* + * Queue one character on the Xen PV console output ring and send an + * event on console_xen_pv_evtchn. The character is dropped when the + * ring is full or its producer/consumer indices are inconsistent. + */ +static void xen_pv_send_char(int ch) +{ + XENCONS_RING_IDX cons, prod; + evtchn_send_t op; + + cons = console_xen_pv_if->out_cons; + prod = console_xen_pv_if->out_prod; + + mb(); + + /* Ring full (==) or indices corrupted (>): drop the character. 
*/ + if ((prod - cons) >= sizeof(console_xen_pv_if->out)) + return; + + console_xen_pv_if->out[MASK_XENCONS_IDX(prod++, console_xen_pv_if->out)] = ch; + + wmb(); + + console_xen_pv_if->out_prod = prod; + + op.port = console_xen_pv_evtchn; + +#ifdef __i386__ + asm("int $0x82" : : "a" (__HYPERVISOR_event_channel_op), + "b" (EVTCHNOP_send), "c" (&op) : "memory"); +#else + asm("syscall" : : "a" (__HYPERVISOR_event_channel_op), + "D" (EVTCHNOP_send), "S" (&op) : "rcx", "r11", "memory"); +#endif +} + +static void putchar_xen_pv(int ch) +{ + if (!console_xen_pv) + return; + + if (ch == '\n') + xen_pv_send_char('\r'); + + xen_pv_send_char(ch); +} +#else +static void putchar_xen_pv(int ch) +{ +} +#endif /* HAVE_LIBXENCTRL */ + /* Generic wrapper function */ void putchar(int ch) { putchar_vga(ch); putchar_serial(ch); + putchar_xen_pv(ch); } - - diff -Npru kexec-tools-2.0.3.orig/purgatory/arch/x86_64/Makefile kexec-tools-2.0.3/purgatory/arch/x86_64/Makefile --- kexec-tools-2.0.3.orig/purgatory/arch/x86_64/Makefile 2010-07-29 11:22:16.000000000 +0200 +++ kexec-tools-2.0.3/purgatory/arch/x86_64/Makefile 2012-05-21 19:59:53.000000000 +0200 @@ -5,6 +5,7 @@ x86_64_PURGATORY_SRCS_native = purgatory/arch/x86_64/entry64-32.S x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/entry64.S x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/setup-x86_64.S +x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/setup-x86_64-xen-pv.S x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/stack.S x86_64_PURGATORY_SRCS_native += purgatory/arch/x86_64/purgatory-x86_64.c diff -Npru kexec-tools-2.0.3.orig/purgatory/arch/x86_64/setup-x86_64-xen-pv.S kexec-tools-2.0.3/purgatory/arch/x86_64/setup-x86_64-xen-pv.S --- kexec-tools-2.0.3.orig/purgatory/arch/x86_64/setup-x86_64-xen-pv.S 1970-01-01 01:00:00.000000000 +0100 +++ kexec-tools-2.0.3/purgatory/arch/x86_64/setup-x86_64-xen-pv.S 2012-05-22 12:45:38.000000000 +0200 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011-2012 Acunu Limited + * + * 
kexec/kdump implementation for Xen domU guests was written by Daniel Kiper. + * + * Some ideas are taken from: + * - native kexec/kdump implementation, + * - kexec/kdump implementation for Xen Linux Kernel Ver. 2.6.18, + * - PV-GRUB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" + +#ifdef HAVE_LIBXENCTRL + + .text + .code64 + .globl xen_pv_purgatory_start, bootstrap_stack_paddr + +xen_pv_purgatory_start: + testq %rax, %rax + jnz 1f + +0: + /* Is boot CPU ready? */ + cmpb $0, wait_for_boot_cpu + jnz 0b + + /* Go to bootstrap. */ + jmpq *bootstrap_stack_paddr + +1: + /* Setup a stack. */ + movq $lstack_end, %rsp + + pushq %r15 + pushq %r14 + pushq %rax + + call purgatory + + popq %rax + popq %r14 + popq %r15 + + /* Boot CPU is ready. */ + lock decb wait_for_boot_cpu + + /* Go to bootstrap. */ + jmpq *bootstrap_stack_paddr + +bootstrap_stack_paddr: + .quad 0 /* PHYSICAL address of bootstrap stack. */ + .size bootstrap_stack_paddr, . - bootstrap_stack_paddr + +wait_for_boot_cpu: + .byte 1 /* Wait for boot CPU. 
*/ +#endif /* HAVE_LIBXENCTRL */ diff -Npru kexec-tools-2.0.3.orig/purgatory/arch/x86_64/setup-x86_64.S kexec-tools-2.0.3/purgatory/arch/x86_64/setup-x86_64.S --- kexec-tools-2.0.3.orig/purgatory/arch/x86_64/setup-x86_64.S 2011-10-03 00:56:38.000000000 +0200 +++ kexec-tools-2.0.3/purgatory/arch/x86_64/setup-x86_64.S 2012-04-13 17:56:19.000000000 +0200 @@ -23,7 +23,7 @@ #undef i386 .text - .globl purgatory_start + .globl purgatory_start, lstack_end .balign 16 purgatory_start: .code64