KEXEC: Add new hypercall to fix 64/32bit truncation errors. Currently, KEXEC_CMD_kexec_get_range in compat mode, or with 32bit Xen, will truncate 64bit pointers to 32bits, leading to problems on machines with more than 4GiB of RAM. As 32bit dom0 kernels tend to be PAE capable, this is especially wasteful, as most structures currently limited to <4GiB could easily be <64GiB instead. Therefore, introduce a new hypercall 'KEXEC_CMD_kexec64_get_range' which passes similar information as KEXEC_CMD_kexec_get_range, but which uses a structure with explicitly specified field widths, causing it to be identical in the compat and regular case. This new structure, xen_kexec64_range_t, will be the same as xen_kexec_range_t if Xen is compiled for x86_64, but will still use 64bit integers if Xen is compiled for x86_32. To fix 32bit Xen which uses 32bit intergers for addresses and sizes, change the internals to use xen_kexec64_range_t which will use 64bit integers instead. This also invovles changing several casts to explicitly use uint64_ts rather than unsigned longs. In addition, the hypercall entry points need updating to be able to cater for all possiblities. |Xen/dom0 bits| Bit width of addresses in structure for | +------+------+---------------------------+-----------------------------+ | Xen | dom0 | KEXEC_CMD_kexec_get_range | KEXEC_CMD_kexec64_get_range | +------+------+---------------------------+-----------------------------+ | 32 | 32 | 32 | 64 | | 64 | 32 | 32 | 64 | | 64 | 64 | 64 | 64 | +------+------+---------------------------+-----------------------------+ This has been solved by splitting do_kexec_op_internal back into do_kexec_op{,_compat} and changing kexec_get_range{,_compat} to kexec_get_range{32,64} which now exist irrispective of CONFIG_COMPAT or not. The knock-on effect of removing do_kexec_op_internal means that kexec_load_unload_compat is only ever called from inside an #ifdef CONFIG_COMPAT codepath, which does not exist on Xen x86_32. Therefore, move the #ifdef CONFIG_COMPAT guards to include the entire function. Finally, and a little unrelatedly, fix KEXEC_CMD_kexec_{load,unload} to return -EBUSY instead of EOK if a kexec is in progress. Signed-off-by: Andrew Cooper diff -r d960ea9b337d xen/arch/ia64/xen/machine_kexec.c --- a/xen/arch/ia64/xen/machine_kexec.c +++ b/xen/arch/ia64/xen/machine_kexec.c @@ -102,10 +102,10 @@ void machine_reboot_kexec(xen_kexec_imag machine_kexec(image); } -int machine_kexec_get_xen(xen_kexec_range_t *range) +int machine_kexec_get_xen(xen_kexec64_range_t *range) { range->start = ia64_tpa(_text); - range->size = (unsigned long)_end - (unsigned long)_text; + range->size = (uint64_t)_end - (uint64_t)_text; return 0; } @@ -113,31 +113,31 @@ int machine_kexec_get_xen(xen_kexec_rang #define ELF_PAGE_SIZE (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT) #define ELF_PAGE_MASK (~(ELF_PAGE_SIZE - 1)) -static int machine_kexec_get_xenheap(xen_kexec_range_t *range) +static int machine_kexec_get_xenheap(xen_kexec64_range_t *range) { range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK; range->size = - (((unsigned long)range->start + KERNEL_TR_PAGE_SIZE) & + (((uint64_t)range->start + KERNEL_TR_PAGE_SIZE) & ~(KERNEL_TR_PAGE_SIZE - 1)) - - (unsigned long)range->start; + - (uint64_t)range->start; return 0; } -static int machine_kexec_get_boot_param(xen_kexec_range_t *range) +static int machine_kexec_get_boot_param(xen_kexec64_range_t *range) { range->start = __pa(ia64_boot_param); range->size = sizeof(*ia64_boot_param); return 0; } -static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range) +static int machine_kexec_get_efi_memmap(xen_kexec64_range_t *range) { range->start = ia64_boot_param->efi_memmap; range->size = ia64_boot_param->efi_memmap_size; return 0; } -int machine_kexec_get(xen_kexec_range_t *range) +int machine_kexec_get(xen_kexec64_range_t *range) { switch (range->range) { case KEXEC_RANGE_MA_XEN: diff -r d960ea9b337d xen/arch/x86/machine_kexec.c --- a/xen/arch/x86/machine_kexec.c +++ b/xen/arch/x86/machine_kexec.c @@ -120,7 +120,7 @@ void machine_kexec(xen_kexec_image_t *im } } -int machine_kexec_get(xen_kexec_range_t *range) +int machine_kexec_get(xen_kexec64_range_t *range) { if (range->range != KEXEC_RANGE_MA_XEN) return -EINVAL; diff -r d960ea9b337d xen/arch/x86/x86_32/machine_kexec.c --- a/xen/arch/x86/x86_32/machine_kexec.c +++ b/xen/arch/x86/x86_32/machine_kexec.c @@ -11,11 +11,11 @@ #include #include -int machine_kexec_get_xen(xen_kexec_range_t *range) +int machine_kexec_get_xen(xen_kexec64_range_t *range) { range->start = virt_to_maddr(_start); - range->size = (unsigned long)xenheap_phys_end - - (unsigned long)range->start; + range->size = (uint64_t)xenheap_phys_end - + (uint64_t)range->start; return 0; } diff -r d960ea9b337d xen/arch/x86/x86_64/machine_kexec.c --- a/xen/arch/x86/x86_64/machine_kexec.c +++ b/xen/arch/x86/x86_64/machine_kexec.c @@ -11,10 +11,10 @@ #include #include -int machine_kexec_get_xen(xen_kexec_range_t *range) +int machine_kexec_get_xen(xen_kexec64_range_t *range) { range->start = virt_to_maddr(_start); - range->size = virt_to_maddr(_end) - (unsigned long)range->start; + range->size = virt_to_maddr(_end) - (uint64_t)range->start; return 0; } diff -r d960ea9b337d xen/common/kexec.c --- a/xen/common/kexec.c +++ b/xen/common/kexec.c @@ -56,6 +56,14 @@ static struct { unsigned long size; } ranges[16] __initdata; +/* This XLAT macro is required now even without CONFIG_COMPAT. */ +#define TRANSLATE_kexec_range(_d_, _s_) do { \ + (_d_)->range = (_s_)->range; \ + (_d_)->nr = (_s_)->nr; \ + (_d_)->size = (_s_)->size; \ + (_d_)->start = (_s_)->start; \ +} while (0) + /* * Parse command lines in the format * @@ -280,7 +288,7 @@ static int sizeof_note(const char *name, ELFNOTE_ALIGN(descsz)); } -static int kexec_get_reserve(xen_kexec_range_t *range) +static int kexec_get_reserve(xen_kexec64_range_t *range) { if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) { range->start = kexec_crash_area.start; @@ -291,7 +299,7 @@ static int kexec_get_reserve(xen_kexec_r return 0; } -static int kexec_get_cpu(xen_kexec_range_t *range) +static int kexec_get_cpu(xen_kexec64_range_t *range) { int nr = range->nr; int nr_bytes = 0; @@ -335,14 +343,14 @@ static int kexec_get_cpu(xen_kexec_range return 0; } -static int kexec_get_vmcoreinfo(xen_kexec_range_t *range) +static int kexec_get_vmcoreinfo(xen_kexec64_range_t *range) { range->start = __pa((unsigned long)vmcoreinfo_data); range->size = VMCOREINFO_BYTES; return 0; } -static int kexec_get_range_internal(xen_kexec_range_t *range) +static int kexec_get_range_internal(xen_kexec64_range_t *range) { int ret = -EINVAL; @@ -365,9 +373,14 @@ static int kexec_get_range_internal(xen_ return ret; } -static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) +/* This function can be invoked from either a KEXEC_CMD_kexec64_get_range + * hypercall, or from a KEXEC_CMD_kexec_get_range hypercall with 64bit dom0 + * on 64bit Xen. In the first case, the guest structure is a + * xen_kexec64_range_t, and in the second case, the xen_kexec_range_t guest + * structure is identical to xen_kexec64_range_t. */ +static int kexec_get_range64(XEN_GUEST_HANDLE(void) uarg) { - xen_kexec_range_t range; + xen_kexec64_range_t range; int ret = -EINVAL; if ( unlikely(copy_from_guest(&range, uarg, 1)) ) @@ -381,34 +394,40 @@ static int kexec_get_range(XEN_GUEST_HAN return ret; } -static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg) +/* This function can be invoked from either a KEXEC_CMD_kexec_get_range + * compat hypercall for 32bit dom0 on 64bit Xen, or from the same hypercall + * on 32bit Xen. In both cases, the guest argument uses 32bit integers, so + * translate them to 64bit for use by kexec_get_range_internal. The + * preprocessor guards are to choose the correct 32bit structure, as the + * compat_* structures dont exist in 32bit Xen. */ +static int kexec_get_range32(XEN_GUEST_HANDLE(void) uarg) { + xen_kexec64_range_t range64; #ifdef CONFIG_COMPAT - xen_kexec_range_t range; - compat_kexec_range_t compat_range; + compat_kexec_range_t range32; +#else + xen_kexec_range_t range32; +#endif int ret = -EINVAL; - if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) ) + if ( unlikely(copy_from_guest(&range32, uarg, 1)) ) return -EFAULT; - XLAT_kexec_range(&range, &compat_range); + TRANSLATE_kexec_range(&range64, &range32); - ret = kexec_get_range_internal(&range); + ret = kexec_get_range_internal(&range64); /* Dont silently truncate physical addresses or sizes. */ - if ( (range.start | range.size) & ~(unsigned long)(~0u) ) + if ( (range64.start | range64.size) & ~(unsigned long)(~0u) ) return -ERANGE; if ( ret == 0 ) { - XLAT_kexec_range(&compat_range, &range); - if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) ) + TRANSLATE_kexec_range(&range32, &range64); + if ( unlikely(copy_to_guest(uarg, &range32, 1)) ) return -EFAULT; } return ret; -#else /* CONFIG_COMPAT */ - return 0; -#endif /* CONFIG_COMPAT */ } static int kexec_load_get_bits(int type, int *base, int *bit) @@ -543,10 +562,10 @@ static int kexec_load_unload(unsigned lo return kexec_load_unload_internal(op, &load); } +#ifdef CONFIG_COMPAT static int kexec_load_unload_compat(unsigned long op, XEN_GUEST_HANDLE(void) uarg) { -#ifdef CONFIG_COMPAT compat_kexec_load_t compat_load; xen_kexec_load_t load; @@ -564,10 +583,8 @@ static int kexec_load_unload_compat(unsi XLAT_kexec_image(&load.image, &compat_load.image); return kexec_load_unload_internal(op, &load); -#else /* CONFIG_COMPAT */ - return 0; +} #endif /* CONFIG_COMPAT */ -} static int kexec_exec(XEN_GUEST_HANDLE(void) uarg) { @@ -601,8 +618,51 @@ static int kexec_exec(XEN_GUEST_HANDLE(v return -EINVAL; /* never reached */ } -int do_kexec_op_internal(unsigned long op, XEN_GUEST_HANDLE(void) uarg, - int compat) +long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) +{ + unsigned long flags; + int ret = -EINVAL; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + ret = xsm_kexec(); + if ( ret ) + return ret; + + switch ( op ) + { +#ifdef __i386__ + case KEXEC_CMD_kexec_get_range: + ret = kexec_get_range32(uarg); + break; +#else + case KEXEC_CMD_kexec_get_range: +#endif + case KEXEC_CMD_kexec64_get_range: + ret = kexec_get_range64(uarg); + break; + + case KEXEC_CMD_kexec_load: + case KEXEC_CMD_kexec_unload: + spin_lock_irqsave(&kexec_lock, flags); + if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags)) + ret = kexec_load_unload(op, uarg); + else + ret = -EBUSY; + spin_unlock_irqrestore(&kexec_lock, flags); + break; + + case KEXEC_CMD_kexec: + ret = kexec_exec(uarg); + break; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) { unsigned long flags; int ret = -EINVAL; @@ -617,23 +677,21 @@ int do_kexec_op_internal(unsigned long o switch ( op ) { case KEXEC_CMD_kexec_get_range: - if (compat) - ret = kexec_get_range_compat(uarg); - else - ret = kexec_get_range(uarg); + ret = kexec_get_range32(uarg); + break; + case KEXEC_CMD_kexec64_get_range: + ret = kexec_get_range64(uarg); break; case KEXEC_CMD_kexec_load: case KEXEC_CMD_kexec_unload: spin_lock_irqsave(&kexec_lock, flags); if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags)) - { - if (compat) - ret = kexec_load_unload_compat(op, uarg); - else - ret = kexec_load_unload(op, uarg); - } + ret = kexec_load_unload_compat(op, uarg); + else + ret = -EBUSY; spin_unlock_irqrestore(&kexec_lock, flags); break; + case KEXEC_CMD_kexec: ret = kexec_exec(uarg); break; @@ -641,17 +699,6 @@ int do_kexec_op_internal(unsigned long o return ret; } - -long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) -{ - return do_kexec_op_internal(op, uarg, 0); -} - -#ifdef CONFIG_COMPAT -int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) -{ - return do_kexec_op_internal(op, uarg, 1); -} #endif /* diff -r d960ea9b337d xen/include/public/kexec.h --- a/xen/include/public/kexec.h +++ b/xen/include/public/kexec.h @@ -155,6 +155,15 @@ typedef struct xen_kexec_range { unsigned long start; } xen_kexec_range_t; +#define KEXEC_CMD_kexec64_get_range 4 +/* xen_kexec_range_t using explicit sizes for fields. */ +typedef struct xen_kexec64_range { + int32_t range; + int32_t nr; + uint64_t size; + uint64_t start; +} xen_kexec64_range_t; + #endif /* _XEN_PUBLIC_KEXEC_H */ /* diff -r d960ea9b337d xen/include/xen/kexec.h --- a/xen/include/xen/kexec.h +++ b/xen/include/xen/kexec.h @@ -34,8 +34,8 @@ void kexec_crash(void); void kexec_crash_save_cpu(void); crash_xen_info_t *kexec_crash_save_info(void); void machine_crash_shutdown(void); -int machine_kexec_get(xen_kexec_range_t *range); -int machine_kexec_get_xen(xen_kexec_range_t *range); +int machine_kexec_get(xen_kexec64_range_t *range); +int machine_kexec_get_xen(xen_kexec64_range_t *range); void compat_machine_kexec(unsigned long rnk, unsigned long indirection_page, diff -r d960ea9b337d xen/include/xlat.lst --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -50,9 +50,7 @@ ? grant_entry_v1 grant_table.h ? grant_entry_header grant_table.h ? grant_entry_v2 grant_table.h -? kexec_exec kexec.h ! kexec_image kexec.h -! kexec_range kexec.h ! add_to_physmap memory.h ! foreign_memory_map memory.h ! memory_exchange memory.h