|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 1 of 3] KEXEC: Add new hypercall to fix 64/32bit truncation errors. v2
>>> On 22.12.11 at 18:36, Andrew Cooper <andrew.cooper3@xxxxxxxxxx> wrote:
> Currently, KEXEC_CMD_kexec_get_range in compat mode, or with 32bit
> Xen, will truncate 64bit pointers to 32bits, leading to problems on
> machines with more than 4GiB of RAM. As 32bit dom0 kernels tend to be
> PAE capable, this is especially wasteful, as most structures currently
> limited to <4GiB could easily be <64GiB instead.
>
> Therefore, introduce a new hypercall 'KEXEC_CMD_kexec64_get_range'
> which passes similar information as KEXEC_CMD_kexec_get_range, but
> which uses a structure with explicitly specified field widths, causing
> it to be identical in the compat and regular case. This new
> structure, xen_kexec64_range_t, will be the same as xen_kexec_range_t
> if Xen is compiled for x86_64, but will still use 64bit integers if
> Xen is compiled for x86_32.
>
> To fix 32bit Xen which uses 32bit integers for addresses and sizes,
> change the internals to use xen_kexec64_range_t which will use 64bit
> integers instead. This also involves changing several casts to
> explicitly use uint64_ts rather than unsigned longs.
Just for the record - I continue to be opposed to doing this for the
32-bit hypervisor. All relevant allocations are below 4G there, so
there's simply no need to decrease code readability for no benefit.
Jan
> In addition, the hypercall entry points need updating to be able to
> cater for all possibilities.
>
> |Xen/dom0 bits| Bit width of addresses in structure for |
> +------+------+---------------------------+-----------------------------+
> | Xen | dom0 | KEXEC_CMD_kexec_get_range | KEXEC_CMD_kexec64_get_range |
> +------+------+---------------------------+-----------------------------+
> | 32 | 32 | 32 | 64 |
> | 64 | 32 | 32 | 64 |
> | 64 | 64 | 64 | 64 |
> +------+------+---------------------------+-----------------------------+
>
> This has been solved by splitting do_kexec_op_internal back into
> do_kexec_op{,_compat} and changing kexec_get_range{,_compat} to
> kexec_get_range{32,64} which now exist irrespective of CONFIG_COMPAT
> or not.
>
> The knock-on effect of removing do_kexec_op_internal means that
> kexec_load_unload_compat is only ever called from inside an #ifdef
> CONFIG_COMPAT codepath, which does not exist on Xen x86_32.
> Therefore, move the #ifdef CONFIG_COMPAT guards to include the entire
> function.
>
> Finally, and a little unrelatedly, fix KEXEC_CMD_kexec_{load,unload}
> to return -EBUSY instead of EOK if a kexec is in progress.
>
> Changes since v1:
> * Fix check for pointer truncation to work when Xen is compiled for
> 32 bit mode as well.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
>
> diff -r e56500f95b6a -r f571dc8e4368 xen/arch/ia64/xen/machine_kexec.c
> --- a/xen/arch/ia64/xen/machine_kexec.c
> +++ b/xen/arch/ia64/xen/machine_kexec.c
> @@ -102,10 +102,10 @@ void machine_reboot_kexec(xen_kexec_imag
> machine_kexec(image);
> }
>
> -int machine_kexec_get_xen(xen_kexec_range_t *range)
> +int machine_kexec_get_xen(xen_kexec64_range_t *range)
> {
> range->start = ia64_tpa(_text);
> - range->size = (unsigned long)_end - (unsigned long)_text;
> + range->size = (uint64_t)_end - (uint64_t)_text;
> return 0;
> }
>
> @@ -113,31 +113,31 @@ int machine_kexec_get_xen(xen_kexec_rang
> #define ELF_PAGE_SIZE (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT)
> #define ELF_PAGE_MASK (~(ELF_PAGE_SIZE - 1))
>
> -static int machine_kexec_get_xenheap(xen_kexec_range_t *range)
> +static int machine_kexec_get_xenheap(xen_kexec64_range_t *range)
> {
> range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK;
> range->size =
> - (((unsigned long)range->start + KERNEL_TR_PAGE_SIZE) &
> + (((uint64_t)range->start + KERNEL_TR_PAGE_SIZE) &
> ~(KERNEL_TR_PAGE_SIZE - 1))
> - - (unsigned long)range->start;
> + - (uint64_t)range->start;
> return 0;
> }
>
> -static int machine_kexec_get_boot_param(xen_kexec_range_t *range)
> +static int machine_kexec_get_boot_param(xen_kexec64_range_t *range)
> {
> range->start = __pa(ia64_boot_param);
> range->size = sizeof(*ia64_boot_param);
> return 0;
> }
>
> -static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range)
> +static int machine_kexec_get_efi_memmap(xen_kexec64_range_t *range)
> {
> range->start = ia64_boot_param->efi_memmap;
> range->size = ia64_boot_param->efi_memmap_size;
> return 0;
> }
>
> -int machine_kexec_get(xen_kexec_range_t *range)
> +int machine_kexec_get(xen_kexec64_range_t *range)
> {
> switch (range->range) {
> case KEXEC_RANGE_MA_XEN:
> diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/machine_kexec.c
> --- a/xen/arch/x86/machine_kexec.c
> +++ b/xen/arch/x86/machine_kexec.c
> @@ -120,7 +120,7 @@ void machine_kexec(xen_kexec_image_t *im
> }
> }
>
> -int machine_kexec_get(xen_kexec_range_t *range)
> +int machine_kexec_get(xen_kexec64_range_t *range)
> {
> if (range->range != KEXEC_RANGE_MA_XEN)
> return -EINVAL;
> diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/x86_32/machine_kexec.c
> --- a/xen/arch/x86/x86_32/machine_kexec.c
> +++ b/xen/arch/x86/x86_32/machine_kexec.c
> @@ -11,11 +11,11 @@
> #include <asm/page.h>
> #include <public/kexec.h>
>
> -int machine_kexec_get_xen(xen_kexec_range_t *range)
> +int machine_kexec_get_xen(xen_kexec64_range_t *range)
> {
> range->start = virt_to_maddr(_start);
> - range->size = (unsigned long)xenheap_phys_end -
> - (unsigned long)range->start;
> + range->size = (uint64_t)xenheap_phys_end -
> + (uint64_t)range->start;
> return 0;
> }
>
> diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/x86_64/machine_kexec.c
> --- a/xen/arch/x86/x86_64/machine_kexec.c
> +++ b/xen/arch/x86/x86_64/machine_kexec.c
> @@ -11,10 +11,10 @@
> #include <asm/page.h>
> #include <public/kexec.h>
>
> -int machine_kexec_get_xen(xen_kexec_range_t *range)
> +int machine_kexec_get_xen(xen_kexec64_range_t *range)
> {
> range->start = virt_to_maddr(_start);
> - range->size = virt_to_maddr(_end) - (unsigned long)range->start;
> + range->size = virt_to_maddr(_end) - (uint64_t)range->start;
> return 0;
> }
>
> diff -r e56500f95b6a -r f571dc8e4368 xen/common/kexec.c
> --- a/xen/common/kexec.c
> +++ b/xen/common/kexec.c
> @@ -56,6 +56,14 @@ static struct {
> unsigned long size;
> } ranges[16] __initdata;
>
> +/* This XLAT macro is required now even without CONFIG_COMPAT. */
> +#define TRANSLATE_kexec_range(_d_, _s_) do { \
> + (_d_)->range = (_s_)->range; \
> + (_d_)->nr = (_s_)->nr; \
> + (_d_)->size = (_s_)->size; \
> + (_d_)->start = (_s_)->start; \
> +} while (0)
> +
> /*
> * Parse command lines in the format
> *
> @@ -280,7 +288,7 @@ static int sizeof_note(const char *name,
> ELFNOTE_ALIGN(descsz));
> }
>
> -static int kexec_get_reserve(xen_kexec_range_t *range)
> +static int kexec_get_reserve(xen_kexec64_range_t *range)
> {
> if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) {
> range->start = kexec_crash_area.start;
> @@ -291,7 +299,7 @@ static int kexec_get_reserve(xen_kexec_r
> return 0;
> }
>
> -static int kexec_get_cpu(xen_kexec_range_t *range)
> +static int kexec_get_cpu(xen_kexec64_range_t *range)
> {
> int nr = range->nr;
> int nr_bytes = 0;
> @@ -335,14 +343,14 @@ static int kexec_get_cpu(xen_kexec_range
> return 0;
> }
>
> -static int kexec_get_vmcoreinfo(xen_kexec_range_t *range)
> +static int kexec_get_vmcoreinfo(xen_kexec64_range_t *range)
> {
> range->start = __pa((unsigned long)vmcoreinfo_data);
> range->size = VMCOREINFO_BYTES;
> return 0;
> }
>
> -static int kexec_get_range_internal(xen_kexec_range_t *range)
> +static int kexec_get_range_internal(xen_kexec64_range_t *range)
> {
> int ret = -EINVAL;
>
> @@ -365,9 +373,14 @@ static int kexec_get_range_internal(xen_
> return ret;
> }
>
> -static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
> +/* This function can be invoked from either a KEXEC_CMD_kexec64_get_range
> + * hypercall, or from a KEXEC_CMD_kexec_get_range hypercall with 64bit dom0
> + * on 64bit Xen. In the first case, the guest structure is a
> + * xen_kexec64_range_t, and in the second case, the xen_kexec_range_t guest
> + * structure is identical to xen_kexec64_range_t. */
> +static int kexec_get_range64(XEN_GUEST_HANDLE(void) uarg)
> {
> - xen_kexec_range_t range;
> + xen_kexec64_range_t range;
> int ret = -EINVAL;
>
> if ( unlikely(copy_from_guest(&range, uarg, 1)) )
> @@ -381,34 +394,40 @@ static int kexec_get_range(XEN_GUEST_HAN
> return ret;
> }
>
> -static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
> +/* This function can be invoked from either a KEXEC_CMD_kexec_get_range
> + * compat hypercall for 32bit dom0 on 64bit Xen, or from the same hypercall
> + * on 32bit Xen. In both cases, the guest argument uses 32bit integers, so
>
> + * translate them to 64bit for use by kexec_get_range_internal. The
> + * preprocessor guards are to choose the correct 32bit structure, as the
> + * compat_* structures dont exist in 32bit Xen. */
> +static int kexec_get_range32(XEN_GUEST_HANDLE(void) uarg)
> {
> + xen_kexec64_range_t range64;
> #ifdef CONFIG_COMPAT
> - xen_kexec_range_t range;
> - compat_kexec_range_t compat_range;
> + compat_kexec_range_t range32;
> +#else
> + xen_kexec_range_t range32;
> +#endif
> int ret = -EINVAL;
>
> - if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
> + if ( unlikely(copy_from_guest(&range32, uarg, 1)) )
> return -EFAULT;
>
> - XLAT_kexec_range(&range, &compat_range);
> + TRANSLATE_kexec_range(&range64, &range32);
>
> - ret = kexec_get_range_internal(&range);
> + ret = kexec_get_range_internal(&range64);
>
> /* Dont silently truncate physical addresses or sizes. */
> - if ( (range.start | range.size) & ~(unsigned long)(~0u) )
> + if ( (range64.start | range64.size) & 0xffffffff00000000ULL )
> return -ERANGE;
>
> if ( ret == 0 ) {
> - XLAT_kexec_range(&compat_range, &range);
> - if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
> + TRANSLATE_kexec_range(&range32, &range64);
> + if ( unlikely(copy_to_guest(uarg, &range32, 1)) )
> return -EFAULT;
> }
>
> return ret;
> -#else /* CONFIG_COMPAT */
> - return 0;
> -#endif /* CONFIG_COMPAT */
> }
>
> static int kexec_load_get_bits(int type, int *base, int *bit)
> @@ -543,10 +562,10 @@ static int kexec_load_unload(unsigned lo
> return kexec_load_unload_internal(op, &load);
> }
>
> +#ifdef CONFIG_COMPAT
> static int kexec_load_unload_compat(unsigned long op,
> XEN_GUEST_HANDLE(void) uarg)
> {
> -#ifdef CONFIG_COMPAT
> compat_kexec_load_t compat_load;
> xen_kexec_load_t load;
>
> @@ -564,10 +583,8 @@ static int kexec_load_unload_compat(unsi
> XLAT_kexec_image(&load.image, &compat_load.image);
>
> return kexec_load_unload_internal(op, &load);
> -#else /* CONFIG_COMPAT */
> - return 0;
> +}
> #endif /* CONFIG_COMPAT */
> -}
>
> static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
> {
> @@ -601,8 +618,51 @@ static int kexec_exec(XEN_GUEST_HANDLE(v
> return -EINVAL; /* never reached */
> }
>
> -int do_kexec_op_internal(unsigned long op, XEN_GUEST_HANDLE(void) uarg,
> - int compat)
> +long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
> +{
> + unsigned long flags;
> + int ret = -EINVAL;
> +
> + if ( !IS_PRIV(current->domain) )
> + return -EPERM;
> +
> + ret = xsm_kexec();
> + if ( ret )
> + return ret;
> +
> + switch ( op )
> + {
> +#ifdef __i386__
> + case KEXEC_CMD_kexec_get_range:
> + ret = kexec_get_range32(uarg);
> + break;
> +#else
> + case KEXEC_CMD_kexec_get_range:
> +#endif
> + case KEXEC_CMD_kexec64_get_range:
> + ret = kexec_get_range64(uarg);
> + break;
> +
> + case KEXEC_CMD_kexec_load:
> + case KEXEC_CMD_kexec_unload:
> + spin_lock_irqsave(&kexec_lock, flags);
> + if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
> + ret = kexec_load_unload(op, uarg);
> + else
> + ret = -EBUSY;
> + spin_unlock_irqrestore(&kexec_lock, flags);
> + break;
> +
> + case KEXEC_CMD_kexec:
> + ret = kexec_exec(uarg);
> + break;
> + }
> +
> + return ret;
> +}
> +
> +#ifdef CONFIG_COMPAT
> +int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
> {
> unsigned long flags;
> int ret = -EINVAL;
> @@ -617,23 +677,21 @@ int do_kexec_op_internal(unsigned long o
> switch ( op )
> {
> case KEXEC_CMD_kexec_get_range:
> - if (compat)
> - ret = kexec_get_range_compat(uarg);
> - else
> - ret = kexec_get_range(uarg);
> + ret = kexec_get_range32(uarg);
> + break;
> + case KEXEC_CMD_kexec64_get_range:
> + ret = kexec_get_range64(uarg);
> break;
> case KEXEC_CMD_kexec_load:
> case KEXEC_CMD_kexec_unload:
> spin_lock_irqsave(&kexec_lock, flags);
> if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
> - {
> - if (compat)
> - ret = kexec_load_unload_compat(op, uarg);
> - else
> - ret = kexec_load_unload(op, uarg);
> - }
> + ret = kexec_load_unload_compat(op, uarg);
> + else
> + ret = -EBUSY;
> spin_unlock_irqrestore(&kexec_lock, flags);
> break;
> +
> case KEXEC_CMD_kexec:
> ret = kexec_exec(uarg);
> break;
> @@ -641,17 +699,6 @@ int do_kexec_op_internal(unsigned long o
>
> return ret;
> }
> -
> -long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
> -{
> - return do_kexec_op_internal(op, uarg, 0);
> -}
> -
> -#ifdef CONFIG_COMPAT
> -int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
> -{
> - return do_kexec_op_internal(op, uarg, 1);
> -}
> #endif
>
> /*
> diff -r e56500f95b6a -r f571dc8e4368 xen/include/public/kexec.h
> --- a/xen/include/public/kexec.h
> +++ b/xen/include/public/kexec.h
> @@ -155,6 +155,15 @@ typedef struct xen_kexec_range {
> unsigned long start;
> } xen_kexec_range_t;
>
> +#define KEXEC_CMD_kexec64_get_range 4
> +/* xen_kexec_range_t using explicit sizes for fields. */
> +typedef struct xen_kexec64_range {
> + int32_t range;
> + int32_t nr;
> + uint64_t size;
> + uint64_t start;
> +} xen_kexec64_range_t;
> +
> #endif /* _XEN_PUBLIC_KEXEC_H */
>
> /*
> diff -r e56500f95b6a -r f571dc8e4368 xen/include/xen/kexec.h
> --- a/xen/include/xen/kexec.h
> +++ b/xen/include/xen/kexec.h
> @@ -34,8 +34,8 @@ void kexec_crash(void);
> void kexec_crash_save_cpu(void);
> crash_xen_info_t *kexec_crash_save_info(void);
> void machine_crash_shutdown(void);
> -int machine_kexec_get(xen_kexec_range_t *range);
> -int machine_kexec_get_xen(xen_kexec_range_t *range);
> +int machine_kexec_get(xen_kexec64_range_t *range);
> +int machine_kexec_get_xen(xen_kexec64_range_t *range);
>
> void compat_machine_kexec(unsigned long rnk,
> unsigned long indirection_page,
> diff -r e56500f95b6a -r f571dc8e4368 xen/include/xlat.lst
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -50,9 +50,7 @@
> ? grant_entry_v1 grant_table.h
> ? grant_entry_header grant_table.h
> ? grant_entry_v2 grant_table.h
> -? kexec_exec kexec.h
> ! kexec_image kexec.h
> -! kexec_range kexec.h
> ! add_to_physmap memory.h
> ! foreign_memory_map memory.h
> ! memory_exchange memory.h
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |