[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1 of 3] KEXEC: Add new hypercall to fix 64/32bit truncation errors. v2



Currently, KEXEC_CMD_kexec_get_range in compat mode, or with 32bit
Xen, will truncate 64bit pointers to 32bits, leading to problems on
machines with more than 4GiB of RAM.  As 32bit dom0 kernels tend to be
PAE capable, this is especially wasteful, as most structures currently
limited to <4GiB could easily be <64GiB instead.

Therefore, introduce a new hypercall 'KEXEC_CMD_kexec64_get_range'
which passes similar information as KEXEC_CMD_kexec_get_range, but
which uses a structure with explicitly specified field widths, causing
it to be identical in the compat and regular case.  This new
structure, xen_kexec64_range_t, will be the same as xen_kexec_range_t
if Xen is compiled for x86_64, but will still use 64bit integers if
Xen is compiled for x86_32.

To fix 32bit Xen which uses 32bit integers for addresses and sizes,
change the internals to use xen_kexec64_range_t which will use 64bit
integers instead.  This also involves changing several casts to
explicitly use uint64_ts rather than unsigned longs.

In addition, the hypercall entry points need updating to be able to
cater for all possibilities.

|Xen/dom0 bits|          Bit width of addresses in structure for        |
+------+------+---------------------------+-----------------------------+
| Xen  | dom0 | KEXEC_CMD_kexec_get_range | KEXEC_CMD_kexec64_get_range |
+------+------+---------------------------+-----------------------------+
|  32  |  32  |             32            |              64             |
|  64  |  32  |             32            |              64             |
|  64  |  64  |             64            |              64             |
+------+------+---------------------------+-----------------------------+

This has been solved by splitting do_kexec_op_internal back into
do_kexec_op{,_compat} and changing kexec_get_range{,_compat} to
kexec_get_range{32,64} which now exist irrespective of CONFIG_COMPAT
or not.

The knock-on effect of removing do_kexec_op_internal means that
kexec_load_unload_compat is only ever called from inside an #ifdef
CONFIG_COMPAT codepath, which does not exist on Xen x86_32.
Therefore, move the #ifdef CONFIG_COMPAT guards to include the entire
function.

Finally, and a little unrelatedly, fix KEXEC_CMD_kexec_{load,unload}
to return -EBUSY instead of EOK if a kexec is in progress.

Changes since v1:
 *  Fix check for pointer truncation to work when Xen is compiled for
    32 bit mode as well.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

diff -r e56500f95b6a -r f571dc8e4368 xen/arch/ia64/xen/machine_kexec.c
--- a/xen/arch/ia64/xen/machine_kexec.c
+++ b/xen/arch/ia64/xen/machine_kexec.c
@@ -102,10 +102,10 @@ void machine_reboot_kexec(xen_kexec_imag
        machine_kexec(image);
 }
 
-int machine_kexec_get_xen(xen_kexec_range_t *range)
+int machine_kexec_get_xen(xen_kexec64_range_t *range)
 {
        range->start = ia64_tpa(_text);
-       range->size = (unsigned long)_end - (unsigned long)_text;
+       range->size = (uint64_t)_end - (uint64_t)_text;
        return 0;
 }
 
@@ -113,31 +113,31 @@ int machine_kexec_get_xen(xen_kexec_rang
 #define ELF_PAGE_SIZE  (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT)
 #define ELF_PAGE_MASK  (~(ELF_PAGE_SIZE - 1))
 
-static int machine_kexec_get_xenheap(xen_kexec_range_t *range)
+static int machine_kexec_get_xenheap(xen_kexec64_range_t *range)
 {
        range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK;
        range->size =
-               (((unsigned long)range->start + KERNEL_TR_PAGE_SIZE) &
+               (((uint64_t)range->start + KERNEL_TR_PAGE_SIZE) &
          ~(KERNEL_TR_PAGE_SIZE - 1))
-               - (unsigned long)range->start;
+               - (uint64_t)range->start;
        return 0;
 }
 
-static int machine_kexec_get_boot_param(xen_kexec_range_t *range)
+static int machine_kexec_get_boot_param(xen_kexec64_range_t *range)
 {
        range->start = __pa(ia64_boot_param);
        range->size = sizeof(*ia64_boot_param);
        return 0;
 }
 
-static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range)
+static int machine_kexec_get_efi_memmap(xen_kexec64_range_t *range)
 {
        range->start = ia64_boot_param->efi_memmap;
        range->size = ia64_boot_param->efi_memmap_size;
        return 0;
 }
 
-int machine_kexec_get(xen_kexec_range_t *range)
+int machine_kexec_get(xen_kexec64_range_t *range)
 {
        switch (range->range) {
        case KEXEC_RANGE_MA_XEN:
diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -120,7 +120,7 @@ void machine_kexec(xen_kexec_image_t *im
     }
 }
 
-int machine_kexec_get(xen_kexec_range_t *range)
+int machine_kexec_get(xen_kexec64_range_t *range)
 {
        if (range->range != KEXEC_RANGE_MA_XEN)
                return -EINVAL;
diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/x86_32/machine_kexec.c
--- a/xen/arch/x86/x86_32/machine_kexec.c
+++ b/xen/arch/x86/x86_32/machine_kexec.c
@@ -11,11 +11,11 @@
 #include <asm/page.h>
 #include <public/kexec.h>
 
-int machine_kexec_get_xen(xen_kexec_range_t *range)
+int machine_kexec_get_xen(xen_kexec64_range_t *range)
 {
         range->start = virt_to_maddr(_start);
-        range->size = (unsigned long)xenheap_phys_end -
-                      (unsigned long)range->start;
+        range->size = (uint64_t)xenheap_phys_end -
+                      (uint64_t)range->start;
         return 0;
 }
 
diff -r e56500f95b6a -r f571dc8e4368 xen/arch/x86/x86_64/machine_kexec.c
--- a/xen/arch/x86/x86_64/machine_kexec.c
+++ b/xen/arch/x86/x86_64/machine_kexec.c
@@ -11,10 +11,10 @@
 #include <asm/page.h>
 #include <public/kexec.h>
 
-int machine_kexec_get_xen(xen_kexec_range_t *range)
+int machine_kexec_get_xen(xen_kexec64_range_t *range)
 {
         range->start = virt_to_maddr(_start);
-        range->size = virt_to_maddr(_end) - (unsigned long)range->start;
+        range->size = virt_to_maddr(_end) - (uint64_t)range->start;
         return 0;
 }
 
diff -r e56500f95b6a -r f571dc8e4368 xen/common/kexec.c
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -56,6 +56,14 @@ static struct {
     unsigned long size;
 } ranges[16] __initdata;
 
+/* This XLAT macro is required now even without CONFIG_COMPAT. */
+#define TRANSLATE_kexec_range(_d_, _s_) do { \
+    (_d_)->range = (_s_)->range; \
+    (_d_)->nr = (_s_)->nr; \
+    (_d_)->size = (_s_)->size; \
+    (_d_)->start = (_s_)->start; \
+} while (0)
+
 /*
  * Parse command lines in the format
  *
@@ -280,7 +288,7 @@ static int sizeof_note(const char *name,
             ELFNOTE_ALIGN(descsz));
 }
 
-static int kexec_get_reserve(xen_kexec_range_t *range)
+static int kexec_get_reserve(xen_kexec64_range_t *range)
 {
     if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) {
         range->start = kexec_crash_area.start;
@@ -291,7 +299,7 @@ static int kexec_get_reserve(xen_kexec_r
     return 0;
 }
 
-static int kexec_get_cpu(xen_kexec_range_t *range)
+static int kexec_get_cpu(xen_kexec64_range_t *range)
 {
     int nr = range->nr;
     int nr_bytes = 0;
@@ -335,14 +343,14 @@ static int kexec_get_cpu(xen_kexec_range
     return 0;
 }
 
-static int kexec_get_vmcoreinfo(xen_kexec_range_t *range)
+static int kexec_get_vmcoreinfo(xen_kexec64_range_t *range)
 {
     range->start = __pa((unsigned long)vmcoreinfo_data);
     range->size = VMCOREINFO_BYTES;
     return 0;
 }
 
-static int kexec_get_range_internal(xen_kexec_range_t *range)
+static int kexec_get_range_internal(xen_kexec64_range_t *range)
 {
     int ret = -EINVAL;
 
@@ -365,9 +373,14 @@ static int kexec_get_range_internal(xen_
     return ret;
 }
 
-static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
+/* This function can be invoked from either a KEXEC_CMD_kexec64_get_range
+ * hypercall, or from a KEXEC_CMD_kexec_get_range hypercall with 64bit dom0
+ * on 64bit Xen.  In the first case, the guest structure is a 
+ * xen_kexec64_range_t, and in the second case, the xen_kexec_range_t guest
+ * structure is identical to xen_kexec64_range_t. */
+static int kexec_get_range64(XEN_GUEST_HANDLE(void) uarg)
 {
-    xen_kexec_range_t range;
+    xen_kexec64_range_t range;
     int ret = -EINVAL;
 
     if ( unlikely(copy_from_guest(&range, uarg, 1)) )
@@ -381,34 +394,40 @@ static int kexec_get_range(XEN_GUEST_HAN
     return ret;
 }
 
-static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
+/* This function can be invoked from either a KEXEC_CMD_kexec_get_range
+ * compat hypercall for 32bit dom0 on 64bit Xen, or from the same hypercall
+ * on 32bit Xen.  In both cases, the guest argument uses 32bit integers, so 
+ * translate them to 64bit for use by kexec_get_range_internal.  The
+ * preprocessor guards are to choose the correct 32bit structure, as the
+ * compat_* structures dont exist in 32bit Xen. */
+static int kexec_get_range32(XEN_GUEST_HANDLE(void) uarg)
 {
+    xen_kexec64_range_t range64;
 #ifdef CONFIG_COMPAT
-    xen_kexec_range_t range;
-    compat_kexec_range_t compat_range;
+    compat_kexec_range_t range32;
+#else
+    xen_kexec_range_t range32;
+#endif
     int ret = -EINVAL;
 
-    if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
+    if ( unlikely(copy_from_guest(&range32, uarg, 1)) )
         return -EFAULT;
 
-    XLAT_kexec_range(&range, &compat_range);
+    TRANSLATE_kexec_range(&range64, &range32);
 
-    ret = kexec_get_range_internal(&range);
+    ret = kexec_get_range_internal(&range64);
 
     /* Dont silently truncate physical addresses or sizes. */
-    if ( (range.start | range.size) & ~(unsigned long)(~0u) )
+    if ( (range64.start | range64.size) & 0xffffffff00000000ULL )
         return -ERANGE;
 
     if ( ret == 0 ) {
-        XLAT_kexec_range(&compat_range, &range);
-        if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
+        TRANSLATE_kexec_range(&range32, &range64);
+        if ( unlikely(copy_to_guest(uarg, &range32, 1)) )
              return -EFAULT;
     }
 
     return ret;
-#else /* CONFIG_COMPAT */
-    return 0;
-#endif /* CONFIG_COMPAT */
 }
 
 static int kexec_load_get_bits(int type, int *base, int *bit)
@@ -543,10 +562,10 @@ static int kexec_load_unload(unsigned lo
     return kexec_load_unload_internal(op, &load);
 }
 
+#ifdef CONFIG_COMPAT
 static int kexec_load_unload_compat(unsigned long op,
                                     XEN_GUEST_HANDLE(void) uarg)
 {
-#ifdef CONFIG_COMPAT
     compat_kexec_load_t compat_load;
     xen_kexec_load_t load;
 
@@ -564,10 +583,8 @@ static int kexec_load_unload_compat(unsi
     XLAT_kexec_image(&load.image, &compat_load.image);
 
     return kexec_load_unload_internal(op, &load);
-#else /* CONFIG_COMPAT */
-    return 0;
+}
 #endif /* CONFIG_COMPAT */
-}
 
 static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
 {
@@ -601,8 +618,51 @@ static int kexec_exec(XEN_GUEST_HANDLE(v
     return -EINVAL; /* never reached */
 }
 
-int do_kexec_op_internal(unsigned long op, XEN_GUEST_HANDLE(void) uarg,
-                           int compat)
+long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
+{
+    unsigned long flags;
+    int ret = -EINVAL;
+
+    if ( !IS_PRIV(current->domain) )
+        return -EPERM;
+
+    ret = xsm_kexec();
+    if ( ret )
+        return ret;
+
+    switch ( op )
+    {
+#ifdef __i386__
+    case KEXEC_CMD_kexec_get_range:
+        ret = kexec_get_range32(uarg);
+        break;
+#else
+    case KEXEC_CMD_kexec_get_range:
+#endif
+    case KEXEC_CMD_kexec64_get_range:
+        ret = kexec_get_range64(uarg);
+        break;
+
+    case KEXEC_CMD_kexec_load:
+    case KEXEC_CMD_kexec_unload:
+        spin_lock_irqsave(&kexec_lock, flags);
+        if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
+            ret = kexec_load_unload(op, uarg);
+        else
+            ret = -EBUSY;
+        spin_unlock_irqrestore(&kexec_lock, flags);
+        break;
+
+    case KEXEC_CMD_kexec:
+        ret = kexec_exec(uarg);
+        break;
+    }
+
+    return ret;
+}
+
+#ifdef CONFIG_COMPAT
+int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
 {
     unsigned long flags;
     int ret = -EINVAL;
@@ -617,23 +677,21 @@ int do_kexec_op_internal(unsigned long o
     switch ( op )
     {
     case KEXEC_CMD_kexec_get_range:
-        if (compat)
-                ret = kexec_get_range_compat(uarg);
-        else
-                ret = kexec_get_range(uarg);
+        ret = kexec_get_range32(uarg);
+        break;
+    case KEXEC_CMD_kexec64_get_range:
+        ret = kexec_get_range64(uarg);
         break;
     case KEXEC_CMD_kexec_load:
     case KEXEC_CMD_kexec_unload:
         spin_lock_irqsave(&kexec_lock, flags);
         if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
-        {
-                if (compat)
-                        ret = kexec_load_unload_compat(op, uarg);
-                else
-                        ret = kexec_load_unload(op, uarg);
-        }
+            ret = kexec_load_unload_compat(op, uarg);
+        else
+            ret = -EBUSY;
         spin_unlock_irqrestore(&kexec_lock, flags);
         break;
+
     case KEXEC_CMD_kexec:
         ret = kexec_exec(uarg);
         break;
@@ -641,17 +699,6 @@ int do_kexec_op_internal(unsigned long o
 
     return ret;
 }
-
-long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
-{
-    return do_kexec_op_internal(op, uarg, 0);
-}
-
-#ifdef CONFIG_COMPAT
-int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
-{
-    return do_kexec_op_internal(op, uarg, 1);
-}
 #endif
 
 /*
diff -r e56500f95b6a -r f571dc8e4368 xen/include/public/kexec.h
--- a/xen/include/public/kexec.h
+++ b/xen/include/public/kexec.h
@@ -155,6 +155,15 @@ typedef struct xen_kexec_range {
     unsigned long start;
 } xen_kexec_range_t;
 
+#define KEXEC_CMD_kexec64_get_range      4
+/* xen_kexec_range_t using explicit sizes for fields. */
+typedef struct xen_kexec64_range {
+    int32_t range;
+    int32_t nr;
+    uint64_t size;
+    uint64_t start;
+} xen_kexec64_range_t;
+
 #endif /* _XEN_PUBLIC_KEXEC_H */
 
 /*
diff -r e56500f95b6a -r f571dc8e4368 xen/include/xen/kexec.h
--- a/xen/include/xen/kexec.h
+++ b/xen/include/xen/kexec.h
@@ -34,8 +34,8 @@ void kexec_crash(void);
 void kexec_crash_save_cpu(void);
 crash_xen_info_t *kexec_crash_save_info(void);
 void machine_crash_shutdown(void);
-int machine_kexec_get(xen_kexec_range_t *range);
-int machine_kexec_get_xen(xen_kexec_range_t *range);
+int machine_kexec_get(xen_kexec64_range_t *range);
+int machine_kexec_get_xen(xen_kexec64_range_t *range);
 
 void compat_machine_kexec(unsigned long rnk,
                           unsigned long indirection_page,
diff -r e56500f95b6a -r f571dc8e4368 xen/include/xlat.lst
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -50,9 +50,7 @@
 ?      grant_entry_v1                  grant_table.h
 ?       grant_entry_header              grant_table.h
 ?      grant_entry_v2                  grant_table.h
-?      kexec_exec                      kexec.h
 !      kexec_image                     kexec.h
-!      kexec_range                     kexec.h
 !      add_to_physmap                  memory.h
 !      foreign_memory_map              memory.h
 !      memory_exchange                 memory.h

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.