[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH, v2] add privileged/unprivileged kernel feature indication



With our switching away from supporting 32-bit Dom0 operation, users
complained that attempts (perhaps due to lack of knowledge of that
change) to boot the no longer privileged kernel in Dom0 resulted in
apparently silent failure. To make the mismatch explicit and visible,
add feature flags that the kernel can set to indicate operation in
what modes it supports. For backward compatibility, absence of both
feature flags is taken to indicate a kernel that may be capable of
operating in both modes.

v2: Due to the way elf_xen_parse_features() worked up to now (getting
fixed here), adding features indications to the old, string based ELF
note would make the respective kernel unusable on older hypervisors.
For that reason, a new ELF Note is being introduced that allows
specifying supported features as a bit array instead (with features
unknown to the hypervisor simply ignored, as now also done by
elf_xen_parse_features(), whereas here unknown kernel-required
features still keep the kernel [and hence VM] from booting).

v3: Introduce and use elf_note_numeric_array() to be forward
compatible (or else an old hypervisor wouldn't be able to parse kernel
specified features occupying more than 64 bits - thanks, Ian!).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- a/tools/libxc/xc_dom_elfloader.c
+++ b/tools/libxc/xc_dom_elfloader.c
@@ -286,6 +286,15 @@ static int xc_dom_parse_elf_kernel(struc
     if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 )
         return rc;
 
+    if ( elf_xen_feature_get(XENFEAT_privileged, dom->parms.f_required) ||
+         (elf_xen_feature_get(XENFEAT_privileged, dom->parms.f_supported) &&
+          !elf_xen_feature_get(XENFEAT_unprivileged, dom->parms.f_supported)) )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
+                     " support unprivileged (DomU) operation", __FUNCTION__);
+        return -EINVAL;
+    }
+
     /* find kernel segment */
     dom->kernel_seg.vstart = dom->parms.virt_kstart;
     dom->kernel_seg.vend   = dom->parms.virt_kend;
--- a/xen/arch/ia64/xen/domain.c
+++ b/xen/arch/ia64/xen/domain.c
@@ -2164,6 +2164,14 @@ int __init construct_dom0(struct domain 
                return -1;
        }
 
+       if (test_bit(XENFEAT_unprivileged, parms.f_required) ||
+           (test_bit(XENFEAT_unprivileged, parms.f_supported) &&
+            !test_bit(XENFEAT_privileged, parms.f_supported)))
+       {
+               printk("Kernel does not support Dom0 operation\n");
+               return -1;
+       }
+
        p_start = parms.virt_base;
        pkern_start = parms.virt_kstart;
        pkern_end = parms.virt_kend;
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -415,6 +415,14 @@ int __init construct_dom0(
         return -EINVAL;
     }
 
+    if ( test_bit(XENFEAT_unprivileged, parms.f_required) ||
+         (test_bit(XENFEAT_unprivileged, parms.f_supported) &&
+          !test_bit(XENFEAT_privileged, parms.f_supported)) )
+    {
+        printk("Kernel does not support Dom0 operation\n");
+        return -EINVAL;
+    }
+
 #if defined(__x86_64__)
     if ( compat32 )
     {
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -278,7 +278,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = 1U << (IS_PRIV(current->domain) ?
+                               XENFEAT_privileged : XENFEAT_unprivileged);
             if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
                 fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( paging_mode_translate(current->domain) )
--- a/xen/common/libelf/libelf-dominfo.c
+++ b/xen/common/libelf/libelf-dominfo.c
@@ -26,7 +26,9 @@ static const char *const elf_xen_feature
     [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables",
     [XENFEAT_auto_translated_physmap] = "auto_translated_physmap",
     [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel",
-    [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb"
+    [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb",
+    [XENFEAT_privileged] = "privileged",
+    [XENFEAT_unprivileged] = "unprivileged"
 };
 static const int elf_xen_features =
 sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]);
@@ -83,7 +85,7 @@ int elf_xen_parse_features(const char *f
                 }
             }
         }
-        if ( i == elf_xen_features )
+        if ( i == elf_xen_features && required && feature[0] == '!' )
             return -1;
     }
 
@@ -114,6 +116,7 @@ int elf_xen_parse_note(struct elf_binary
         [XEN_ELFNOTE_LOADER] = { "LOADER", 1},
         [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1},
         [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1},
+        [XEN_ELFNOTE_SUPPORTED_FEATURES] = { "SUPPORTED_FEATURES", 0},
         [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1},
         [XEN_ELFNOTE_SUSPEND_CANCEL] = { "SUSPEND_CANCEL", 0 },
         [XEN_ELFNOTE_MOD_START_PFN] = { "MOD_START_PFN", 0 },
@@ -122,6 +125,7 @@ int elf_xen_parse_note(struct elf_binary
 
     const char *str = NULL;
     uint64_t val = 0;
+    unsigned int i;
     int type = elf_uval(elf, note, type);
 
     if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) ||
@@ -200,6 +204,12 @@ int elf_xen_parse_note(struct elf_binary
             return -1;
         break;
 
+    case XEN_ELFNOTE_SUPPORTED_FEATURES:
+        for ( i = 0; i < XENFEAT_NR_SUBMAPS; ++i )
+            parms->f_supported[i] |= elf_note_numeric_array(
+                elf, note, sizeof(*parms->f_supported), i);
+        break;
+
     }
     return 0;
 }
--- a/xen/common/libelf/libelf-tools.c
+++ b/xen/common/libelf/libelf-tools.c
@@ -227,6 +227,27 @@ uint64_t elf_note_numeric(struct elf_bin
         return 0;
     }
 }
+
+uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note,
+                                unsigned int unitsz, unsigned int idx)
+{
+    const void *desc = elf_note_desc(elf, note);
+    int descsz = elf_uval(elf, note, descsz);
+
+    if ( descsz % unitsz || idx >= descsz / unitsz )
+        return 0;
+    switch (unitsz)
+    {
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+        return elf_access_unsigned(elf, desc, idx * unitsz, unitsz);
+    default:
+        return 0;
+    }
+}
+
 const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note)
 {
     int namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
--- a/xen/include/public/elfnote.h
+++ b/xen/include/public/elfnote.h
@@ -179,9 +179,22 @@
 #define XEN_ELFNOTE_MOD_START_PFN 16
 
 /*
+ * The features supported by this kernel (numeric).
+ *
+ * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
+ * kernel to specify support for features that older hypervisors don't
+ * know about. The set of features 4.2 and newer hypervisors will
+ * consider supported by the kernel is the combination of the sets
+ * specified through this and the string note.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+
+/*
  * The number of the highest elfnote defined.
  */
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
 
 /*
  * System information exported through crash notes.
--- a/xen/include/public/features.h
+++ b/xen/include/public/features.h
@@ -75,7 +75,13 @@
 #define XENFEAT_hvm_safe_pvclock           9
 
 /* x86: pirq can be used by HVM guests */
-#define XENFEAT_hvm_pirqs           10
+#define XENFEAT_hvm_pirqs                 10
+
+/* privileged operation is supported */
+#define XENFEAT_privileged                11
+
+/* un-privileged operation is supported */
+#define XENFEAT_unprivileged              12
 
 #define XENFEAT_NR_SUBMAPS 1
 
--- a/xen/include/xen/libelf.h
+++ b/xen/include/xen/libelf.h
@@ -179,6 +179,8 @@ const elf_sym *elf_sym_by_index(struct e
 const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
 const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
 uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *,
+                                unsigned int unitsz, unsigned int idx);
 const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
 
 int elf_is_elfbinary(const void *image);


Attachment: guest-kernel-cap.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.