With our switching away from supporting 32-bit Dom0 operation, users complained that attempts (perhaps due to lack of knowledge of that change) to boot the no longer privileged kernel in Dom0 resulted in apparently silent failure. To make the mismatch explicit and visible, add feature flags that the kernel can set to indicate operation in what modes it supports. For backward compatibility, absence of both feature flags is taken to indicate a kernel that may be capable of operating in both modes. v2: Due to the way elf_xen_parse_features() worked up to now (getting fixed here), adding features indications to the old, string based ELF note would make the respective kernel unusable on older hypervisors. For that reason, a new ELF Note is being introduced that allows specifying supported features as a bit array instead (with features unknown to the hypervisor simply ignored, as now also done by elf_xen_parse_features(), whereas here unknown kernel-required features still keep the kernel [and hence VM] from booting). v3: Introduce and use elf_note_numeric_array() to be forward compatible (or else an old hypervisor wouldn't be able to parse kernel specified features occupying more than 64 bits - thanks, Ian!). Signed-off-by: Jan Beulich --- a/tools/libxc/xc_dom_elfloader.c +++ b/tools/libxc/xc_dom_elfloader.c @@ -286,6 +286,15 @@ static int xc_dom_parse_elf_kernel(struc if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 ) return rc; + if ( elf_xen_feature_get(XENFEAT_privileged, dom->parms.f_required) || + (elf_xen_feature_get(XENFEAT_privileged, dom->parms.f_supported) && + !elf_xen_feature_get(XENFEAT_unprivileged, dom->parms.f_supported)) ) + { + xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not" + " support unprivileged (DomU) operation", __FUNCTION__); + return -EINVAL; + } + /* find kernel segment */ dom->kernel_seg.vstart = dom->parms.virt_kstart; dom->kernel_seg.vend = dom->parms.virt_kend; --- a/xen/arch/ia64/xen/domain.c +++ b/xen/arch/ia64/xen/domain.c @@ -2164,6 +2164,14 @@ int __init construct_dom0(struct domain return -1; } + if (test_bit(XENFEAT_unprivileged, parms.f_required) || + (test_bit(XENFEAT_unprivileged, parms.f_supported) && + !test_bit(XENFEAT_privileged, parms.f_supported))) + { + printk("Kernel does not support Dom0 operation\n"); + return -1; + } + p_start = parms.virt_base; pkern_start = parms.virt_kstart; pkern_end = parms.virt_kend; --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -415,6 +415,14 @@ int __init construct_dom0( return -EINVAL; } + if ( test_bit(XENFEAT_unprivileged, parms.f_required) || + (test_bit(XENFEAT_unprivileged, parms.f_supported) && + !test_bit(XENFEAT_privileged, parms.f_supported)) ) + { + printk("Kernel does not support Dom0 operation\n"); + return -EINVAL; + } + #if defined(__x86_64__) if ( compat32 ) { --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -278,7 +278,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL switch ( fi.submap_idx ) { case 0: - fi.submap = 0; + fi.submap = 1U << (IS_PRIV(current->domain) ? + XENFEAT_privileged : XENFEAT_unprivileged); if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) ) fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb); if ( paging_mode_translate(current->domain) ) --- a/xen/common/libelf/libelf-dominfo.c +++ b/xen/common/libelf/libelf-dominfo.c @@ -26,7 +26,9 @@ static const char *const elf_xen_feature [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables", [XENFEAT_auto_translated_physmap] = "auto_translated_physmap", [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel", - [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" + [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb", + [XENFEAT_privileged] = "privileged", + [XENFEAT_unprivileged] = "unprivileged" }; static const int elf_xen_features = sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); @@ -83,7 +85,7 @@ int elf_xen_parse_features(const char *f } } } - if ( i == elf_xen_features ) + if ( i == elf_xen_features && required && feature[0] == '!' ) return -1; } @@ -114,6 +116,7 @@ int elf_xen_parse_note(struct elf_binary [XEN_ELFNOTE_LOADER] = { "LOADER", 1}, [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1}, [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1}, + [XEN_ELFNOTE_SUPPORTED_FEATURES] = { "SUPPORTED_FEATURES", 0}, [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1}, [XEN_ELFNOTE_SUSPEND_CANCEL] = { "SUSPEND_CANCEL", 0 }, [XEN_ELFNOTE_MOD_START_PFN] = { "MOD_START_PFN", 0 }, @@ -122,6 +125,7 @@ int elf_xen_parse_note(struct elf_binary const char *str = NULL; uint64_t val = 0; + unsigned int i; int type = elf_uval(elf, note, type); if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) || @@ -200,6 +204,12 @@ int elf_xen_parse_note(struct elf_binary return -1; break; + case XEN_ELFNOTE_SUPPORTED_FEATURES: + for ( i = 0; i < XENFEAT_NR_SUBMAPS; ++i ) + parms->f_supported[i] |= elf_note_numeric_array( + elf, note, sizeof(*parms->f_supported), i); + break; + } return 0; } --- a/xen/common/libelf/libelf-tools.c +++ b/xen/common/libelf/libelf-tools.c @@ -227,6 +227,27 @@ uint64_t elf_note_numeric(struct elf_bin return 0; } } + +uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note, + unsigned int unitsz, unsigned int idx) +{ + const void *desc = elf_note_desc(elf, note); + int descsz = elf_uval(elf, note, descsz); + + if ( descsz % unitsz || idx >= descsz / unitsz ) + return 0; + switch (unitsz) + { + case 1: + case 2: + case 4: + case 8: + return elf_access_unsigned(elf, desc, idx * unitsz, unitsz); + default: + return 0; + } +} + const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note) { int namesz = (elf_uval(elf, note, namesz) + 3) & ~3; --- a/xen/include/public/elfnote.h +++ b/xen/include/public/elfnote.h @@ -179,9 +179,22 @@ #define XEN_ELFNOTE_MOD_START_PFN 16 /* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about. The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES /* * System information exported through crash notes. --- a/xen/include/public/features.h +++ b/xen/include/public/features.h @@ -75,7 +75,13 @@ #define XENFEAT_hvm_safe_pvclock 9 /* x86: pirq can be used by HVM guests */ -#define XENFEAT_hvm_pirqs 10 +#define XENFEAT_hvm_pirqs 10 + +/* privileged operation is supported */ +#define XENFEAT_privileged 11 + +/* un-privileged operation is supported */ +#define XENFEAT_unprivileged 12 #define XENFEAT_NR_SUBMAPS 1 --- a/xen/include/xen/libelf.h +++ b/xen/include/xen/libelf.h @@ -179,6 +179,8 @@ const elf_sym *elf_sym_by_index(struct e const char *elf_note_name(struct elf_binary *elf, const elf_note * note); const void *elf_note_desc(struct elf_binary *elf, const elf_note * note); uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note); +uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *, + unsigned int unitsz, unsigned int idx); const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note); int elf_is_elfbinary(const void *image);