
[Xen-devel] [PATCH 1/3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P



Xen's L4 entries being uniformly installed into every L4 table, combined
with 64-bit PV kernels running in ring 3, meant that guest user mode was
able to see the read-only M2P which Xen presents to guests. While this
apparently does not represent an exploitable information leak, it was
certainly never meant to be that way.

Building on the fact that these guests already have separate kernel and
user mode page tables, we can allow guest kernels to tell Xen that they
don't want user mode to see this table. We can't, however, do this by
default: there is no ABI requirement that kernel and user mode page
tables be separate. Therefore introduce a new VM-assist flag allowing
the guest to control the respective hypervisor behavior (a guest-side
usage sketch follows the list below):
- when not set, L4 tables get created with the respective slot blank,
  and whenever the L4 table gets used as a kernel one the missing
  mapping gets inserted,
- when set, L4 tables get created with the respective slot initialized
  as before, and whenever the L4 table gets used as a user one the
  mapping gets zapped.
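
As a concrete illustration, here is a minimal guest-side sketch of opting
in; it is not part of this patch. It assumes a Linux-style
HYPERVISOR_vm_assist() hypercall wrapper plus the VMASST_TYPE_m2p_strict
constant which this patch adds to the public header; the helper name is
hypothetical.

/* Hypothetical guest kernel code, Linux-style; for illustration only. */
#include <linux/init.h>
#include <linux/printk.h>
#include <xen/interface/xen.h>      /* VMASST_CMD_enable, VMASST_TYPE_m2p_strict */
#include <asm/xen/hypercall.h>      /* HYPERVISOR_vm_assist() */

static void __init xen_enable_m2p_strict(void)
{
    /* Hypervisors without this patch reject the unknown type with -EINVAL;
     * in that case the M2P simply remains visible to user mode as before. */
    if ( HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_m2p_strict) )
        pr_info("Xen: m2p_strict assist not available\n");
}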

Since the new flag gets assigned a value discontiguous from the existing
ones (in order to preserve the low bits, as only those are currently
accessible to 32-bit guests), this requires a little bit of rework of
the VM-assist code in general: an architecture-specific
VM_ASSIST_VALID definition gets introduced (with an optional compat-mode
counterpart), and compilation of the respective code becomes
conditional upon this being defined (ARM doesn't wire these up and
hence doesn't need that code).
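
For illustration only (again not part of the patch), a standalone snippet
showing what the native and compat validity masks evaluate to with the
constants from the public header; it assumes a 64-bit build where
unsigned long is 64 bits wide, as on x86-64.

/* Standalone demonstration of the mask arithmetic; constants mirror xen.h. */
#include <stdio.h>

#define VMASST_TYPE_4gb_segments        0
#define VMASST_TYPE_4gb_segments_notify 1
#define VMASST_TYPE_writable_pagetables 2
#define VMASST_TYPE_pae_extended_cr3    3
#define VMASST_TYPE_m2p_strict          32
#define COMPAT_BITS_PER_LONG            32

int main(void)
{
    unsigned long native = (1UL << VMASST_TYPE_4gb_segments)        |
                           (1UL << VMASST_TYPE_4gb_segments_notify) |
                           (1UL << VMASST_TYPE_writable_pagetables) |
                           (1UL << VMASST_TYPE_pae_extended_cr3)    |
                           (1UL << VMASST_TYPE_m2p_strict);
    /* 32-bit guests only get to see the low COMPAT_BITS_PER_LONG bits,
     * so bit 32 (m2p_strict) is masked out for them. */
    unsigned long compat = native & ((1UL << COMPAT_BITS_PER_LONG) - 1);

    printf("native mask: %#lx\n", native);   /* 0x10000000f */
    printf("compat mask: %#lx\n", compat);   /* 0xf */
    return 0;
}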

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -339,7 +339,7 @@ static int setup_compat_l4(struct vcpu *
 
     l4tab = __map_domain_page(pg);
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, v->domain);
+    init_guest_l4_table(l4tab, v->domain, 1);
     unmap_domain_page(l4tab);
 
     v->arch.guest_table = pagetable_from_page(pg);
@@ -971,7 +971,17 @@ int arch_set_info_guest(
         case -EINTR:
             rc = -ERESTART;
         case -ERESTART:
+            break;
         case 0:
+            if ( !compat && !VM_ASSIST(d, VMASST_TYPE_m2p_strict) &&
+                 !paging_mode_refcounts(d) )
+            {
+                l4_pgentry_t *l4tab = __map_domain_page(cr3_page);
+
+                l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+                    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+                unmap_domain_page(l4tab);
+            }
             break;
         default:
             if ( cr3_page == current->arch.old_guest_table )
@@ -1006,7 +1016,16 @@ int arch_set_info_guest(
                 default:
                     if ( cr3_page == current->arch.old_guest_table )
                         cr3_page = NULL;
+                    break;
                 case 0:
+                    if ( VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+                    {
+                        l4_pgentry_t *l4tab = __map_domain_page(cr3_page);
+
+                        l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+                            l4e_empty();
+                        unmap_domain_page(l4tab);
+                    }
                     break;
                 }
             }
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -1203,7 +1203,7 @@ int __init construct_dom0(
         l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
     }
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, d);
+    init_guest_l4_table(l4tab, d, 0);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( is_pv_32on64_domain(d) )
         v->arch.guest_table_user = v->arch.guest_table;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1380,7 +1380,8 @@ static int alloc_l3_table(struct page_in
     return rc > 0 ? 0 : rc;
 }
 
-void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
+void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d,
+                         bool_t zap_ro_mpt)
 {
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -1395,6 +1396,8 @@ void init_guest_l4_table(l4_pgentry_t l4
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR);
+    if ( zap_ro_mpt || is_pv_32on64_domain(d) || paging_mode_refcounts(d) )
+        l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
 }
 
 static int alloc_l4_table(struct page_info *page)
@@ -1444,7 +1447,7 @@ static int alloc_l4_table(struct page_in
         adjust_guest_l4e(pl4e[i], d);
     }
 
-    init_guest_l4_table(pl4e, d);
+    init_guest_l4_table(pl4e, d, !VM_ASSIST(d, VMASST_TYPE_m2p_strict));
     unmap_domain_page(pl4e);
 
     return rc > 0 ? 0 : rc;
@@ -2755,6 +2758,14 @@ int new_guest_cr3(unsigned long mfn)
 
     invalidate_shadow_ldt(curr, 0);
 
+    if ( !VM_ASSIST(d, VMASST_TYPE_m2p_strict) && !paging_mode_refcounts(d) )
+    {
+        l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+        l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+            idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+        unmap_domain_page(l4tab);
+    }
     curr->arch.guest_table = pagetable_from_pfn(mfn);
     update_cr3(curr);
 
@@ -3112,6 +3123,14 @@ long do_mmuext_op(
                                 op.arg1.mfn);
                     break;
                 }
+                if ( VM_ASSIST(d, VMASST_TYPE_m2p_strict) &&
+                     !paging_mode_refcounts(d) )
+                {
+                    l4_pgentry_t *l4tab = map_domain_page(op.arg1.mfn);
+
+                    l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+                    unmap_domain_page(l4tab);
+                }
             }
 
             curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1436,6 +1436,9 @@ void sh_install_xen_entries_in_l4(struct
         shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
                             __PAGE_HYPERVISOR);
 
+    if ( !VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
+
     /* Shadow linear mapping for 4-level shadows.  N.B. for 3-level
      * shadows on 64-bit xen, this linear mapping is later replaced by the
      * monitor pagetable structure, which is built in make_monitor_table
@@ -3975,6 +3978,19 @@ sh_update_cr3(struct vcpu *v, int do_loc
         /* PAGING_LEVELS==4 implies 64-bit, which means that
          * map_domain_page_global can't fail */
         BUG_ON(v->arch.paging.shadow.guest_vtable == NULL);
+        if ( !shadow_mode_external(d) && !is_pv_32on64_domain(d) )
+        {
+            shadow_l4e_t *sl4e = v->arch.paging.shadow.guest_vtable;
+
+            if ( (v->arch.flags & TF_kernel_mode) &&
+                 !VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+                sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
+                    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+            else if ( !(v->arch.flags & TF_kernel_mode) &&
+                      VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+                sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
+                    shadow_l4e_empty();
+        }
     }
     else
         v->arch.paging.shadow.guest_vtable = __linear_l4_table;
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -480,7 +480,7 @@ static int setup_m2p_table(struct mem_ho
                 l2_ro_mpt += l2_table_offset(va);
             }
 
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+            /* NB. Cannot be GLOBAL: guest user mode should not see it. */
             l2e_write(l2_ro_mpt, l2e_from_pfn(mfn,
                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         }
@@ -583,7 +583,7 @@ void __init paging_init(void)
                        0x77, 1UL << L3_PAGETABLE_SHIFT);
 
                 ASSERT(!l2_table_offset(va));
-                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                /* NB. Cannot be GLOBAL: guest user mode should not see it. */
                 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                     l3e_from_page(l1_pg,
                         /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
@@ -621,7 +621,7 @@ void __init paging_init(void)
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             ASSERT(!l2_table_offset(va));
         }
-        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+        /* NB. Cannot be GLOBAL: guest user mode should not see it. */
         if ( l1_pg )
             l2e_write(l2_ro_mpt, l2e_from_page(
                 l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
--- a/xen/common/compat/kernel.c
+++ b/xen/common/compat/kernel.c
@@ -41,6 +41,11 @@ CHECK_TYPE(domain_handle);
 #define xennmi_callback compat_nmi_callback
 #define xennmi_callback_t compat_nmi_callback_t
 
+#ifdef COMPAT_VM_ASSIST_VALID
+#undef VM_ASSIST_VALID
+#define VM_ASSIST_VALID COMPAT_VM_ASSIST_VALID
+#endif
+
 #define DO(fn) int compat_##fn
 #define COMPAT
 
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1318,9 +1318,11 @@ long do_vcpu_op(int cmd, unsigned int vc
     return rc;
 }
 
-long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
+#ifdef VM_ASSIST_VALID
+long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
+               unsigned long valid)
 {
-    if ( type > MAX_VMASST_TYPE )
+    if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
         return -EINVAL;
 
     switch ( cmd )
@@ -1335,6 +1337,7 @@ long vm_assist(struct domain *p, unsigne
 
     return -ENOSYS;
 }
+#endif
 
 struct pirq *pirq_get_info(struct domain *d, int pirq)
 {
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -386,10 +386,12 @@ DO(nmi_op)(unsigned int cmd, XEN_GUEST_H
     return rc;
 }
 
+#ifdef VM_ASSIST_VALID
 DO(vm_assist)(unsigned int cmd, unsigned int type)
 {
-    return vm_assist(current->domain, cmd, type);
+    return vm_assist(current->domain, cmd, type, VM_ASSIST_VALID);
 }
+#endif
 
 DO(ni_hypercall)(void)
 {
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -343,6 +343,15 @@ extern unsigned long xen_phys_start;
 #define ARG_XLAT_START(v)        \
     (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
 
+#define NATIVE_VM_ASSIST_VALID   ((1UL << VMASST_TYPE_4gb_segments)        | \
+                                  (1UL << VMASST_TYPE_4gb_segments_notify) | \
+                                  (1UL << VMASST_TYPE_writable_pagetables) | \
+                                  (1UL << VMASST_TYPE_pae_extended_cr3)    | \
+                                  (1UL << VMASST_TYPE_m2p_strict))
+#define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
+#define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
+                                  ((1UL << COMPAT_BITS_PER_LONG) - 1))
+
 #define ELFSIZE 64
 
 #define ARCH_CRASH_SAVE_VMCOREINFO
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -318,7 +318,8 @@ static inline void *__page_to_virt(const
 int free_page_type(struct page_info *page, unsigned long type,
                    int preemptible);
 
-void init_guest_l4_table(l4_pgentry_t[], const struct domain *);
+void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
+                         bool_t zap_ro_mpt);
 
 int is_iomem_page(unsigned long mfn);
 
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -486,7 +486,12 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 /* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
 
+/* x86/64 guests: strictly hide M2P from user mode. */
+#define VMASST_TYPE_m2p_strict           32
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040600
 #define MAX_VMASST_TYPE                  3
+#endif
 
 #ifndef __ASSEMBLY__
 
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -80,7 +80,8 @@ extern void guest_printk(const struct do
     __attribute__ ((format (printf, 2, 3)));
 extern void noreturn panic(const char *format, ...)
     __attribute__ ((format (printf, 1, 2)));
-extern long vm_assist(struct domain *, unsigned int, unsigned int);
+extern long vm_assist(struct domain *, unsigned int cmd, unsigned int type,
+                      unsigned long valid);
 extern int __printk_ratelimit(int ratelimit_ms, int ratelimit_burst);
 extern int printk_ratelimit(void);
 

