
[Xen-changelog] [xen-unstable] [x86_64] Guests no longer set _PAGE_USER on kernel mappings.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID fc5736e0a2eb749a242ec1a750b8d7bf35095792
# Parent  d3a9bcf61c331b8985c32cd3d68e7ced25d4da64
[x86_64] Guests no longer set _PAGE_USER on kernel mappings.
Xen now sets _PAGE_USER on the guest's behalf when validating
page-table entries. This may allow guest kernels to be run outside
ring 3 in the future, and also provides scope for optimisations today
(e.g., using the global bit on user mappings).

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c                |    3 
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                 |    6 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h |   21 +--
 tools/libxc/xc_linux_build.c                                   |    8 -
 xen/arch/x86/domain_build.c                                    |    8 -
 xen/arch/x86/mm.c                                              |   61 +++++++++-
 6 files changed, 74 insertions(+), 33 deletions(-)
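
A minimal, self-contained sketch of the hypervisor-side mechanism may help
when reading the xen/arch/x86/mm.c hunks below. This is illustration, not
Xen source: the flag values match the x86 PTE bit layout, but the l1e_t
type and this standalone adjust_guest_l1e() are simplified stand-ins for
Xen's l1_pgentry_t and the macro added by this patch.

#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT  (1UL << 0)
#define _PAGE_RW       (1UL << 1)
#define _PAGE_USER     (1UL << 2)
#define _PAGE_ACCESSED (1UL << 5)
#define _PAGE_DIRTY    (1UL << 6)

/* Simplified stand-in for Xen's l1_pgentry_t. */
typedef uint64_t l1e_t;

/* Mirrors the new adjust_guest_l1e() macro: OR _PAGE_USER into any
 * present L1 entry a 64-bit PV guest installs, because the guest
 * kernel itself runs in ring 3 and would otherwise fault on its own
 * mappings. */
static void adjust_guest_l1e(l1e_t *pl1e)
{
    if (*pl1e & _PAGE_PRESENT)
        *pl1e |= _PAGE_USER;
}

int main(void)
{
    /* A kernel mapping exactly as the guest now writes it: no
     * _PAGE_USER (cf. the __PAGE_KERNEL changes in pgtable.h below). */
    l1e_t l1e = 0x1000 | _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
    adjust_guest_l1e(&l1e);
    printf("validated l1e = %#llx (_PAGE_USER %s)\n",
           (unsigned long long)l1e,
           (l1e & _PAGE_USER) ? "set by Xen" : "clear");
    return 0;
}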

diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Tue Aug 22 15:26:40 2006 +0100
@@ -282,9 +282,6 @@ void __iomem * __ioremap(unsigned long p
        area->phys_addr = phys_addr;
        addr = (void __iomem *) area->addr;
        flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
-#ifdef __x86_64__
-       flags |= _PAGE_USER;
-#endif
        if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
                                     phys_addr>>PAGE_SHIFT,
                                     size, __pgprot(flags), domid)) {
diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Tue Aug 22 15:26:40 2006 +0100
@@ -529,7 +529,7 @@ void __init xen_init_pt(void)
                mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
        level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
                __pud(__pa_symbol(level2_kernel_pgt) |
-                     _KERNPG_TABLE | _PAGE_USER);
+                     _KERNPG_TABLE);
        memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
 
        early_make_page_readonly(init_level4_pgt,
@@ -578,7 +578,7 @@ void __init extend_init_mapping(unsigned
                        pte_page = alloc_static_page(&phys);
                        early_make_page_readonly(
                                pte_page, XENFEAT_writable_page_tables);
-                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+                       set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
                } else {
                        addr = page[pmd_index(va)];
                        addr_to_page(addr, pte_page);
@@ -587,7 +587,7 @@ void __init extend_init_mapping(unsigned
                if (pte_none(*pte)) {
                        new_pte = pfn_pte(
                                (va - __START_KERNEL_map) >> PAGE_SHIFT, 
-                               __pgprot(_KERNPG_TABLE | _PAGE_USER));
+                               __pgprot(_KERNPG_TABLE));
                        xen_l1_entry_update(pte, new_pte);
                }
                va += PAGE_SIZE;
diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Tue Aug 22 15:26:40 2006 +0100
@@ -206,7 +206,7 @@ static inline pte_t ptep_get_and_clear_f
 #define _PAGE_NX        (1UL<<_PAGE_BIT_NX)
 
 #define _PAGE_TABLE    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE  _PAGE_TABLE
+#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
@@ -219,22 +219,21 @@ static inline pte_t ptep_get_and_clear_f
 #define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
 #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define __PAGE_KERNEL \
-       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_EXEC \
-       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_NOCACHE \
-       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_RO \
-       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
 #define __PAGE_KERNEL_VSYSCALL \
-       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE \
-       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER )
+       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
 #define __PAGE_KERNEL_LARGE \
-       (__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER )
+       (__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC \
-       (__PAGE_KERNEL_EXEC | _PAGE_PSE | _PAGE_USER )
-
+       (__PAGE_KERNEL_EXEC | _PAGE_PSE)
 
 /*
  * We don't support GLOBAL page in xenolinux64
@@ -423,7 +422,7 @@ static inline pud_t *pud_offset_k(pgd_t 
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
 #define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
-#define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+#define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
diff -r d3a9bcf61c33 -r fc5736e0a2eb tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Tue Aug 22 15:13:07 2006 +0100
+++ b/tools/libxc/xc_linux_build.c      Tue Aug 22 15:26:40 2006 +0100
@@ -16,15 +16,11 @@
 /* Handy for printing out '0' prepended values at native pointer size */
 #define _p(a) ((void *) ((ulong)a))
 
-#if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#if defined(__i386__)
 #define L3_PROT (_PAGE_PRESENT)
-#endif
-
-#if defined(__x86_64__)
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#elif defined(__x86_64__)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #endif
diff -r d3a9bcf61c33 -r fc5736e0a2eb xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Tue Aug 22 15:13:07 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Tue Aug 22 15:26:40 2006 +0100
@@ -66,15 +66,11 @@ static char opt_dom0_ioports_disable[200
 static char opt_dom0_ioports_disable[200] = "";
 string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
 
-#if defined(__i386__)
-/* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#if CONFIG_PAGING_LEVELS == 3
 #define L3_PROT (_PAGE_PRESENT)
-#elif defined(__x86_64__)
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#elif CONFIG_PAGING_LEVELS == 4
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #endif
diff -r d3a9bcf61c33 -r fc5736e0a2eb xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Aug 22 15:13:07 2006 +0100
+++ b/xen/arch/x86/mm.c Tue Aug 22 15:26:40 2006 +0100
@@ -707,6 +707,35 @@ get_page_from_l4e(
 }
 #endif /* 4 level */
 
+#ifdef __x86_64__
+#define adjust_guest_l1e(pl1e)                                  \
+    do  {                                                       \
+        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )    \
+            l1e_add_flags((pl1e), _PAGE_USER);                  \
+    } while ( 0 )
+
+#define adjust_guest_l2e(pl2e)                                  \
+    do {                                                        \
+        if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) )    \
+            l2e_add_flags((pl2e), _PAGE_USER);                  \
+    } while ( 0 )
+
+#define adjust_guest_l3e(pl3e)                                  \
+    do {                                                        \
+        if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) )    \
+            l3e_add_flags((pl3e), _PAGE_USER);                  \
+    } while ( 0 )
+
+#define adjust_guest_l4e(pl4e)                                  \
+    do {                                                        \
+        if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) )    \
+            l4e_add_flags((pl4e), _PAGE_USER);                  \
+    } while ( 0 )
+#else
+#define adjust_guest_l1e(_p) ((void)0)
+#define adjust_guest_l2e(_p) ((void)0)
+#define adjust_guest_l3e(_p) ((void)0)
+#endif
 
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
 {
@@ -806,9 +835,13 @@ static int alloc_l1_table(struct page_in
     pl1e = map_domain_page(pfn);
 
     for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+    {
         if ( is_guest_l1_slot(i) &&
              unlikely(!get_page_from_l1e(pl1e[i], d)) )
             goto fail;
+
+        adjust_guest_l1e(pl1e[i]);
+    }
 
     unmap_domain_page(pl1e);
     return 1;
@@ -985,6 +1018,8 @@ static int alloc_l2_table(struct page_in
         if ( is_guest_l2_slot(type, i) &&
              unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
             goto fail;
+        
+        adjust_guest_l2e(pl2e[i]);
     }
 
 #if CONFIG_PAGING_LEVELS == 2
@@ -1053,6 +1088,8 @@ static int alloc_l3_table(struct page_in
         if ( is_guest_l3_slot(i) &&
              unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
             goto fail;
+        
+        adjust_guest_l3e(pl3e[i]);
     }
 
     if ( !create_pae_xen_mappings(pl3e) )
@@ -1093,6 +1130,8 @@ static int alloc_l4_table(struct page_in
         if ( is_guest_l4_slot(i) &&
              unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
             goto fail;
+
+        adjust_guest_l4e(pl4e[i]);
     }
 
     /* Xen private mappings. */
@@ -1254,6 +1293,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
                     l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
             return 0;
         }
+
+        adjust_guest_l1e(nl1e);
 
         /* Fast path for identical mapping, r/w and presence. */
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
@@ -1336,6 +1377,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
             return 0;
         }
 
+        adjust_guest_l2e(nl2e);
+
         /* Fast path for identical mapping and presence. */
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
             return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
@@ -1397,6 +1440,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
                     l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
             return 0;
         }
+
+        adjust_guest_l3e(nl3e);
 
         /* Fast path for identical mapping and presence. */
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
@@ -1463,6 +1508,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
         }
+
+        adjust_guest_l4e(nl4e);
 
         /* Fast path for identical mapping and presence. */
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
@@ -2402,7 +2449,7 @@ int do_mmu_update(
 
 
 static int create_grant_pte_mapping(
-    unsigned long pte_addr, l1_pgentry_t _nl1e, struct vcpu *v)
+    unsigned long pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
 {
     int rc = GNTST_okay;
     void *va;
@@ -2414,6 +2461,8 @@ static int create_grant_pte_mapping(
 
     ASSERT(spin_is_locked(&d->big_lock));
 
+    adjust_guest_l1e(nl1e);
+
     gmfn = pte_addr >> PAGE_SHIFT;
     mfn = gmfn_to_mfn(d, gmfn);
 
@@ -2437,7 +2486,7 @@ static int create_grant_pte_mapping(
     }
 
     ol1e = *(l1_pgentry_t *)va;
-    if ( !update_l1e(va, ol1e, _nl1e, mfn, v) )
+    if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
     {
         put_page_type(page);
         rc = GNTST_general_error;
@@ -2526,17 +2575,19 @@ static int destroy_grant_pte_mapping(
 
 
 static int create_grant_va_mapping(
-    unsigned long va, l1_pgentry_t _nl1e, struct vcpu *v)
+    unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
 {
     l1_pgentry_t *pl1e, ol1e;
     struct domain *d = v->domain;
     
     ASSERT(spin_is_locked(&d->big_lock));
 
+    adjust_guest_l1e(nl1e);
+
     pl1e = &linear_pg_table[l1_linear_offset(va)];
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
-         !update_l1e(pl1e, ol1e, _nl1e, 
+         !update_l1e(pl1e, ol1e, nl1e, 
                     l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
         return GNTST_general_error;
 
@@ -3139,6 +3190,8 @@ static int ptwr_emulated_update(
         }
     }
 
+    adjust_guest_l1e(nl1e);
+
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(page_to_mfn(page));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
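
A subtlety in the pgtable.h hunk above: because Xen now sets _PAGE_USER
behind the guest's back, an entry the guest wrote without the bit can read
back with it set, so pmd_bad() must mask the bit out before comparing
against _KERNPG_TABLE. A standalone sketch with simplified constants (not
the kernel's real definitions) shows why both forms now validate:

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK      (~0xfffUL)
#define _PAGE_PRESENT  (1UL << 0)
#define _PAGE_RW       (1UL << 1)
#define _PAGE_USER     (1UL << 2)
#define _PAGE_ACCESSED (1UL << 5)
#define _PAGE_DIRTY    (1UL << 6)
#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/* The patched check: ignore _PAGE_USER, which Xen may have added. */
static int pmd_bad(uint64_t pmd)
{
    return (pmd & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT))
           != (_KERNPG_TABLE & ~_PAGE_PRESENT);
}

int main(void)
{
    uint64_t written  = 0x2000 | _KERNPG_TABLE;   /* as the guest wrote it */
    uint64_t readback = written | _PAGE_USER;     /* after Xen's fixup     */
    printf("written: %s, readback: %s\n",
           pmd_bad(written)  ? "bad" : "ok",
           pmd_bad(readback) ? "bad" : "ok");     /* both print "ok" */
    return 0;
}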
