[Xen-changelog] Linux 2.6 now always uses writable page tables (even SMP builds). Also



ChangeSet 1.1321, 2005/03/18 15:14:45+00:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Linux 2.6 now always uses writable page tables (even SMP builds). Also
        use native definitions for atomic read-modify-write operations on
        ptes. Fixed instruction emulator in Xen.
        Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>



 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h           |   14 ---
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h |   34 --------
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h        |   41 ++--------
 tools/tests/test_x86_emulator.c                                   |   15 +++
 xen/arch/x86/mm.c                                                 |   30 ++++---
 xen/arch/x86/x86_emulate.c                                        |   27 +++---
 xen/include/asm-x86/page.h                                        |   21 ++++-
 xen/include/asm-x86/x86_32/page.h                                 |    5 -
 xen/include/asm-x86/x86_64/page.h                                 |    9 +-
 9 files changed, 89 insertions(+), 107 deletions(-)
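
The "native definitions for atomic read-modify-write operations on ptes" are the test_and_clear_bit()/xchg() style helpers visible in the pgtable diffs below: with page tables now writable on all builds, the CPU (or another processor) can set the Accessed/Dirty bits of a PTE concurrently, so the helpers clear those bits with an atomic read-modify-write rather than a read, test and hypercall. A minimal user-space sketch of the idea (GCC builtins stand in for the kernel primitives; the bit numbers mirror _PAGE_BIT_ACCESSED/_PAGE_BIT_DIRTY on i386, everything else is illustrative):

#include <stdio.h>

#define PAGE_BIT_ACCESSED 5   /* mirrors _PAGE_BIT_ACCESSED */
#define PAGE_BIT_DIRTY    6   /* mirrors _PAGE_BIT_DIRTY */

/* Atomically clear bit 'nr' and return its old value, in the spirit of the
 * kernel's test_and_clear_bit() used by the new ptep_test_and_clear_*(). */
static int test_and_clear_bit_ul(int nr, unsigned long *addr)
{
    unsigned long mask = 1UL << nr;
    return (__sync_fetch_and_and(addr, ~mask) & mask) != 0;
}

int main(void)
{
    unsigned long pte_low = 0x12345000UL | (1UL << PAGE_BIT_DIRTY)
                                         | (1UL << PAGE_BIT_ACCESSED) | 1;
    int dirty, young;

    dirty = test_and_clear_bit_ul(PAGE_BIT_DIRTY, &pte_low);
    printf("dirty was %d, pte now %#lx\n", dirty, pte_low);

    young = test_and_clear_bit_ul(PAGE_BIT_ACCESSED, &pte_low);
    printf("young was %d, pte now %#lx\n", young, pte_low);
    return 0;
}
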


diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h   2005-03-18 11:03:51 -05:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h   2005-03-18 11:03:51 -05:00
@@ -116,17 +116,11 @@
 }
 #define pgprot_val(x)  ((x).pgprot)
 
-static inline pte_t __pte(unsigned long x)
-{
-       if (x & 1) x = phys_to_machine(x);
-       return ((pte_t) { (x) });
-}
+#define __pte(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
 #define __pte_ma(x)    ((pte_t) { (x) } )
-static inline pgd_t __pgd(unsigned long x)
-{
-       if ((x & 1)) x = phys_to_machine(x);
-       return ((pgd_t) { (x) });
-}
+#define __pgd(x) ({ unsigned long _x = (x); \
+    (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
 #define __pgprot(x)    ((pgprot_t) { (x) } )
 
 #endif /* !__ASSEMBLY__ */
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h 2005-03-18 11:03:51 -05:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h 2005-03-18 11:03:51 -05:00
@@ -13,41 +13,13 @@
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
  */
-#define set_pte_batched(pteptr, pteval) \
-       queue_l1_entry_update(pteptr, (pteval).pte_low)
-
-#ifdef CONFIG_SMP
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval).pte_low)
-#if 0
-do { \
-  (*(pteptr) = pteval); \
-  HYPERVISOR_xen_version(0); \
-} while (0)
-#endif
-#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
-#else
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#endif
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+#define set_pte_batched(pteptr, pteval) \
+       queue_l1_entry_update(pteptr, (pteval).pte_low)
 
-/*
- * A note on implementation of this atomic 'get-and-clear' operation.
- * This is actually very simple because Xen Linux can only run on a single
- * processor. Therefore, we cannot race other processors setting the 'accessed'
- * or 'dirty' bits on a page-table entry.
- * Even if pages are shared between domains, that is not a problem because
- * each domain will have separate page tables, with their own versions of
- * accessed & dirty state.
- */
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
-       pte_t pte = *xp;
-       if (pte.pte_low)
-               set_pte(xp, __pte_ma(0));
-       return pte;
-}
-
+#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)         ((a).pte_low == (b).pte_low)
 /*
  * We detect special mappings in one of two ways:
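
The xchg()-based ptep_get_and_clear() above replaces the old read-then-clear sequence: the PTE is swapped with zero in a single atomic step, so a Dirty or Accessed bit the CPU sets between the read and the clear cannot be lost. A stand-alone sketch of the same operation (a GCC builtin stands in for the kernel's xchg(); the PTE value is made up):

#include <stdio.h>

/* Atomically replace *ptep with zero and return the old contents, like
 * __pte_ma(xchg(&(xp)->pte_low, 0)) in the patched header. */
static unsigned long pte_get_and_clear(unsigned long *ptep)
{
    return __atomic_exchange_n(ptep, 0UL, __ATOMIC_SEQ_CST);
}

int main(void)
{
    unsigned long pte_low = 0x12345067UL;  /* present/rw/user/accessed/dirty */
    unsigned long old = pte_get_and_clear(&pte_low);

    printf("old pte %#lx, pte now %#lx\n", old, pte_low);
    return 0;
}
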
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h        2005-03-18 11:03:51 -05:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h        2005-03-18 11:03:51 -05:00
@@ -89,9 +89,6 @@
 # define VMALLOC_END   (FIXADDR_START-2*PAGE_SIZE)
 #endif
 
-extern void *high_memory;
-extern unsigned long vmalloc_earlyreserve;
-
 /*
  * The 4MB page is guessing..  Detailed in the infamous "Chapter H"
  * of the Pentium details, but assuming intel did the straightforward
@@ -214,7 +211,7 @@
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
-/* pmd_clear below */
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
 #define pmd_bad(x)     ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 
 
@@ -254,34 +251,20 @@
 
 static inline int ptep_test_and_clear_dirty(pte_t *ptep)
 {
-       pte_t pte = *ptep;
-       int ret = pte_dirty(pte);
-       if (ret)
-               xen_l1_entry_update(ptep, pte_mkclean(pte).pte_low);
-       return ret;
+       if (!pte_dirty(*ptep))
+               return 0;
+       return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
 }
 
 static inline int ptep_test_and_clear_young(pte_t *ptep)
 {
-       pte_t pte = *ptep;
-       int ret = pte_young(pte);
-       if (ret)
-               xen_l1_entry_update(ptep, pte_mkold(pte).pte_low);
-       return ret;
+       if (!pte_young(*ptep))
+               return 0;
+       return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
 }
 
-static inline void ptep_set_wrprotect(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (pte_write(pte))
-               set_pte(ptep, pte_wrprotect(pte));
-}
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
-}
+static inline void ptep_set_wrprotect(pte_t *ptep)             { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); }
+static inline void ptep_mkdirty(pte_t *ptep)                   { set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
 
 /*
  * Macro to mark a page protection value as "uncacheable".  On processors which do not support
@@ -316,11 +299,6 @@
 
 #define page_pte(page) page_pte_prot(page, __pgprot(0))
 
-#define pmd_clear(xp)  do {                                    \
-       set_pmd(xp, __pmd(0));                                  \
-       xen_flush_page_update_queue();                          \
-} while (0)
-
 #define pmd_large(pmd) \
 ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
 
@@ -416,7 +394,6 @@
  */
 #define update_mmu_cache(vma,address,pte) do { } while (0)
 #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
        do {                                                              \
                if (__dirty) {                                            \
diff -Nru a/tools/tests/test_x86_emulator.c b/tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   2005-03-18 11:03:51 -05:00
+++ b/tools/tests/test_x86_emulator.c   2005-03-18 11:03:51 -05:00
@@ -158,6 +158,21 @@
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing btrl $0x1,(%edi)...");
+    instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
+    res         = 0x2233445F;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.edi    = (unsigned long)&res;
+    cr2         = regs.edi;
+    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    if ( (rc != 0) || 
+         (res != 0x2233445D) ||
+         ((regs.eflags&0x201) != 0x201) ||
+         (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
     return 0;
 
  fail:
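
The bytes 0f ba 37 01 in the new test decode to "btrl $0x1,(%edi)": opcode 0F BA is the bit-test group taking an immediate, and in the ModRM byte 0x37 the reg field /6 selects BTR while rm=7 with mod=00 selects (%edi). One way to cross-check the expected result and flags outside the emulator is to run the same instruction natively (illustrative GCC inline-asm sketch, x86 only):

#include <stdio.h>

int main(void)
{
    unsigned int res = 0x2233445F;   /* same starting value as the test */
    unsigned char cf;

    /* btrl $1,mem clears bit 1 and copies its old value into CF. */
    __asm__ __volatile__("btrl $1, %0; setc %1"
                         : "+m" (res), "=q" (cf)
                         : : "cc");

    printf("res=%#x cf=%u\n", res, cf);   /* expect res=0x2233445d, cf=1 */
    return 0;
}
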
diff -Nru a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c 2005-03-18 11:03:51 -05:00
+++ b/xen/arch/x86/mm.c 2005-03-18 11:03:51 -05:00
@@ -2560,18 +2560,15 @@
 /* Write page fault handler: check if guest is trying to modify a PTE. */
 int ptwr_do_page_fault(unsigned long addr)
 {
-    unsigned long    pte, pfn, l2e;
-    struct pfn_info *page;
-    l2_pgentry_t    *pl2e;
-    int              which, cpu = smp_processor_id();
-    u32              l2_idx;
-
-#ifdef __x86_64__
-    return 0; /* Writable pagetables need fixing for x86_64. */
-#endif
+    unsigned long       pte, pfn, l2e;
+    struct pfn_info    *page;
+    l2_pgentry_t       *pl2e;
+    int                 which, cpu = smp_processor_id();
+    u32                 l2_idx;
+    struct exec_domain *ed = current;
 
     /* Can't use linear_l2_table with external tables. */
-    BUG_ON(shadow_mode_external(current->domain));
+    BUG_ON(shadow_mode_external(ed->domain));
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
@@ -2595,6 +2592,15 @@
         return 0;
     }
 
+    /* x86/64: Writable pagetable code needs auditing. Use emulator for now. */
+#if defined(__x86_64__)
+    goto emulate;
+#endif
+
+    /* Writable pagetables are not yet SMP safe. Use emulator for now. */
+    if ( (ed->eid != 0) || (ed->ed_next_list != NULL) )
+        goto emulate;
+
     /* Get the L2 index at which this L1 p.t. is always mapped. */
     l2_idx = page->u.inuse.type_info & PGT_va_mask;
     if ( unlikely(l2_idx >= PGT_va_unknown) )
@@ -2640,7 +2646,7 @@
      * If last batch made no updates then we are probably stuck. Emulate this 
      * update to ensure we make progress.
      */
-    if ( (ptwr_info[cpu].ptinfo[which].prev_exec_domain == current) &&
+    if ( (ptwr_info[cpu].ptinfo[which].prev_exec_domain == ed) &&
          (ptwr_info[cpu].ptinfo[which].prev_nr_updates  == 0) )
     {
         /* Force non-emul next time, or we can get stuck emulating forever. */
@@ -2653,7 +2659,7 @@
     
     /* For safety, disconnect the L1 p.t. page from current space. */
     if ( (which == PTWR_PT_ACTIVE) && 
-         likely(!shadow_mode_enabled(current->domain)) )
+         likely(!shadow_mode_enabled(ed->domain)) )
     {
         *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
         flush_tlb(); /* XXX Multi-CPU guests? */
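
The new check in ptwr_do_page_fault() is what lets SMP kernel builds enable writable page tables unconditionally: the batched fast path is only taken when the faulting guest is effectively uniprocessor (the faulting exec_domain is vcpu 0 and has no successor on the domain's vcpu list); otherwise the write is bounced to the emulator, as is every write on x86-64 for now. A stand-alone sketch of that test (the struct below is an illustrative stand-in, not Xen's real exec_domain):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for Xen's exec_domain; only the two fields the new check
 * looks at are modelled here. */
struct exec_domain {
    int eid;                          /* vcpu id within the domain */
    struct exec_domain *ed_next_list; /* next vcpu in the domain, or NULL */
};

/* Mirrors: if ( (ed->eid != 0) || (ed->ed_next_list != NULL) ) goto emulate; */
static bool must_emulate_pt_write(const struct exec_domain *ed)
{
    return (ed->eid != 0) || (ed->ed_next_list != NULL);
}

int main(void)
{
    struct exec_domain vcpu1     = { 1, NULL };
    struct exec_domain smp_vcpu0 = { 0, &vcpu1 };
    struct exec_domain up_vcpu0  = { 0, NULL };

    printf("UP guest  -> emulate? %d\n", must_emulate_pt_write(&up_vcpu0));  /* 0 */
    printf("SMP guest -> emulate? %d\n", must_emulate_pt_write(&smp_vcpu0)); /* 1 */
    return 0;
}
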
diff -Nru a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        2005-03-18 11:03:51 -05:00
+++ b/xen/arch/x86/x86_emulate.c        2005-03-18 11:03:51 -05:00
@@ -18,12 +18,14 @@




 

