
[Xen-changelog] Attached patch allows PAE xenlinux to run in the shadow mode using



# HG changeset patch
# User smh22@xxxxxxxxxxxxxxxxxxxx
# Node ID 995e94c4802e5c0376b0483f3b2473a8f7d7808e
# Parent  e023e37b3c7aca1d5702715eed4b586e267a75ee
Attached patch allows PAE xenlinux to run in shadow mode using
log-dirty guest-refcount, which is required to support
save/restore/relocate. We can turn the mode on and off at 5-second
intervals while doing a kernel build (make -j4), for example, and it
survives for hours.
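
For reference, the on/off toggling we used in testing can be sketched
roughly as below from a dom0 tool. This is only a sketch: the
xc_shadow_control() signature and the DOM0_SHADOW_CONTROL_OP_* names are
assumptions based on the libxc interface of this era and may differ in
your tree.

/* Rough sketch (assumed libxc interface): toggle log-dirty every 5s. */
#include <stdint.h>
#include <unistd.h>
#include <xenctrl.h>

static int toggle_logdirty(uint32_t domid, int iterations)
{
    int xc = xc_interface_open();
    int i;

    if ( xc < 0 )
        return -1;

    for ( i = 0; i < iterations; i++ )
    {
        /* Enable log-dirty: guest writes are tracked in the dirty bitmap. */
        if ( xc_shadow_control(xc, domid,
                               DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
                               NULL, 0, NULL) < 0 )
            break;
        sleep(5);

        /* Turn shadow mode off; the guest runs on its own page tables. */
        if ( xc_shadow_control(xc, domid, DOM0_SHADOW_CONTROL_OP_OFF,
                               NULL, 0, NULL) < 0 )
            break;
        sleep(5);
    }

    xc_interface_close(xc);
    return 0;
}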

We are still restoring log-dirty mode for x86_64 xenlinux; it is not
very stable right now, but I believe it should be done very soon.

We also checked that it does not break 64-bit VMX domains, which use a
different sub-mode of the shadow mode.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>

diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Fri Nov 11 18:02:49 2005
+++ b/xen/arch/x86/shadow.c     Fri Nov 11 18:11:13 2005
@@ -22,7 +22,7 @@
  * Jun Nakajima <jun.nakajima@xxxxxxxxx>
  * Chengyuan Li <chengyuan.li@xxxxxxxxx>
  *
- * Extended to support 64-bit guests.
+ * Extended to support 32-bit PAE and 64-bit guests.
  */
 
 #include <xen/config.h>
@@ -34,6 +34,7 @@
 #include <xen/event.h>
 #include <xen/sched.h>
 #include <xen/trace.h>
+#include <asm/shadow_64.h>
 
 extern void free_shadow_pages(struct domain *d);
 
@@ -44,13 +45,13 @@
 #endif
 
 #if CONFIG_PAGING_LEVELS == 3
-#include <asm/shadow_64.h>
 static unsigned long shadow_l3_table(
     struct domain *d, unsigned long gpfn, unsigned long gmfn);
+static inline void validate_bl2e_change( struct domain *d,
+    guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index);
 #endif
 
 #if CONFIG_PAGING_LEVELS == 4
-#include <asm/shadow_64.h>
 static unsigned long shadow_l4_table(
     struct domain *d, unsigned long gpfn, unsigned long gmfn);
 static void shadow_map_into_current(struct vcpu *v,
@@ -222,7 +223,7 @@
         {
             if (d->arch.ops->guest_paging_levels == PAGING_L2)
             {
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
                 /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1
                  * So need allocate 2 continues shadow L1 each time.
                  */
@@ -313,6 +314,8 @@
             goto fail;
         perfc_incr(shadow_l3_pages);
         d->arch.shadow_page_count++;
+        if ( PGT_l3_page_table == PGT_root_page_table )
+            pin = 1;
         break;
 
     case PGT_l4_shadow:
@@ -375,7 +378,7 @@
     {
         if (d->arch.ops->guest_paging_levels == PAGING_L2)
         {
-#if CONFIG_PAGING_LEVELS >=4
+#if CONFIG_PAGING_LEVELS >=3
             free_domheap_pages(page, SL1_ORDER);
 #else
             free_domheap_page(page);
@@ -427,14 +430,10 @@
 
     hl2 = map_domain_page(hl2mfn);
 
-#ifdef __i386__
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
-#endif
 
     memset(hl2, 0, limit * sizeof(l1_pgentry_t));
 
@@ -540,7 +539,7 @@
     SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
     return smfn;
 }
-#endif
+#endif /* CONFIG_PAGING_LEVELS == 2 */
 
 static void shadow_map_l1_into_current_l2(unsigned long va)
 {
@@ -549,7 +548,7 @@
     l1_pgentry_t *spl1e;
     l2_pgentry_t sl2e;
     guest_l1_pgentry_t *gpl1e;
-    guest_l2_pgentry_t gl2e;
+    guest_l2_pgentry_t gl2e = {0};
     unsigned long gl1pfn, gl1mfn, sl1mfn;
     int i, init_table = 0;
 
@@ -593,14 +592,14 @@
     ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
 #endif
 
-#if CONFIG_PAGING_LEVELS >=4
+#if CONFIG_PAGING_LEVELS >=3
     if (d->arch.ops->guest_paging_levels == PAGING_L2)
     {
-        /* for 32-bit VMX guest on 64-bit host,
+        /* for 32-bit VMX guest on 64-bit or PAE host,
          * need update two L2 entries each time
          */
         if ( !get_shadow_ref(sl1mfn))
-                BUG();
+            BUG();
         l2pde_general(d, &gl2e, &sl2e, sl1mfn);
         __guest_set_l2e(v, va, &gl2e);
         __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e);
@@ -625,19 +624,17 @@
         int index = guest_l1_table_offset(va);
         int min = 1, max = 0;
 
-        unsigned long entries, pt_va;
-        l1_pgentry_t tmp_sl1e;
-        guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile
-
-
-        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t);
-        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
-        gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e);
+        unsigned long tmp_gmfn;
+        l2_pgentry_t tmp_sl2e = {0};
+        guest_l2_pgentry_t tmp_gl2e = {0};
+
+        __guest_get_l2e(v, va, &tmp_gl2e);
+        tmp_gmfn = __gpfn_to_mfn(d, l2e_get_pfn(tmp_gl2e));
+        gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn);
 
         /* If the PGT_l1_shadow has two continual pages */
-        entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!!
-        pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT;
-        spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e);
+        __shadow_get_l2e(v, va, &tmp_sl2e);
+        spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e));
 
         for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ )
         {
@@ -662,10 +659,13 @@
             if ( likely(i > max) )
                 max = i;
             set_guest_back_ptr(d, sl1e, sl1mfn, i);
-          }
+        }
 
         frame_table[sl1mfn].tlbflush_timestamp =
             SHADOW_ENCODE_MIN_MAX(min, max);
+
+        unmap_domain_page(gpl1e);
+        unmap_domain_page(spl1e);
     }
 }
 
@@ -674,7 +674,7 @@
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    l2_pgentry_t sl2e;
+    l2_pgentry_t sl2e = {0};
 
     __shadow_get_l2e(v, va, &sl2e);
     if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
@@ -690,11 +690,23 @@
         }
         else /* check to see if it exists; if so, link it in */
         {
-            l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
-            unsigned long gl1pfn = l2e_get_pfn(gpde);
-            unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
-
-            ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
+            l2_pgentry_t gpde = {0};
+            unsigned long gl1pfn;
+            unsigned long sl1mfn;
+
+            __guest_get_l2e(v, va, &gpde);
+
+            if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
+            {
+                gl1pfn = l2e_get_pfn(gpde);
+                sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
+            }
+            else
+            {
+                // no shadow exists, so there's nothing to do.
+                perfc_incrc(shadow_set_l1e_fail);
+                return;
+            }
 
             if ( sl1mfn )
             {
@@ -738,7 +750,7 @@
     shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
 }
 
-#if CONFIG_PAGING_LEVELS <= 3
+#if CONFIG_PAGING_LEVELS == 2
 static void shadow_invlpg_32(struct vcpu *v, unsigned long va)
 {
     struct domain *d = v->domain;
@@ -767,7 +779,7 @@
 
     shadow_unlock(d);
 }
-#endif
+#endif /* CONFIG_PAGING_LEVELS == 2 */
 
 static struct out_of_sync_entry *
 shadow_alloc_oos_entry(struct domain *d)
@@ -996,7 +1008,10 @@
 
     if (__copy_from_user(&gpte, &guest_pt[index],
                          sizeof(gpte)))
+    {
+        unmap_domain_page(snapshot);
         return 0;
+    }
 
     // This could probably be smarter, but this is sufficent for
     // our current needs.
@@ -1021,7 +1036,7 @@
 static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */
 {
     struct domain *d = v->domain;
-#if defined (__x86_64__)
+#if CONFIG_PAGING_LEVELS == 4
     unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)?
                           pagetable_get_pfn(v->arch.guest_table) :
                           pagetable_get_pfn(v->arch.guest_table_user));
@@ -1032,16 +1047,21 @@
     guest_l2_pgentry_t l2e;
     unsigned long l1pfn, l1mfn;
     guest_l1_pgentry_t *guest_pt;
-    guest_l1_pgentry_t tmp_gle;
-    unsigned long pt_va;
 
     ASSERT(shadow_lock_is_acquired(d));
     ASSERT(VALID_M2P(l2pfn));
 
     perfc_incrc(shadow_out_of_sync_calls);
 
-#if CONFIG_PAGING_LEVELS >= 4
-    if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */
+#if CONFIG_PAGING_LEVELS >= 3
+
+#define unmap_and_return(x)                                         \
+    if ( guest_pt != (guest_l1_pgentry_t *) v->arch.guest_vtable )  \
+        unmap_domain_page(guest_pt);                                \
+    return (x);
+
+    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
+    { 
         pgentry_64_t le;
         unsigned long gmfn;
         unsigned long gpfn;
@@ -1051,37 +1071,57 @@
         gpfn = l2pfn;
         guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable;
 
-        for (i = PAGING_L4; i >= PAGING_L3; i--) {
+        for ( i = PAGING_L4; i >= PAGING_L3; i-- ) 
+        {
+            if (d->arch.ops->guest_paging_levels == PAGING_L3 
+                && i == PAGING_L4)
+                continue;       /* skip the top-level for 3-level */
+
             if ( page_out_of_sync(&frame_table[gmfn]) &&
-              !snapshot_entry_matches(
-                  d, guest_pt, gpfn, table_offset_64(va, i)) )
-                return 1;
-
+                 !snapshot_entry_matches(
+                     d, guest_pt, gpfn, table_offset_64(va, i)) )
+            {
+                unmap_and_return (1);
+            }
+
+            le = entry_empty();
             __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i);
+
             if ( !(entry_get_flags(le) & _PAGE_PRESENT) )
-                return 0;
+            {
+                unmap_and_return (0);
+            }
             gpfn = entry_get_pfn(le);
             gmfn = __gpfn_to_mfn(d, gpfn);
             if ( !VALID_MFN(gmfn) )
-                return 0;
-            /* Todo: check!*/
+            {
+                unmap_and_return (0);
+            }
+            if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
+                unmap_domain_page(guest_pt);
             guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn);
-
         }
 
         /* L2 */
         if ( page_out_of_sync(&frame_table[gmfn]) &&
              !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) )
+        {
+            unmap_and_return (1);
+        }
+
+        if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable )
+            unmap_domain_page(guest_pt);
+
+    } 
+    else
+#undef unmap_and_return
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+    {
+        if ( page_out_of_sync(&frame_table[l2mfn]) &&
+             !snapshot_entry_matches(d, (guest_l1_pgentry_t 
*)v->arch.guest_vtable,
+                                     l2pfn, guest_l2_table_offset(va)) )
             return 1;
-
-
-    } else
-#endif
-
-    if ( page_out_of_sync(&frame_table[l2mfn]) &&
-         !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable,
-                                 l2pfn, guest_l2_table_offset(va)) )
-        return 1;
+    }
 
     __guest_get_l2e(v, va, &l2e);
     if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) ||
@@ -1095,15 +1135,17 @@
     if ( !VALID_MFN(l1mfn) )
         return 0;
 
-    pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1))
-      << L1_PAGETABLE_SHIFT;
-    guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle);
+    guest_pt = (guest_l1_pgentry_t *) map_domain_page(l1mfn);
 
     if ( page_out_of_sync(&frame_table[l1mfn]) &&
          !snapshot_entry_matches(
-             d, guest_pt, l1pfn, guest_l1_table_offset(va)) )
+             d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) 
+    {
+        unmap_domain_page(guest_pt);
         return 1;
-
+    }
+
+    unmap_domain_page(guest_pt);
     return 0;
 }
 
@@ -1257,7 +1299,7 @@
     }
 
     if ( shadow_mode_external(d) ) {
-        if (write_refs-- == 0)
+        if (--write_refs == 0)
             return 0;
 
          // Use the back pointer to locate the shadow page that can contain
@@ -1314,6 +1356,8 @@
 
     for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
     {
+        int max = -1;
+
         if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
             continue;
 
@@ -1335,7 +1379,7 @@
                 continue;
         }
 
-        FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
+       FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
                 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
 
         // Compare guest's new contents to its snapshot, validating
@@ -1373,10 +1417,8 @@
 
             if ( !shadow_mode_refcounts(d) )
                 revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1);
-
             if ( !smfn )
                 break;
-
 
             changed = 0;
 
@@ -1405,12 +1447,13 @@
             perfc_incrc(resync_l1);
             perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
             perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
-            if ( d->arch.ops->guest_paging_levels == PAGING_L4 &&
+            if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
                  unshadow_l1 ) {
-                pgentry_64_t l2e;
+                pgentry_64_t l2e = {0};
 
                 __shadow_get_l2e(entry->v, entry->va, &l2e);
-                if (entry_get_flags(l2e) & _PAGE_PRESENT) {
+
+                if ( entry_get_flags(l2e) & _PAGE_PRESENT ) {
                     entry_remove_flags(l2e, _PAGE_PRESENT);
                     __shadow_set_l2e(entry->v, entry->va, &l2e);
 
@@ -1421,11 +1464,9 @@
 
             break;
         }
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
         case PGT_l2_shadow:
         {
-            int max = -1;
-
             l2_pgentry_t *guest2 = guest;
             l2_pgentry_t *shadow2 = shadow;
             l2_pgentry_t *snapshot2 = snapshot;
@@ -1436,9 +1477,6 @@
             changed = 0;
             for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
             {
-#if CONFIG_X86_PAE
-                BUG();  /* FIXME: need type_info */
-#endif
                 if ( !is_guest_l2_slot(0,i) && !external )
                     continue;
 
@@ -1482,9 +1520,6 @@
             changed = 0;
             for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
             {
-#if CONFIG_X86_PAE
-                BUG();  /* FIXME: need type_info */
-#endif
                 if ( !is_guest_l2_slot(0, i) && !external )
                     continue;
 
@@ -1505,7 +1540,7 @@
             perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
             break;
         }
-#else
+#elif CONFIG_PAGING_LEVELS >= 3
         case PGT_l2_shadow:
         case PGT_l3_shadow:
         {
@@ -1521,19 +1556,35 @@
                       guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
                 {
                     need_flush |= validate_entry_change(
-                      d, &guest_pt[i], &shadow_pt[i],
-                      shadow_type_to_level(stype));
+                        d, &guest_pt[i], &shadow_pt[i],
+                        shadow_type_to_level(stype));
                     changed++;
                 }
+#if CONFIG_PAGING_LEVELS == 3
+                if ( stype == PGT_l3_shadow ) 
+                {
+                    if ( entry_get_value(guest_pt[i]) != 0 ) 
+                        max = i;
+
+                    if ( !(entry_get_flags(guest_pt[i]) & _PAGE_PRESENT) &&
+                         unlikely(entry_get_value(guest_pt[i]) != 0) &&
+                         !unshadow &&
+                         (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
+                        unshadow = 1;
+                }
+#endif
             }
+
+            if ( d->arch.ops->guest_paging_levels == PAGING_L3
+                 && max == -1 && stype == PGT_l3_shadow )
+                unshadow = 1;
+
+            perfc_incrc(resync_l3);
+            perfc_incr_histo(shm_l3_updates, changed, PT_UPDATES);
             break;
-
-
         }
         case PGT_l4_shadow:
         {
-            int max = -1;
-
             guest_root_pgentry_t *guest_root = guest;
             l4_pgentry_t *shadow4 = shadow;
             guest_root_pgentry_t *snapshot_root = snapshot;
@@ -1547,7 +1598,8 @@
                 if ( root_entry_has_changed(
                         new_root_e, snapshot_root[i], PAGE_FLAG_MASK))
                 {
-                    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
+                    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
+                    {
                         need_flush |= validate_entry_change(
                           d, (pgentry_64_t *)&new_root_e,
                           (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype));
@@ -1563,9 +1615,9 @@
 
                 //  Need a better solution in the long term.
                 if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) &&
-                  unlikely(guest_root_get_intpte(new_root_e) != 0) &&
-                  !unshadow &&
-                  (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
+                     unlikely(guest_root_get_intpte(new_root_e) != 0) &&
+                     !unshadow &&
+                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
                     unshadow = 1;
             }
             if ( max == -1 )
@@ -1575,7 +1627,7 @@
             break;
         }
 
-#endif
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
         default:
             BUG();
         }
@@ -1589,7 +1641,7 @@
         {
             perfc_incrc(unshadow_l2_count);
             shadow_unpin(smfn);
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
             if ( unlikely(shadow_mode_external(d)) )
             {
                 unsigned long hl2mfn;
@@ -1660,19 +1712,24 @@
     // Second, resync all L1 pages, then L2 pages, etc...
     //
     need_flush |= resync_all(d, PGT_l1_shadow);
-#if defined (__i386__)
-    if ( shadow_mode_translate(d) )
+
+#if CONFIG_PAGING_LEVELS == 2
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
+         shadow_mode_translate(d) )  
+    {
         need_flush |= resync_all(d, PGT_hl2_shadow);
-#endif
-
-    /*
-     * Fixme: for i386 host
-     */
-    if (d->arch.ops->guest_paging_levels == PAGING_L4) {
-        need_flush |= resync_all(d, PGT_l2_shadow);
+    }
+#endif
+
+    need_flush |= resync_all(d, PGT_l2_shadow);
+
+#if CONFIG_PAGING_LEVELS >= 3
+    if (d->arch.ops->guest_paging_levels >= PAGING_L3) 
+    {
         need_flush |= resync_all(d, PGT_l3_shadow);
-    }
-    need_flush |= resync_all(d, PGT_l4_shadow);
+        need_flush |= resync_all(d, PGT_l4_shadow);
+    }
+#endif
 
     if ( need_flush && !unlikely(shadow_mode_external(d)) )
         local_flush_tlb();
@@ -1749,7 +1806,7 @@
 
     return 1;
 }
-#if CONFIG_PAGING_LEVELS <= 3
+#if CONFIG_PAGING_LEVELS == 2
 static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs)
 {
     l1_pgentry_t gpte, spte, orig_gpte;
@@ -1888,7 +1945,20 @@
     shadow_unlock(d);
     return 0;
 }
-#endif
+#endif /* CONFIG_PAGING_LEVELS == 2 */
+
+static inline unsigned long va_to_l1mfn(struct vcpu *v, unsigned long va)
+{
+    struct domain *d = v->domain;
+    guest_l2_pgentry_t gl2e = {0};
+
+    __guest_get_l2e(v, va, &gl2e);
+    
+    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT)) )
+        return INVALID_MFN;
+
+    return __gpfn_to_mfn(d, l2e_get_pfn(gl2e));
+}
 
 static int do_update_va_mapping(unsigned long va,
                                 l1_pgentry_t val,
@@ -1899,8 +1969,6 @@
     int rc = 0;
 
     shadow_lock(d);
-
-    //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
 
     // This is actually overkill - we don't need to sync the L1 itself,
     // just everything involved in getting to this L1 (i.e. we need
@@ -1919,7 +1987,6 @@
     if ( shadow_mode_log_dirty(d) )
         __mark_dirty(d, va_to_l1mfn(v, va));
 
-// out:
     shadow_unlock(d);
 
     return rc;
@@ -1955,7 +2022,7 @@
 static void shadow_update_pagetables(struct vcpu *v)
 {
     struct domain *d = v->domain;
-#if defined (__x86_64__)
+#if CONFIG_PAGING_LEVELS == 4
     unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)?
                           pagetable_get_pfn(v->arch.guest_table) :
                           pagetable_get_pfn(v->arch.guest_table_user));
@@ -1991,7 +2058,8 @@
     /*
      *  arch.shadow_table
      */
-    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) {
+    if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) 
+    {
 #if CONFIG_PAGING_LEVELS == 2
         smfn = shadow_l2_table(d, gpfn, gmfn);
 #elif CONFIG_PAGING_LEVELS == 3
@@ -2013,7 +2081,7 @@
      * arch.shadow_vtable
      */
     if ( max_mode == SHM_external
-#if CONFIG_PAGING_LEVELS >=4
+#if CONFIG_PAGING_LEVELS >=3
          || max_mode & SHM_enable
 #endif
         )
@@ -2068,7 +2136,7 @@
         // XXX - maybe this can be optimized somewhat??
         local_flush_tlb();
     }
-#endif
+#endif /* CONFIG_PAGING_LEVELS == 2 */
 
 #if CONFIG_PAGING_LEVELS == 3
     /* FIXME: PAE code to be written */
@@ -2373,7 +2441,7 @@
                l2e_get_intpte(match));
     }
 
-#ifdef __i386__
+#if CONFIG_PAGING_LEVELS == 2
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
@@ -2405,7 +2473,7 @@
 int _check_pagetable(struct vcpu *v, char *s)
 {
     struct domain *d = v->domain;
-#if defined (__x86_64__)
+#if CONFIG_PAGING_LEVELS == 4
     pagetable_t pt = ((v->arch.flags & TF_kernel_mode)?
                       v->arch.guest_table : v->arch.guest_table_user);
 #else
@@ -2447,7 +2515,7 @@
     spl2e = (l2_pgentry_t *) map_domain_page(smfn);
 
     /* Go back and recurse. */
-#ifdef __i386__
+#if CONFIG_PAGING_LEVELS == 2
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
@@ -2551,59 +2619,108 @@
 
 #if CONFIG_PAGING_LEVELS == 3
 static unsigned long shadow_l3_table(
-  struct domain *d, unsigned long gpfn, unsigned long gmfn)
-{
-    BUG();                      /* not implemenated yet */
-    return 42;
-}
+    struct domain *d, unsigned long gpfn, unsigned long gmfn)
+{
+    unsigned long smfn;
+    l3_pgentry_t *spl3e;
+
+    perfc_incrc(shadow_l3_table_count);
+
+    if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
+    {
+        printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, 
gmfn);
+        BUG(); /* XXX Deal gracefully with failure. */
+    }
+
+    spl3e = (l3_pgentry_t *)map_domain_page(smfn);
+
+    /* Make the self entry */
+    spl3e[PAE_SHADOW_SELF_ENTRY] = l3e_from_pfn(smfn, __PAGE_HYPERVISOR);
+
+    if ( (PGT_base_page_table == PGT_l3_page_table) &&
+         !shadow_mode_external(d) ) {
+        int i;
+        unsigned long g2mfn, s2mfn;
+        l2_pgentry_t *spl2e;
+        l3_pgentry_t *gpl3e;
+
+        /* Get the top entry */
+        gpl3e = (l3_pgentry_t *)map_domain_page(gmfn);
+
+        if ( !(l3e_get_flags(gpl3e[L3_PAGETABLE_ENTRIES - 1]) & _PAGE_PRESENT) )
+        {
+            BUG();
+        }
+
+        g2mfn = l3e_get_pfn(gpl3e[L3_PAGETABLE_ENTRIES - 1]);
+
+        /* NB. g2mfn should be same as g2pfn */
+        if (!(s2mfn = __shadow_status(d, g2mfn, PGT_l2_shadow))) {
+            if ( unlikely(!(s2mfn =
+                    alloc_shadow_page(d, g2mfn, g2mfn, PGT_l2_shadow))) ) {
+                printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
+                    g2mfn, g2mfn);
+                BUG(); /* XXX Deal gracefully with failure. */
+            }
+
+            if (!get_shadow_ref(s2mfn))
+                BUG();
+        } 
+            
+        /* Map shadow L2 into shadow L3 */
+        spl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(s2mfn, _PAGE_PRESENT);
+        shadow_update_min_max(smfn, L3_PAGETABLE_ENTRIES -1);
+
+        /*  
+         * Xen private mappings. Do the similar things as
+         * create_pae_xen_mappings().
+         */
+        spl2e = (l2_pgentry_t *)map_domain_page(s2mfn);
+
+        /*
+         * When we free L2 pages, we need to tell if the page contains
+         * Xen private mappings. Use the va_mask part.
+         */
+        frame_table[s2mfn].u.inuse.type_info |= 
+            (unsigned long) 3 << PGT_score_shift; 
+
+        memset(spl2e, 0, 
+               (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)) * sizeof(l2_pgentry_t));
+
+        memcpy(&spl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));       
+
+        for ( i = 0; i < (PERDOMAIN_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+            spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+                l2e_from_page(
+                    virt_to_page(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt) + i,
+                    __PAGE_HYPERVISOR);
+        for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+            spl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(gpl3e[i]) & _PAGE_PRESENT) ?
+                l2e_from_pfn(l3e_get_pfn(gpl3e[i]), __PAGE_HYPERVISOR) :
+                l2e_empty();
+       
+        unmap_domain_page(spl2e);
+        unmap_domain_page(gpl3e);
+    }
+    unmap_domain_page(spl3e);
+
+    return smfn;
+}
+
 static unsigned long gva_to_gpa_pae(unsigned long gva)
 {
     BUG();
     return 43;
 }
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
+#endif /* CONFIG_PAGING_LEVELS == 3 */
+
+#if CONFIG_PAGING_LEVELS == 4
 /****************************************************************************/
 /* 64-bit shadow-mode code testing */
 /****************************************************************************/
-/*
- * validate_bl2e_change()
- * The code is for 32-bit VMX gues on 64-bit host.
- * To sync guest L2.
- */
-static inline void
-validate_bl2e_change(
-  struct domain *d,
-  guest_root_pgentry_t *new_gle_p,
-  pgentry_64_t *shadow_l3,
-  int index)
-{
-    int sl3_idx, sl2_idx;
-    unsigned long sl2mfn, sl1mfn;
-    pgentry_64_t *sl2_p;
-
-    /* Using guest l2 pte index to get shadow l3&l2 index
-     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
-     */
-    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
-    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
-
-    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
-    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
-
-    validate_pde_change(
-        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
-
-    /* Mapping the second l1 shadow page */
-    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
-       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
-       sl2_p[sl2_idx + 1] =
-            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
-    }
-    unmap_domain_page(sl2_p);
-}
-
 /*
  * init_bl2() is for 32-bit VMX guest on 64-bit host
  * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
@@ -2699,6 +2816,47 @@
     ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn);
     return smfn;
 }
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS >= 3
+/*
+ * validate_bl2e_change()
+ * The code is for 32-bit VMX gues on 64-bit host.
+ * To sync guest L2.
+ */
+
+static inline void
+validate_bl2e_change(
+    struct domain *d,
+    guest_root_pgentry_t *new_gle_p,
+    pgentry_64_t *shadow_l3,
+    int index)
+{
+    int sl3_idx, sl2_idx;
+    unsigned long sl2mfn, sl1mfn;
+    pgentry_64_t *sl2_p;
+
+    /* Using guest l2 pte index to get shadow l3&l2 index
+     * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512
+     */
+    sl3_idx = index / (PAGETABLE_ENTRIES / 2);
+    sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;
+
+    sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]);
+    sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn);
+
+    validate_pde_change(
+        d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]);
+
+    /* Mapping the second l1 shadow page */
+    if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) {
+       sl1mfn = entry_get_pfn(sl2_p[sl2_idx]);
+       sl2_p[sl2_idx + 1] =
+            entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx]));
+    }
+    unmap_domain_page(sl2_p);
+
+}
 
 /*
  * This shadow_mark_va_out_of_sync() is for 2M page shadow
@@ -2714,7 +2872,6 @@
     if ( !get_shadow_ref(writable_pl1e >> L1_PAGETABLE_SHIFT) )
         BUG();
 }
-
 
 static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag)
 {
@@ -2764,7 +2921,7 @@
 static void shadow_map_into_current(struct vcpu *v,
   unsigned long va, unsigned int from, unsigned int to)
 {
-    pgentry_64_t gle, sle;
+    pgentry_64_t gle = {0}, sle;
     unsigned long gpfn, smfn;
 
     if (from == PAGING_L1 && to == PAGING_L2) {
@@ -2836,8 +2993,9 @@
 }
 
 
-static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p,
-  int create_l1_shadow)
+static void shadow_set_l1e_64(
+    unsigned long va, pgentry_64_t *sl1e_p,
+    int create_l1_shadow)
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
@@ -2848,19 +3006,21 @@
     int i;
     unsigned long orig_va = 0;
 
-    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) 
+    {
         /* This is for 32-bit VMX guest on 64-bit host */
         orig_va = va;
         va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1));
     }
 
-    for (i = PAGING_L4; i >= PAGING_L2; i--) {
+    for (i = PAGING_L4; i >= PAGING_L2; i--) 
+    {
         if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) {
             printk("<%s> i = %d\n", __func__, i);
             BUG();
         }
-        if (!(entry_get_flags(sle) & _PAGE_PRESENT)) {
-            if (create_l1_shadow) {
+        if ( !(entry_get_flags(sle) & _PAGE_PRESENT) ) {
+            if ( create_l1_shadow ) {
                 perfc_incrc(shadow_set_l3e_force_map);
                 shadow_map_into_current(v, va, i-1, i);
                 __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i);
@@ -2870,12 +3030,12 @@
 #endif
             }
         }
-        if(i < PAGING_L4)
+        if( i < PAGING_L4 )
             shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i));
         sle_up = sle;
     }
 
-    if (d->arch.ops->guest_paging_levels == PAGING_L2) {
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) {
         va = orig_va;
     }
 
@@ -2914,7 +3074,7 @@
     l1_pgentry_t sl1e;
     l1_pgentry_t old_sl1e;
     l2_pgentry_t sl2e;
-    unsigned long nx = 0;
+    u64 nx = 0;
     int put_ref_check = 0;
     /* Check if gpfn is 2M aligned */
 
@@ -2929,7 +3089,7 @@
     l2e_remove_flags(tmp_l2e, _PAGE_PSE);
     if (l2e_get_flags(gl2e) & _PAGE_NX) {
         l2e_remove_flags(tmp_l2e, _PAGE_NX);
-        nx = 1UL << 63;
+        nx = 1ULL << 63;
     }
 
 
@@ -3037,114 +3197,161 @@
  * else return 0.
  */
 #if defined( GUEST_PGENTRY_32 )
-static inline int guest_page_fault(struct vcpu *v,
-  unsigned long va, unsigned int error_code,
-  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
+static inline int guest_page_fault(
+    struct vcpu *v,
+    unsigned long va, unsigned int error_code,
+    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
 {
     /* The following check for 32-bit guest on 64-bit host */
 
     __guest_get_l2e(v, va, gpl2e);
 
     /* Check the guest L2 page-table entry first*/
-    if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)))
+    if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)) )
         return 1;
 
-    if (error_code & ERROR_W) {
-        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)))
+    if ( error_code & ERROR_W ) 
+    {
+        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)) )
             return 1;
     }
-    if (error_code & ERROR_U) {
-        if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)))
+
+    if ( error_code & ERROR_U ) 
+    {
+        if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)) )
             return 1;
     }
 
-    if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE)
+    if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE )
         return 0;
 
     __guest_get_l1e(v, va, gpl1e);
 
     /* Then check the guest L1 page-table entry */
-    if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)))
+    if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)) )
         return 1;
 
-    if (error_code & ERROR_W) {
-        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)))
+    if ( error_code & ERROR_W ) 
+    {
+        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)) )
             return 1;
     }
-    if (error_code & ERROR_U) {
-        if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)))
+
+    if ( error_code & ERROR_U ) 
+    {
+        if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)) )
             return 1;
     }
 
     return 0;
 }
 #else
-static inline int guest_page_fault(struct vcpu *v,
-  unsigned long va, unsigned int error_code,
-  guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
+static inline int guest_page_fault(
+    struct vcpu *v,
+    unsigned long va, unsigned int error_code,
+    guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e)
 {
     struct domain *d = v->domain;
-    pgentry_64_t gle, *lva;
-    unsigned long mfn;
+    pgentry_64_t gle;
+    unsigned long gpfn = 0, mfn;
     int i;
 
-    __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
-    if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
-        return 1;
-
-    if (error_code & ERROR_W) {
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
+    ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
+
+#if CONFIG_PAGING_LEVELS == 4
+    if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
+    {
+        __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4);
+        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
             return 1;
-    }
-    if (error_code & ERROR_U) {
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
-            return 1;
-    }
-    for (i = PAGING_L3; i >= PAGING_L1; i--) {
+
+        if ( error_code & ERROR_W )
+        {
+            if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) )
+                return 1;
+        }
+
+        if ( error_code & ERROR_U )
+        {
+            if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
+                return 1;
+        }
+        gpfn = entry_get_pfn(gle);
+    }
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) 
+    {
+        gpfn = pagetable_get_pfn(v->arch.guest_table);
+    }
+#endif
+
+    for ( i = PAGING_L3; i >= PAGING_L1; i-- ) 
+    {
+        pgentry_64_t *lva;
         /*
          * If it's not external mode, then mfn should be machine physical.
          */
-        mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT));
-
-        lva = (pgentry_64_t *) phys_to_virt(
-          mfn << PAGE_SHIFT);
+        mfn = __gpfn_to_mfn(d, gpfn);
+
+        lva = (pgentry_64_t *) map_domain_page(mfn);
         gle = lva[table_offset_64(va, i)];
-
-        if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)))
+        unmap_domain_page(lva);
+
+        gpfn = entry_get_pfn(gle);
+
+        if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) )
             return 1;
 
-        if (error_code & ERROR_W) {
-            if (unlikely(!(entry_get_flags(gle) & _PAGE_RW)))
-                return 1;
-        }
-        if (error_code & ERROR_U) {
-            if (unlikely(!(entry_get_flags(gle) & _PAGE_USER)))
-                return 1;
-        }
-
-        if (i == PAGING_L2) {
-            if (gpl2e)
+        if ( i < PAGING_L3 ) 
+        {
+            if ( error_code & ERROR_W ) 
+            {
+                if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) ) 
+                {
+                    if ( i == PAGING_L1 )
+                        if ( gpl1e )
+                            gpl1e->l1 = gle.lo;
+                    return 1;
+                }
+            }
+            if ( error_code & ERROR_U ) 
+            {
+                if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) )
+                    return 1;
+            }
+        }
+
+        if ( i == PAGING_L2 ) 
+        {
+            if ( gpl2e )
                 gpl2e->l2 = gle.lo;
-
-            if (likely(entry_get_flags(gle) & _PAGE_PSE))
+            if ( likely(entry_get_flags(gle) & _PAGE_PSE) )
                 return 0;
-
-        }
-
-        if (i == PAGING_L1)
-            if (gpl1e)
+        }
+
+        if ( i == PAGING_L1 )
+            if ( gpl1e )
                 gpl1e->l1 = gle.lo;
     }
+
     return 0;
-}
-#endif
+
+}
+#endif
+
 static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
     guest_l2_pgentry_t gl2e;
-    guest_l1_pgentry_t gl1e;
+    guest_l1_pgentry_t gl1e, orig_gl1e;
     l1_pgentry_t sl1e;
+
+    gl1e = guest_l1e_empty(); gl2e = guest_l2e_empty();
+
+    sl1e = l1e_empty();
 
     perfc_incrc(shadow_fault_calls);
 
@@ -3156,7 +3363,7 @@
      */
     shadow_lock(d);
 
-    /* XXX - FIX THIS COMMENT!!!
+    /*
      * STEP 1. Check to see if this fault might have been caused by an
      *         out-of-sync table page entry, or if we should pass this
      *         fault onto the guest.
@@ -3166,66 +3373,121 @@
     /*
      * STEP 2. Check if the fault belongs to guest
      */
-    if ( guest_page_fault(
-            v, va, regs->error_code, &gl2e, &gl1e) ) {
+    if ( guest_page_fault(v, va, regs->error_code, &gl2e, &gl1e) ) 
+    {
+        if ( unlikely(shadow_mode_log_dirty(d)) && l1e_get_intpte(gl1e) != 0 )
+            goto check_writeable;
+        
         goto fail;
     }
 
-    if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) {
-        /*
-         * Handle 4K pages here
-         */
-
-        /* Write fault? */
-        if ( regs->error_code & 2 ) {
-            if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) {
+    if ( unlikely((guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) 
+        goto pse;
+
+    /*
+     * Handle 4K pages here
+     */
+check_writeable:
+    orig_gl1e = gl1e;
+    
+    /* Write fault? */
+    if ( regs->error_code & 2 ) 
+    {
+        int allow_writes = 0;
+
+        if ( unlikely(!(guest_l1e_get_flags(gl1e) & _PAGE_RW)) )
+        {
+            if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gl1e)) )
+            {
+                allow_writes = 1;
+                l1e_add_flags(gl1e, _PAGE_RW);
+            }
+            else
+            {
+                /* Write fault on a read-only mapping. */
+                SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte 
")", 
+                         l1e_get_intpte(gl1e));
+                perfc_incrc(shadow_fault_bail_ro_mapping);
                 goto fail;
             }
-        } else {
-            l1pte_read_fault(d, &gl1e, &sl1e);
-        }
-        /*
-         * STEP 3. Write guest/shadow l2e back
-         */
-        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) {
+        }
+
+        if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) 
+        {
+            SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
+            perfc_incrc(write_fault_bail);
+            shadow_unlock(d);
+            return 0;
+        }
+ 
+        if (allow_writes)
+            l1e_remove_flags(gl1e, _PAGE_RW);
+    }
+    else 
+    {
+        if ( !l1pte_read_fault(d, &gl1e, &sl1e) )
+        {
+            SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
+            perfc_incrc(read_fault_bail);
+            shadow_unlock(d);
+            return 0;
+        }
+    }
+
+    /*
+     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables
+     */
+    if ( l1e_has_changed(orig_gl1e, gl1e, PAGE_FLAG_MASK) )
+    {
+        if (unlikely(!__guest_set_l1e(v, va, &gl1e))) 
             domain_crash_synchronous();
-        }
-
-        ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), 
l1e_get_intpte(sl1e));
-        shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
-        /*
-         *  if necessary, record the page table page as dirty
-         */
-         if ( unlikely(shadow_mode_log_dirty(d)) )
+
+        // if necessary, record the page table page as dirty
+        if ( unlikely(shadow_mode_log_dirty(d)) )
             __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e)));
-
-    } else {
-        /*
-         * Handle 2M pages here
-         */
-        /* Write fault? */
-        if ( regs->error_code & 2 ) {
-            if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) {
-                goto fail;
-            }
-        } else {
-            l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
-        }
-
-        /*
-         * STEP 3. Write guest/shadow l2e back
-         */
-
-        if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) {
-            domain_crash_synchronous();
-        }
-
-        /*
-         * Todo: if necessary, record the page table page as dirty
-         */
-
-
-    }
+    }
+
+    shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1);
+
+    perfc_incrc(shadow_fault_fixed);
+    d->arch.shadow_fault_count++;
+
+    shadow_unlock(d);
+
+    return EXCRET_fault_fixed;
+
+pse:
+    /*
+     * Handle 2M pages here
+     */
+    if ( unlikely(!shadow_mode_external(d)) )
+        BUG();
+
+    /* Write fault? */
+    if ( regs->error_code & 2 ) 
+    {
+        if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) 
+        {
+            goto fail;
+        }
+    } 
+    else 
+    {
+        l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT);
+    }
+
+    /*
+     * STEP 3. Write guest/shadow l2e back
+     */
+
+    if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) 
+    {
+        domain_crash_synchronous();
+    }
+
+    /*
+     * Todo: if necessary, record the page table page as dirty
+     */
 
     perfc_incrc(shadow_fault_fixed);
     d->arch.shadow_fault_count++;
@@ -3257,6 +3519,7 @@
     shadow_unlock(d);
 }
 
+#if CONFIG_PAGING_LEVELS == 4
 static unsigned long gva_to_gpa_64(unsigned long gva)
 {
     struct vcpu *v = current;
@@ -3273,13 +3536,11 @@
         gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK);
 
     return gpa;
-
 }
 
 #ifndef GUEST_PGENTRY_32
-
 struct shadow_ops MODE_F_HANDLER = {
-    .guest_paging_levels              = 4,
+    .guest_paging_levels        = 4,
     .invlpg                     = shadow_invlpg_64,
     .fault                      = shadow_fault_64,
     .update_pagetables          = shadow_update_pagetables,
@@ -3290,9 +3551,11 @@
     .is_out_of_sync             = is_out_of_sync,
     .gva_to_gpa                 = gva_to_gpa_64,
 };
-#endif
-
-#endif
+#endif /* GUEST_PGENTRY_32 */
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+
 
 #if CONFIG_PAGING_LEVELS == 2
 struct shadow_ops MODE_A_HANDLER = {
@@ -3309,10 +3572,11 @@
 };
 
 #elif CONFIG_PAGING_LEVELS == 3
+
 struct shadow_ops MODE_B_HANDLER = {
-    .guest_paging_levels              = 3,
-    .invlpg                     = shadow_invlpg_32,
-    .fault                      = shadow_fault_32,
+    .guest_paging_levels        = 3,
+    .invlpg                     = shadow_invlpg_64,
+    .fault                      = shadow_fault_64,
     .update_pagetables          = shadow_update_pagetables,
     .sync_all                   = sync_all,
     .remove_all_write_access    = remove_all_write_access,
diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Fri Nov 11 18:02:49 2005
+++ b/xen/arch/x86/shadow32.c   Fri Nov 11 18:11:13 2005
@@ -31,6 +31,8 @@
 #include <xen/trace.h>
 
 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
+#define va_to_l1mfn(_ed, _va) \
+    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
 
 static void shadow_free_snapshot(struct domain *d,
                                  struct out_of_sync_entry *entry);
diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Fri Nov 11 18:02:49 2005
+++ b/xen/arch/x86/shadow_public.c      Fri Nov 11 18:11:13 2005
@@ -64,6 +64,9 @@
 #if CONFIG_PAGING_LEVELS == 2
         if ( d->arch.ops != &MODE_A_HANDLER )
             d->arch.ops = &MODE_A_HANDLER;
+#elif CONFIG_PAGING_LEVELS == 3
+        if ( d->arch.ops != &MODE_B_HANDLER )
+            d->arch.ops = &MODE_B_HANDLER;
 #elif CONFIG_PAGING_LEVELS == 4
         if ( d->arch.ops != &MODE_D_HANDLER )
             d->arch.ops = &MODE_D_HANDLER;
@@ -138,7 +141,92 @@
 }
 /****************************************************************************/
 /****************************************************************************/
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
+
+static void inline
+free_shadow_fl1_table(struct domain *d, unsigned long smfn)
+{
+    l1_pgentry_t *pl1e = map_domain_page(smfn);
+    int i;
+
+    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+        put_page_from_l1e(pl1e[i], d);
+}
+
+/*
+ * Free l2, l3, l4 shadow tables
+ */
+
+void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
+
+static void inline
+free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
+{
+    pgentry_64_t *ple = map_domain_page(smfn);
+    int i, external = shadow_mode_external(d);
+
+#if CONFIG_PAGING_LEVELS >=3
+    if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
+    {
+        struct pfn_info *page = &frame_table[smfn];
+        for ( i = 0; i < PDP_ENTRIES; i++ )
+        {
+            if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
+                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
+        }
+
+        page = &frame_table[entry_get_pfn(ple[0])];
+        free_domheap_pages(page, SL2_ORDER);
+        unmap_domain_page(ple);
+    }
+    else
+#endif
+    {
+        /*
+         * No Xen mappings in external pages
+         */
+        if ( external )
+        {
+            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
+                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
+                    put_shadow_ref(entry_get_pfn(ple[i]));
+        } 
+        else
+        {
+            for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
+            {
+                /* 
+                 * List the skip/break conditions to avoid freeing
+                 * Xen private mappings.
+                 */
+#if CONFIG_PAGING_LEVELS == 2
+                if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) )
+                    continue;
+#endif
+#if CONFIG_PAGING_LEVELS == 3
+                if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES )
+                    break;
+                if ( level == PAGING_L2 )
+                {
+                    struct pfn_info *page = &frame_table[smfn]; 
+                    if ( is_xen_l2_slot(page->u.inuse.type_info, i) )
+                        continue;
+                }
+#endif
+#if CONFIG_PAGING_LEVELS == 4
+                if ( level == PAGING_L4 && !is_guest_l4_slot(i))
+                    continue;
+#endif
+                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
+                    put_shadow_ref(entry_get_pfn(ple[i]));
+            }
+        }
+        unmap_domain_page(ple);
+    }
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
 /*
  * Convert PAE 3-level page-table to 4-level page-table
  */
@@ -203,55 +291,6 @@
     v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
 }
 
-static void inline
-free_shadow_fl1_table(struct domain *d, unsigned long smfn)
-{
-    l1_pgentry_t *pl1e = map_domain_page(smfn);
-    int i;
-
-    for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
-        put_page_from_l1e(pl1e[i], d);
-}
-
-/*
- * Free l2, l3, l4 shadow tables
- */
-
-void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
-
-static void inline
-free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
-{
-    pgentry_64_t *ple = map_domain_page(smfn);
-    int i, external = shadow_mode_external(d);
-    struct pfn_info *page = &frame_table[smfn];
-
-    if (d->arch.ops->guest_paging_levels == PAGING_L2)
-    {
-#if CONFIG_PAGING_LEVELS >=4
-        for ( i = 0; i < PDP_ENTRIES; i++ )
-        {
-            if (entry_get_flags(ple[i]) & _PAGE_PRESENT )
-                free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
-        }
-   
-        page = &frame_table[entry_get_pfn(ple[0])];
-        free_domheap_pages(page, SL2_ORDER);
-        unmap_domain_page(ple);
-#endif
-    }
-    else
-    {
-        for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
-            if ( external || is_guest_l4_slot(i) )
-                if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
-                    put_shadow_ref(entry_get_pfn(ple[i]));
-
-        unmap_domain_page(ple);
-    }
-}
-
-
 void free_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mfn;
@@ -299,11 +338,9 @@
     mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
     memset(mpl2e, 0, PAGE_SIZE);
 
-#ifdef __i386__ /* XXX screws x86/64 build */
     memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
            &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
            HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
-#endif
 
     mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
         l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
@@ -333,7 +370,7 @@
     unsigned long mfn;
 
     ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
-    
+
     mpl2e = v->arch.monitor_vtable;
 
     /*
@@ -517,13 +554,11 @@
 
     SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
 
-#ifdef __i386__
+#if CONFIG_PAGING_LEVELS == 2
     if ( shadow_mode_external(d) )
         limit = L2_PAGETABLE_ENTRIES;
     else
         limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
-#else
-    limit = 0; /* XXX x86/64 XXX */
 #endif
 
     for ( i = 0; i < limit; i++ )
@@ -584,10 +619,11 @@
 
     ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
 #if CONFIG_PAGING_LEVELS >=4
-    if (type == PGT_fl1_shadow) {
+    if ( type == PGT_fl1_shadow ) 
+    {
         unsigned long mfn;
         mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
-        if (!mfn)
+        if ( !mfn )
             gpfn |= (1UL << 63);
     }
 #endif
@@ -602,7 +638,7 @@
         free_shadow_l1_table(d, smfn);
         d->arch.shadow_page_count--;
         break;
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
     case PGT_l2_shadow:
         perfc_decr(shadow_l2_pages);
         shadow_demote(d, gpfn, gmfn);
@@ -616,7 +652,8 @@
         free_shadow_hl2_table(d, smfn);
         d->arch.hl2_page_count--;
         break;
-#else
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
     case PGT_l2_shadow:
     case PGT_l3_shadow:
     case PGT_l4_shadow:
@@ -630,7 +667,6 @@
         d->arch.shadow_page_count--;
         break;
 #endif
-
     case PGT_snapshot:
         perfc_decr(apshot_pages);
         break;
@@ -782,7 +818,7 @@
         }
     }
 
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
     // For external shadows, remove the monitor table's refs
     //
     if ( shadow_mode_external(d) )
@@ -928,7 +964,7 @@
     ASSERT(!(d->arch.shadow_mode & ~mode));
 
 #if defined(CONFIG_PAGING_LEVELS)
-    if(!shadow_set_guest_paging_levels(d, 
+    if(!shadow_set_guest_paging_levels(d,
                                        CONFIG_PAGING_LEVELS)) {
         printk("Unsupported guest paging levels\n");
         domain_crash_synchronous(); /* need to take a clean path */
@@ -968,7 +1004,7 @@
         else
             v->arch.shadow_vtable = NULL;
         
-#if defined (__i386__)
+#if CONFIG_PAGING_LEVELS == 2
         /*
          * arch.hl2_vtable
          */
@@ -1408,7 +1444,7 @@
     sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
     if ( sl1mfn )
     {
-        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
+        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte,
                  (void *)pa, l1e_get_intpte(gpte));
         l1pte_propagate_from_guest(current->domain, gpte, &spte);
 
@@ -1447,7 +1483,7 @@
 #if CONFIG_PAGING_LEVELS >= 3
 void shadow_l3_normal_pt_update(
     struct domain *d,
-    unsigned long pa, l3_pgentry_t gpde,
+    unsigned long pa, l3_pgentry_t l3e,
     struct domain_mmap_cache *cache)
 {
     unsigned long sl3mfn;
@@ -1458,11 +1494,10 @@
     sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
     if ( sl3mfn )
     {
-        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte,
-                 (void *)pa, l3e_get_intpte(gpde));
-
+        SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte,
+                 (void *)pa, l3e_get_intpte(l3e));
         spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
-        validate_entry_change(d, (pgentry_64_t *) &gpde,
+        validate_entry_change(d, (pgentry_64_t *) &l3e,
                               &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)],
                               shadow_type_to_level(PGT_l3_shadow));
         unmap_domain_page_with_cache(spl3e, cache);
@@ -1475,7 +1510,7 @@
 #if CONFIG_PAGING_LEVELS >= 4
 void shadow_l4_normal_pt_update(
     struct domain *d,
-    unsigned long pa, l4_pgentry_t gpde,
+    unsigned long pa, l4_pgentry_t l4e,
     struct domain_mmap_cache *cache)
 {
     unsigned long sl4mfn;
@@ -1486,11 +1521,10 @@
     sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
     if ( sl4mfn )
     {
-        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte,
-                 (void *)pa, l4e_get_intpte(gpde));
-
+        SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte,
+                 (void *)pa, l4e_get_intpte(l4e));
         spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
-        validate_entry_change(d, (pgentry_64_t *)&gpde,
+        validate_entry_change(d, (pgentry_64_t *)&l4e,
                               &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)],
                               shadow_type_to_level(PGT_l4_shadow));
         unmap_domain_page_with_cache(spl4e, cache);
@@ -1554,8 +1588,6 @@
 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
 {
     unsigned long smfn;
-
-    //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
 
     shadow_lock(d);
 
diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Nov 11 18:02:49 2005
+++ b/xen/include/asm-x86/page.h        Fri Nov 11 18:11:13 2005
@@ -232,9 +232,6 @@
 #define linear_l3_table(_ed) ((_ed)->arch.guest_vl3table)
 #define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table)
 
-#define va_to_l1mfn(_ed, _va) \
-    (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
-
 #ifndef __ASSEMBLY__
 #if CONFIG_PAGING_LEVELS == 3
 extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Fri Nov 11 18:02:49 2005
+++ b/xen/include/asm-x86/shadow.h      Fri Nov 11 18:11:13 2005
@@ -138,6 +138,14 @@
                                        struct domain_mmap_cache *cache);
 #if CONFIG_PAGING_LEVELS >= 3
 #include <asm/page-guest32.h>
+/*
+ * va_mask cannot be used because it's used by the shadow hash.
+ * Use the score area for for now.
+ */
+#define is_xen_l2_slot(t,s)                                                    \
+    ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) &&                    \
+      ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
+
 extern unsigned long gva_to_gpa(unsigned long gva);
 extern void shadow_l3_normal_pt_update(struct domain *d,
                                        unsigned long pa, l3_pgentry_t l3e,
@@ -458,7 +466,7 @@
 
 /************************************************************************/
 
-static inline int __mark_dirty(struct domain *d, unsigned int mfn)
+static inline int __mark_dirty(struct domain *d, unsigned long mfn)
 {
     unsigned long pfn;
     int           rc = 0;
@@ -906,7 +914,7 @@
         guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
 
         *gpde_p = gpde;
-    }
+    } 
 
     if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
         SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
@@ -1355,7 +1363,7 @@
 }
 
 
-static inline void delete_shadow_status( 
+static inline void delete_shadow_status(
     struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
 {
     struct shadow_status *p, *x, *n, *head;
@@ -1454,7 +1462,7 @@
     ASSERT(stype && !(stype & ~PGT_type_mask));
 
     x = head = hash_bucket(d, gpfn);
-   
+
     SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
              gpfn, smfn, stype, x, x->next);
     shadow_audit(d, 0);
@@ -1584,7 +1592,7 @@
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    l2_pgentry_t sl2e;
+    l2_pgentry_t sl2e = {0};
 
     __shadow_get_l2e(v, va, &sl2e);
     if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
@@ -1731,7 +1739,7 @@
 #ifdef CONFIG_VMX
     if ( VMX_DOMAIN(v) )
         paging_enabled = vmx_paging_enabled(v);
-            
+
     else
 #endif
         // HACK ALERT: there's currently no easy way to figure out if a domU
@@ -1757,7 +1765,7 @@
         if ( shadow_mode_enabled(d) )
             v->arch.monitor_table = v->arch.shadow_table;
         else
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS == 4
         if ( !(v->arch.flags & TF_kernel_mode) )
             v->arch.monitor_table = v->arch.guest_table_user;
         else
diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Fri Nov 11 18:02:49 2005
+++ b/xen/include/asm-x86/shadow_64.h   Fri Nov 11 18:11:13 2005
@@ -29,6 +29,15 @@
 #include <asm/shadow.h>
 #include <asm/shadow_ops.h>
 
+extern struct shadow_ops MODE_B_HANDLER;
+
+#if CONFIG_PAGING_LEVELS == 3
+#define L4_PAGETABLE_SHIFT      39
+#define L4_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
+typedef struct { intpte_t l4; } l4_pgentry_t;
+#define is_guest_l4_slot(_s) (1)
+#endif
+
 #define READ_FAULT  0
 #define WRITE_FAULT 1
 
@@ -94,6 +103,11 @@
             return  (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1));
         case 3:
             return  (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1));
+#if CONFIG_PAGING_LEVELS == 3
+        case 4:
+            return PAE_SHADOW_SELF_ENTRY;
+#endif
+
 #if CONFIG_PAGING_LEVELS >= 4
 #ifndef GUEST_PGENTRY_32
         case 4:
@@ -127,57 +141,73 @@
     }
 }
 
-static inline pgentry_64_t *__entry(
-    struct vcpu *v, u64 va, u32 flag)
+static inline int __entry(
+    struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag)
 {
     int i;
     pgentry_64_t *le_e;
-    pgentry_64_t *le_p;
+    pgentry_64_t *le_p = NULL;
     unsigned long mfn;
     int index;
     u32 level = flag & L_MASK;
     struct domain *d = v->domain;
-
-    index = table_offset_64(va, ROOT_LEVEL_64);
-    if (flag & SHADOW_ENTRY)
+    int root_level;
+
+    if ( flag & SHADOW_ENTRY )
+    {
+       root_level =  ROOT_LEVEL_64;
+       index = table_offset_64(va, root_level);
         le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index];
-    else
+    }
+    else /* guest entry */  
+    {
+        root_level = v->domain->arch.ops->guest_paging_levels;
+       index = table_offset_64(va, root_level);
         le_e = (pgentry_64_t *)&v->arch.guest_vtable[index];
-
+    }
     /*
      * If it's not external mode, then mfn should be machine physical.
      */
-    for (i = ROOT_LEVEL_64 - level; i > 0; i--) {
-        if (unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)))
-            return NULL;
-        mfn = entry_get_value(*le_e) >> PAGE_SHIFT;
-        if ((flag & GUEST_ENTRY) && shadow_mode_translate(d))
+    for (i = root_level - level; i > 0; i--) {
+        if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) ) {
+            if ( le_p )
+                unmap_domain_page(le_p);
+            return 0;
+        }
+        mfn = entry_get_pfn(*le_e);
+        if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) )
             mfn = get_mfn_from_pfn(mfn);
-        le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT);
+        if ( le_p )
+            unmap_domain_page(le_p);
+        le_p = (pgentry_64_t *)map_domain_page(mfn);
         index = table_offset_64(va, (level + i - 1));
         le_e = &le_p[index];
-
-    }
-    return le_e;
-
-}
-
-static inline pgentry_64_t *__rw_entry(
-    struct vcpu *ed, u64 va, void *e_p, u32 flag)
-{
-    pgentry_64_t *le_e = __entry(ed, va, flag);
+    }
+
+    if ( flag & SET_ENTRY )
+        *le_e = *e_p;
+    else
+        *e_p = *le_e;
+
+    if ( le_p )
+        unmap_domain_page(le_p);
+
+    return 1;
+
+}
+
+static inline int __rw_entry(
+    struct vcpu *v, u64 va, void *e_p, u32 flag)
+{
     pgentry_64_t *e = (pgentry_64_t *)e_p;
-    if (le_e == NULL)
-        return NULL;
 
     if (e) {
-        if (flag & SET_ENTRY)
-            *le_e = *e;
-        else
-            *e = *le_e;
-    }
-    return le_e;
-}
+        return __entry(v, va, e, flag);
+    }
+
+    return 0;
+}
+
 #define __shadow_set_l4e(v, va, value) \
   __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4)
 #define __shadow_get_l4e(v, va, sl4e) \
@@ -204,7 +234,7 @@
 #define __guest_get_l3e(v, va, sl3e) \
   __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3)
 
-static inline void *  __guest_set_l2e(
+static inline int  __guest_set_l2e(
     struct vcpu *v, u64 va, void *value, int size)
 {
     switch(size) {
@@ -216,21 +246,21 @@
                 l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
                 if (value)
                     l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value;
-                return &l2va[l2_table_offset_32(va)];
+                return 1;
             }
         case 8:
             return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2);
         default:
             BUG();
-            return NULL;
-    }
-    return NULL;
+            return 0;
+    }
+    return 0;
 }
 
 #define __guest_set_l2e(v, va, value) \
-  ( __typeof__(value) )__guest_set_l2e(v, (u64)va, value, sizeof(*value))
-
-static inline void * __guest_get_l2e(
+    __guest_set_l2e(v, (u64)va, value, sizeof(*value))
+
+static inline int  __guest_get_l2e(
   struct vcpu *v, u64 va, void *gl2e, int size)
 {
     switch(size) {
@@ -241,21 +271,21 @@
                 l2va = (l2_pgentry_32_t *)v->arch.guest_vtable;
                 if (gl2e)
                     *(l2_pgentry_32_t *)gl2e = l2va[l2_table_offset_32(va)];
-                return &l2va[l2_table_offset_32(va)];
+                return 1;
             }
         case 8:
             return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2);
         default:
             BUG();
-            return NULL;
-    }
-    return NULL;
+            return 0;
+    }
+    return 0;
 }
 
 #define __guest_get_l2e(v, va, gl2e) \
-  (__typeof__ (gl2e))__guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
-
-static inline void *  __guest_set_l1e(
+    __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e))
+
+static inline int  __guest_set_l1e(
   struct vcpu *v, u64 va, void *value, int size)
 {
     switch(size) {
@@ -267,34 +297,34 @@
                 unsigned long l1mfn;
 
                 if (!__guest_get_l2e(v, va, &gl2e))
-                    return NULL;
+                    return 0;
                 if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
-                    return NULL;
+                    return 0;
 
                 l1mfn = get_mfn_from_pfn(
                   l2e_get_pfn(gl2e));
 
-                l1va = (l1_pgentry_32_t *)
-                  phys_to_virt(l1mfn << L1_PAGETABLE_SHIFT);
+                l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn);
                 if (value)
                     l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value;
-
-                return &l1va[l1_table_offset_32(va)];
+                unmap_domain_page(l1va);
+
+                return 1;
             }
 
         case 8:
             return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1);
         default:
             BUG();
-            return NULL;
-    }
-    return NULL;
+            return 0;
+    }
+    return 0;
 }
 
 #define __guest_set_l1e(v, va, value) \
-  ( __typeof__(value) )__guest_set_l1e(v, (u64)va, value, sizeof(*value))
-
-static inline void *  __guest_get_l1e(
+     __guest_set_l1e(v, (u64)va, value, sizeof(*value))
+
+static inline int  __guest_get_l1e(
   struct vcpu *v, u64 va, void *gl1e, int size)
 {
     switch(size) {
@@ -306,34 +336,33 @@
                 unsigned long l1mfn;
 
                 if (!(__guest_get_l2e(v, va, &gl2e)))
-                    return NULL;
+                    return 0;
 
 
                 if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT)))
-                    return NULL;
+                    return 0;
 
 
                 l1mfn = get_mfn_from_pfn(
                   l2e_get_pfn(gl2e));
-                l1va = (l1_pgentry_32_t *) phys_to_virt(
-                  l1mfn << L1_PAGETABLE_SHIFT);
+                l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn);
                 if (gl1e)
                     *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)];
-
-                return &l1va[l1_table_offset_32(va)];
+                unmap_domain_page(l1va);
+                return 1;
             }
         case 8:
             // 64-bit guest
             return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1);
         default:
             BUG();
-            return NULL;
-    }
-    return NULL;
+            return 0;
+    }
+    return 0;
 }
 
 #define __guest_get_l1e(v, va, gl1e) \
-  ( __typeof__(gl1e) )__guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
+    __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e))
 
 static inline void entry_general(
   struct domain *d,
@@ -365,10 +394,16 @@
                 unmap_domain_page(l1_p);
             }
         } else {
-            sle = entry_from_pfn(
-                smfn,
-                (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
-            entry_add_flags(gle, _PAGE_ACCESSED);
+            if (d->arch.ops->guest_paging_levels <= PAGING_L3
+                    && level == PAGING_L3) {
+                sle = entry_from_pfn(smfn, entry_get_flags(gle));
+            } else {
+
+                sle = entry_from_pfn(
+                  smfn,
+                  (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
+                entry_add_flags(gle, _PAGE_ACCESSED);
+            }
         }
         // XXX mafetter: Hmm...
         //     Shouldn't the dirty log be checked/updated here?
@@ -392,7 +427,7 @@
 
     if ( entry_get_flags(gle) & _PAGE_PRESENT ) {
         if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) {
-            smfn =  __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, PGT_fl1_shadow);
+            smfn =  __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow);
         } else {
             smfn =  __shadow_status(d, entry_get_pfn(gle), 
               shadow_level_to_type((level -1 )));

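The shadow_64.h hunks above change __entry/__rw_entry and the __guest_{get,set}_l{1,2}e
helpers from returning a pointer into a mapped page-table page to returning an int
success flag, copying the entry through an out-parameter and unmapping the page
(map_domain_page/unmap_domain_page) before returning. Below is a minimal,
self-contained sketch of that calling convention only; it uses hypothetical names
and simplified types, not the real Xen structures.

/*
 * Standalone sketch (not Xen code): old pointer-returning style vs. the
 * new copy-out style used by the patched helpers.
 */
#include <stdio.h>
#include <stdint.h>

typedef struct { uint64_t l2; } l2_entry_t;   /* stand-in for l2_pgentry_t */

#define PRESENT 0x1ULL

/* A fake "guest L2 page" standing in for a mapped domain page. */
static l2_entry_t fake_l2_page[4] = {
    { 0x1000 | PRESENT }, { 0 }, { 0x3000 | PRESENT }, { 0 }
};

/* Old style: hand back a pointer into the (still mapped) table. */
static l2_entry_t *get_l2e_old(unsigned int index)
{
    if ( !(fake_l2_page[index].l2 & PRESENT) )
        return NULL;
    return &fake_l2_page[index];
}

/* New style: copy the entry out and report success with an int. */
static int get_l2e_new(unsigned int index, l2_entry_t *out)
{
    if ( !(fake_l2_page[index].l2 & PRESENT) )
        return 0;
    *out = fake_l2_page[index];   /* copy out; the real helpers unmap_domain_page() here */
    return 1;
}

int main(void)
{
    l2_entry_t e;
    l2_entry_t *p = get_l2e_old(0);

    if ( p )
        printf("old style: entry 0 = %#llx\n", (unsigned long long)p->l2);

    if ( get_l2e_new(2, &e) )
        printf("new style: entry 2 = %#llx\n", (unsigned long long)e.l2);
    else
        printf("new style: entry 2 not present\n");

    return 0;
}

Copying the entry out keeps the mapping's lifetime inside the helper, which is
what allows the switch from phys_to_virt() to map_domain_page()/unmap_domain_page()
for pages that may not sit in a permanent direct mapping.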
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog