
[Xen-changelog] [xen-unstable] Merge



# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Node ID 5c029fda79dca05de04b68a54a827f874aae087a
# Parent  bd207697f0c7b3cce4593073435506f6d5b58ef8
# Parent  87fc080f555b5ddc7041bb5d5703de388cb5efa4
Merge
---
 xen/arch/x86/hvm/svm/svm.c       |   17 
 xen/arch/x86/hvm/svm/vmcb.c      |    9 
 xen/arch/x86/mm/shadow/common.c  |  189 ++++----
 xen/arch/x86/mm/shadow/multi.c   |  843 ++++++++++-----------------------------
 xen/arch/x86/mm/shadow/multi.h   |    4 
 xen/arch/x86/mm/shadow/private.h |   75 ---
 xen/arch/x86/mm/shadow/types.h   |  123 -----
 xen/include/asm-x86/domain.h     |   17 
 xen/include/asm-x86/hvm/vcpu.h   |    5 
 xen/include/asm-x86/mm.h         |   17 
 xen/include/asm-x86/perfc_defn.h |    1 
 xen/include/asm-x86/shadow.h     |    7 
 12 files changed, 364 insertions(+), 943 deletions(-)
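
This merge drops the dedicated PAE l3 shadow type (and its subshadow bookkeeping) in favour of four per-vcpu top-level shadows, a per-vcpu copy of the guest's PAE l3, and a separate hvm_vcpu.hw_cr3 holding the value that actually gets written to vmcb->cr3.  A rough sketch of the per-vcpu state as it is used in the hunks below; the real declarations live in the asm-x86/domain.h and asm-x86/hvm/vcpu.h changes, which are not quoted in this message, so the exact field types are assumptions:

    /* Illustrative sketch only -- reconstructed from the call sites in this
     * patch, not copied from the patched headers. */
    struct shadow_vcpu_sketch {
        pagetable_t   shadow_table[4];          /* top-level shadows; PAE guests
                                                 * use all four slots           */
        l3_pgentry_t  l3table[4];               /* per-vcpu PAE l3, rebuilt by
                                                 * update_cr3()                 */
        unsigned long last_writeable_pte_smfn;  /* hint for the writeable-
                                                 * mapping heuristic            */
    };
    /* v->arch.hvm_vcpu.hw_cr3 (assumed 64-bit) now carries the value loaded
     * into CR3, replacing direct use of the old single shadow_table. */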

diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Oct 18 14:36:20 2006 +0100
@@ -1739,9 +1739,6 @@ static int mov_to_cr(int gpreg, int cr, 
             if (old_base_mfn)
                 put_page(mfn_to_page(old_base_mfn));
 
-            /*
-             * arch.shadow_table should now hold the next CR3 for shadow
-             */
             v->arch.hvm_svm.cpu_cr3 = value;
             update_cr3(v);
             vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
@@ -1787,10 +1784,6 @@ static int mov_to_cr(int gpreg, int cr, 
                             (unsigned long) (mfn << PAGE_SHIFT));
 
                 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
-
-                /*
-                 * arch->shadow_table should hold the next CR3 for shadow
-                 */
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, 
                             "Update CR3 value = %lx, mfn = %lx",
@@ -2355,7 +2348,7 @@ void svm_dump_regs(const char *from, str
 {
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
+    unsigned long pt = v->arch.hvm_vcpu.hw_cr3;
 
     printf("%s: guest registers from %s:\n", __func__, from);
 #if defined (__x86_64__)
@@ -2681,11 +2674,11 @@ asmlinkage void svm_vmexit_handler(struc
         if (do_debug)
         {
             printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
-                   "shadow_table = 0x%08x\n", 
+                   "hw_cr3 = 0x%16lx\n", 
                    __func__,
                    (int) v->arch.guest_table.pfn,
                    (int) v->arch.monitor_table.pfn, 
-                   (int) v->arch.shadow_table.pfn);
+                   (long unsigned int) v->arch.hvm_vcpu.hw_cr3);
 
             svm_dump_vmcb(__func__, vmcb);
             svm_dump_regs(__func__, regs);
@@ -2913,10 +2906,10 @@ asmlinkage void svm_vmexit_handler(struc
     if (do_debug) 
     {
         printk("vmexit_handler():- guest_table = 0x%08x, "
-               "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
+               "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n",
                (int)v->arch.guest_table.pfn,
                (int)v->arch.monitor_table.pfn, 
-               (int)v->arch.shadow_table.pfn);
+               (int)v->arch.hvm_vcpu.hw_cr3);
         printk("svm_vmexit_handler: Returning\n");
     }
 #endif
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Wed Oct 18 14:36:20 2006 +0100
@@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v)
     if (svm_dbg_on) 
     {
         unsigned long pt;
-        pt = pagetable_get_paddr(v->arch.shadow_table);
-        printk("%s: shadow_table = %lx\n", __func__, pt);
+        printk("%s: hw_cr3 = %llx\n", __func__, 
+               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
         pt = pagetable_get_paddr(v->arch.guest_table);
         printk("%s: guest_table  = %lx\n", __func__, pt);
         pt = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v)
     {
         printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
         printk("init_guest_table: guest_table = 0x%08x, monitor_table = 
0x%08x,"
-                " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, 
-                (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
+                " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, 
+               (int)v->arch.monitor_table.pfn, 
+               (unsigned long long) v->arch.hvm_vcpu.hw_cr3);
     }
 
     v->arch.schedule_tail = arch_svm_do_resume;
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Oct 18 14:36:20 2006 +0100
@@ -283,11 +283,8 @@ __shadow_validate_guest_entry(struct vcp
     if ( page->shadow_flags & SHF_L2H_PAE ) 
         result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
             (v, gmfn, entry, size);
-    if ( page->shadow_flags & SHF_L3_PAE ) 
-        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
-            (v, gmfn, entry, size);
 #else /* 32-bit non-PAE hypervisor does not support PAE guests */
-    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+    ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4 
@@ -343,8 +340,11 @@ shadow_validate_guest_pt_write(struct vc
     if ( rc & SHADOW_SET_ERROR ) 
     {
         /* This page is probably not a pagetable any more: tear it out of the 
-         * shadows, along with any tables that reference it */
-        shadow_remove_all_shadows_and_parents(v, gmfn);
+         * shadows, along with any tables that reference it.  
+         * Since the validate call above will have made a "safe" (i.e. zero) 
+         * shadow entry, we can let the domain live even if we can't fully 
+         * unshadow the page. */
+        sh_remove_shadows(v, gmfn, 0, 0);
     }
 }
 
@@ -424,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc
 /* Allocating shadow pages
  * -----------------------
  *
- * Most shadow pages are allocated singly, but there are two cases where we 
- * need to allocate multiple pages together.
- * 
- * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
- *    A 32-bit guest l1 table covers 4MB of virtuial address space,
- *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
- *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
- *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
- *    each).  These multi-page shadows are contiguous and aligned; 
- *    functions for handling offsets into them are defined in shadow.c 
- *    (shadow_l1_index() etc.)
+ * Most shadow pages are allocated singly, but there is one case where
+ * we need to allocate multiple pages together: shadowing 32-bit guest
+ * tables on PAE or 64-bit shadows.  A 32-bit guest l1 table covers 4MB
+ * of virtual address space, and needs to be shadowed by two PAE/64-bit
+ * l1 tables (covering 2MB of virtual address space each).  Similarly, a
+ * 32-bit guest l2 table (4GB va) needs to be shadowed by four
+ * PAE/64-bit l2 tables (1GB va each).  These multi-page shadows are
+ * contiguous and aligned; functions for handling offsets into them are
+ * defined in shadow.c (shadow_l1_index() etc.)
  *    
- * 2: Shadowing PAE top-level pages.  Each guest page that contains
- *    any PAE top-level pages requires two shadow pages to shadow it.
- *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
- *
  * This table shows the allocation behaviour of the different modes:
  *
  * Xen paging      32b  pae  pae  64b  64b  64b
@@ -449,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc
  *
  * sl1 size         4k   8k   4k   8k   4k   4k
  * sl2 size         4k  16k   4k  16k   4k   4k
- * sl3 size         -    -    8k   -    8k   4k
+ * sl3 size         -    -    -    -    -    4k
  * sl4 size         -    -    -    -    -    4k
  *
  * We allocate memory from xen in four-page units and break them down
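
The index arithmetic that the allocation comment above alludes to (shadow_l1_index() and friends) comes down to picking a page within the contiguous multi-page shadow plus an offset inside it.  A minimal standalone sketch, assuming the 1024-entry 32-bit guest l1 / 512-entry PAE or 64-bit shadow l1 split described above (illustrative only, not the patch's code):

    #include <assert.h>

    /* Entry N of a 1024-entry 32-bit guest l1 lands in page N/512 of the
     * two-page contiguous shadow, at offset N%512 within that page. */
    static unsigned int example_l1_index(unsigned long *smfn, unsigned int guest_index)
    {
        *smfn += guest_index / 512;   /* which page of the contiguous pair */
        return guest_index % 512;     /* offset within that shadow page    */
    }

    int main(void)
    {
        unsigned long smfn = 0x1000;                  /* hypothetical base mfn */
        unsigned int off = example_l1_index(&smfn, 700);
        assert(smfn == 0x1001 && off == 188);         /* entry 700 -> second page */
        return 0;
    }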
@@ -503,7 +497,6 @@ shadow_order(u32 shadow_type)
         0, /* PGC_SH_fl1_pae_shadow */
         0, /* PGC_SH_l2_pae_shadow  */
         0, /* PGC_SH_l2h_pae_shadow */
-        1, /* PGC_SH_l3_pae_shadow  */
         0, /* PGC_SH_l1_64_shadow   */
         0, /* PGC_SH_fl1_64_shadow  */
         0, /* PGC_SH_l2_64_shadow   */
@@ -546,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu 
 #endif
         break;
 #if CONFIG_PAGING_LEVELS >= 3
-    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
         SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
         break;
 #endif
@@ -587,18 +581,8 @@ void shadow_prealloc(struct domain *d, u
         pg = list_entry(l, struct page_info, list);
         smfn = page_to_mfn(pg);
 
-#if CONFIG_PAGING_LEVELS >= 3
-        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
-        {
-            /* For PAE, we need to unpin each subshadow on this shadow */
-            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
-        } 
-        else 
-#endif /* 32-bit code always takes this branch */
-        {
-            /* Unpin this top-level shadow */
-            sh_unpin(v, smfn);
-        }
+        /* Unpin this top-level shadow */
+        sh_unpin(v, smfn);
 
         /* See if that freed up a chunk of appropriate size */
         if ( chunk_is_available(d, order) ) return;
@@ -620,8 +604,12 @@ void shadow_prealloc(struct domain *d, u
         shadow_unhook_mappings(v, smfn);
 
         /* Need to flush TLB if we've altered our own tables */
-        if ( !shadow_mode_external(d) 
-             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+        if ( !shadow_mode_external(d) &&
+             (pagetable_get_pfn(current->arch.shadow_table[0]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[1]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[2]) == mfn_x(smfn)
+              || pagetable_get_pfn(current->arch.shadow_table[3]) == mfn_x(smfn)
+                 ) )
             local_flush_tlb();
         
         /* See if that freed up a chunk of appropriate size */
@@ -732,6 +720,15 @@ void shadow_free(struct domain *d, mfn_t
 
     for ( i = 0; i < 1<<order; i++ ) 
     {
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+        struct vcpu *v;
+        for_each_vcpu(d, v) 
+        {
+            /* No longer safe to look for a writeable mapping in this shadow */
+            if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 
+                v->arch.shadow.last_writeable_pte_smfn = 0;
+        }
+#endif
         /* Strip out the type: this is now a free shadow page */
         pg[i].count_info = 0;
         /* Remember the TLB timestamp so we will know whether to flush 
@@ -920,9 +917,20 @@ p2m_next_level(struct domain *d, mfn_t *
 #if CONFIG_PAGING_LEVELS == 3
         if (type == PGT_l2_page_table)
         {
+            struct vcpu *v;
             /* We have written to the p2m l3: need to sync the per-vcpu
              * copies of it in the monitor tables */
             p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+            /* Also, any vcpus running on shadows of the p2m need to 
+             * reload their CR3s so the change propagates to the shadow */
+            ASSERT(shadow_lock_is_acquired(d));
+            for_each_vcpu(d, v) 
+            {
+                if ( pagetable_get_pfn(v->arch.guest_table) 
+                     == pagetable_get_pfn(d->arch.phys_table) 
+                     && v->arch.shadow.mode != NULL )
+                    v->arch.shadow.mode->update_cr3(v);
+            }
         }
 #endif
         /* The P2M can be shadowed: keep the shadows synced */
@@ -1711,9 +1719,6 @@ void sh_destroy_shadow(struct vcpu *v, m
     case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
         SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
         break;
-    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
-        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
-        break;
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1768,7 +1773,6 @@ int shadow_remove_write_access(struct vc
 #endif
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
@@ -1825,12 +1829,11 @@ int shadow_remove_write_access(struct vc
         unsigned long gfn;
         /* Heuristic: there is likely to be only one writeable mapping,
          * and that mapping is likely to be in the current pagetable,
-         * either in the guest's linear map (linux, windows) or in a
-         * magic slot used to map high memory regions (linux HIGHTPTE) */
+         * in the guest's linear map (on non-HIGHPTE linux and windows)*/
 
 #define GUESS(_a, _h) do {                                              \
-            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )          \
-                perfc_incrc(shadow_writeable_h_ ## _h);                \
+            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )      \
+                perfc_incrc(shadow_writeable_h_ ## _h);                 \
             if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
                 return 1;                                               \
         } while (0)
@@ -1880,9 +1883,35 @@ int shadow_remove_write_access(struct vc
 #endif /* CONFIG_PAGING_LEVELS >= 3 */
 
 #undef GUESS
-
-    }
-#endif
+    }
+
+    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 1;
+
+    /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
+     * (entries in the fixmap) where linux maps its pagetables.  Since
+     * we expect to hit them most of the time, we start the search for
+     * the writeable mapping by looking at the same MFN where the last
+     * brute-force search succeeded. */
+
+    if ( v->arch.shadow.last_writeable_pte_smfn != 0 )
+    {
+        unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
+        mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn);
+        int shtype = (mfn_to_page(last_smfn)->count_info & PGC_SH_type_mask) 
+            >> PGC_SH_type_shift;
+
+        if ( callbacks[shtype] ) 
+            callbacks[shtype](v, last_smfn, gmfn);
+
+        if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
+            perfc_incrc(shadow_writeable_h_5);
+    }
+
+    if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 1;
+
+#endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
     
     /* Brute-force search of all the shadows, by walking the hash */
     perfc_incrc(shadow_writeable_bf);
@@ -1932,7 +1961,6 @@ int shadow_remove_all_mappings(struct vc
 #endif
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
@@ -2005,7 +2033,8 @@ static int sh_remove_shadow_via_pointer(
     ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
     ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
     ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
-    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow);
     ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
     
     if (pg->up == 0) return 0;
@@ -2034,7 +2063,6 @@ static int sh_remove_shadow_via_pointer(
     case PGC_SH_l1_pae_shadow:
     case PGC_SH_l2_pae_shadow:
     case PGC_SH_l2h_pae_shadow:
-    case PGC_SH_l3_pae_shadow:
         SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
         break;
 #if CONFIG_PAGING_LEVELS >= 4
@@ -2058,17 +2086,20 @@ static int sh_remove_shadow_via_pointer(
     return rc;
 }
 
-void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
 /* Remove the shadows of this guest page.  
- * If all != 0, find all shadows, if necessary by walking the tables.
- * Otherwise, just try the (much faster) heuristics, which will remove 
- * at most one reference to each shadow of the page. */
+ * If fast != 0, just try the quick heuristic, which will remove 
+ * at most one reference to each shadow of the page.  Otherwise, walk
+ * all the shadow tables looking for refs to shadows of this gmfn.
+ * If all != 0, kill the domain if we can't find all the shadows.
+ * (all != 0 implies fast == 0)
+ */
 {
     struct page_info *pg;
     mfn_t smfn;
     u32 sh_flags;
     unsigned char t;
-
+    
     /* Dispatch table for getting per-type functions: each level must
      * be called with the function to remove a lower-level shadow. */
     static hash_callback_t callbacks[16] = {
@@ -2085,11 +2116,9 @@ void sh_remove_shadows(struct vcpu *v, m
 #if CONFIG_PAGING_LEVELS >= 3
         SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
         SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
-        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
 #else 
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
-        NULL, /* l3_pae  */
 #endif
         NULL, /* l1_64   */
         NULL, /* fl1_64  */
@@ -2115,9 +2144,8 @@ void sh_remove_shadows(struct vcpu *v, m
         ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
          | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
         0, /* fl1_pae */
-        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
-        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
-        0, /* l3_pae  */
+        0, /* l2_pae  */
+        0, /* l2h_pae  */
         1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
         0, /* fl1_64  */
         1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
@@ -2128,6 +2156,7 @@ void sh_remove_shadows(struct vcpu *v, m
     };
 
     ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(!(all && fast));
 
     pg = mfn_to_page(gmfn);
 
@@ -2147,29 +2176,26 @@ void sh_remove_shadows(struct vcpu *v, m
      * call will remove at most one shadow, and terminate immediately when
      * it does remove it, so we never walk the hash after doing a deletion.  */
 #define DO_UNSHADOW(_type) do {                                 \
-    t = (_type) >> PGC_SH_type_shift;                          \
-    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);              \
-    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )       \
+    t = (_type) >> PGC_SH_type_shift;                           \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);               \
+    if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast )      \
         hash_foreach(v, masks[t], callbacks, smfn);             \
 } while (0)
 
     /* Top-level shadows need to be unpinned */
-#define DO_UNPIN(_type) do {                                             \
+#define DO_UNPIN(_type) do {                                            \
     t = (_type) >> PGC_SH_type_shift;                                   \
     smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
     if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
         sh_unpin(v, smfn);                                              \
-    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
-        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
 } while (0)
 
     if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
     if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
 #if CONFIG_PAGING_LEVELS >= 3
     if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
-    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
-    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
-    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNPIN(PGC_SH_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow);
 #if CONFIG_PAGING_LEVELS >= 4
     if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
     if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
@@ -2181,20 +2207,14 @@ void sh_remove_shadows(struct vcpu *v, m
 #undef DO_UNSHADOW
 #undef DO_UNPIN
 
-
-#if CONFIG_PAGING_LEVELS > 2
-    /* We may have caused some PAE l3 entries to change: need to 
-     * fix up the copies of them in various places */
-    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
-        sh_pae_recopy(v->domain);
-#endif
-
     /* If that didn't catch the shadows, something is wrong */
-    if ( all && (pg->count_info & PGC_page_table) )
-    {
-        SHADOW_ERROR("can't find all shadows of mfn %05lx (shadow_flags=%08x)\n",
+    if ( !fast && (pg->count_info & PGC_page_table) )
+    {
+        SHADOW_ERROR("can't find all shadows of mfn %05lx "
+                     "(shadow_flags=%08x)\n",
                       mfn_x(gmfn), pg->shadow_flags);
-        domain_crash(v->domain);
+        if ( all ) 
+            domain_crash(v->domain);
     }
 }
 
@@ -3118,7 +3138,6 @@ void shadow_audit_tables(struct vcpu *v)
         SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
         SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
         SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
-        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
 #if CONFIG_PAGING_LEVELS >= 4
         SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
         SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
@@ -3143,7 +3162,7 @@ void shadow_audit_tables(struct vcpu *v)
         {
         case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
         case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
-                        |SHF_L2H_PAE|SHF_L3_PAE); break;
+                        |SHF_L2H_PAE); break;
         case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
                         |SHF_L3_64|SHF_L4_64); break;
         default: BUG();
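
The sh_remove_shadows() interface change above replaces the single 'all' flag with separate 'fast' and 'all' arguments.  A usage sketch matching the comment and the call sites in this patch (the wrapper function is hypothetical, added only to show the three sensible flag combinations):

    /* Hypothetical illustration -- not part of the patch. */
    static void example_unshadow(struct vcpu *v, mfn_t gmfn)
    {
        /* After a validated pagetable write: the shadow entry is already
         * safe, so a full walk that fails to unshadow is tolerable. */
        sh_remove_shadows(v, gmfn, 0 /* fast */, 0 /* all */);

        /* Early-unshadow heuristic: only the quick pointer-based removal,
         * which may legitimately miss references. */
        sh_remove_shadows(v, gmfn, 1 /* fast */, 0 /* all */);

        /* Paths that must find every shadow: walk everything and crash the
         * domain on failure (all != 0 implies fast == 0). */
        sh_remove_shadows(v, gmfn, 0 /* fast */, 1 /* all */);
    }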
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Oct 18 14:36:20 2006 +0100
@@ -20,20 +20,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-
-// DESIGN QUESTIONS:
-// Why use subshadows for PAE guests?
-// - reduces pressure in the hash table
-// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
-// - would need to find space in the page_info to store 7 more bits of
-//   backpointer
-// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
-//   figure out when to demote the guest page from l3 status
-//
-// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
-// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
-//   space for both PV and HVM guests.
-//
 
 #include <xen/config.h>
 #include <xen/types.h>
@@ -118,9 +104,6 @@ static char *fetch_type_names[] = {
 #endif
 
 /* XXX forward declarations */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
-#endif
 static inline void sh_update_linear_entries(struct vcpu *v);
 
 /**************************************************************************/
@@ -129,8 +112,6 @@ static inline void sh_update_linear_entr
  * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
  * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
  *              shadow L1 which maps its "splinters".
- * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
- *              PAE L3 info page for that CR3 value.
  */
 
 static inline mfn_t 
@@ -215,7 +196,6 @@ delete_fl1_shadow_status(struct vcpu *v,
 {
     SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                    gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
-
     shadow_hash_delete(v, gfn_x(gfn),
                         PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
 }
@@ -429,18 +409,16 @@ static void sh_audit_gw(struct vcpu *v, 
     if ( !(SHADOW_AUDIT_ENABLE) )
         return;
 
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
     if ( valid_mfn(gw->l4mfn)
          && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
                                                 PGC_SH_l4_shadow))) )
         (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* PAE or 64... */
     if ( valid_mfn(gw->l3mfn)
          && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
                                                 PGC_SH_l3_shadow))) )
         (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
-#endif /* All levels... */
+#endif /* PAE or 64... */
     if ( valid_mfn(gw->l2mfn) )
     {
         if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
@@ -498,8 +476,7 @@ static u32 guest_set_ad_bits(struct vcpu
     flags = guest_l1e_get_flags(*ep);
 
     /* PAE l3s do not have A and D bits */
-    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
-        return flags;
+    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
 
     /* Need the D bit as well for writes, in L1es and PSE L2es. */
     if ( ft == ft_demand_write  
@@ -646,37 +623,13 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i
 #endif
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 
 static inline u32
 shadow_l3_index(mfn_t *smfn, u32 guest_index)
 {
-#if GUEST_PAGING_LEVELS == 3
-    u32 group_id;
-
-    // Because we use twice the space in L3 shadows as was consumed in guest
-    // L3s, the number of guest entries per shadow page is
-    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
-    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
-    //
-    *smfn = _mfn(mfn_x(*smfn) +
-                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
-
-    // We store PAE L3 shadows in groups of 4, alternating shadows and
-    // pae_l3_bookkeeping structs.  So the effective shadow index is
-    // the the group_id * 8 + the offset within the group.
-    //
-    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
-    group_id = guest_index / 4;
-    return (group_id * 8) + (guest_index % 4);
-#else
     return guest_index;
-#endif
-}
-
-#endif // GUEST_PAGING_LEVELS >= 3
-
-#if GUEST_PAGING_LEVELS >= 4
+}
 
 static inline u32
 shadow_l4_index(mfn_t *smfn, u32 guest_index)
@@ -722,6 +675,9 @@ do {                                    
     u32 pass_thru_flags;
     u32 sflags;
 
+    /* We don't shadow PAE l3s */
+    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
     // XXX -- might want to think about PAT support for HVM guests...
 
 #ifndef NDEBUG
@@ -757,29 +713,16 @@ do {                                    
     if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
         gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
     
-    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
-    //
-    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
-        pass_thru_flags = _PAGE_PRESENT;
-    else
-    {
-        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
-                           _PAGE_RW | _PAGE_PRESENT);
-        if ( guest_supports_nx(v) )
-            pass_thru_flags |= _PAGE_NX_BIT;
-    }
-
-    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
-    // L3e's; they are all implied.  So we emulate them here.
-    //
-    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
-        gflags = pass_thru_flags;
 
     // Propagate bits from the guest to the shadow.
     // Some of these may be overwritten, below.
     // Since we know the guest's PRESENT bit is set, we also set the shadow's
     // SHADOW_PRESENT bit.
     //
+    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+                       _PAGE_RW | _PAGE_PRESENT);
+    if ( guest_supports_nx(v) )
+        pass_thru_flags |= _PAGE_NX_BIT;
     sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
 
     // Copy the guest's RW bit into the SHADOW_RW bit.
@@ -800,8 +743,7 @@ do {                                    
     // If the A or D bit has not yet been set in the guest, then we must
     // prevent the corresponding kind of access.
     //
-    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
-                  !(gflags & _PAGE_ACCESSED)) )
+    if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
         sflags &= ~_PAGE_PRESENT;
 
     /* D bits exist in L1es and PSE L2es */
@@ -890,9 +832,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                   fetch_type_names[ft], gl4e->l4, sl4p->l4);
     ASSERT(sflags != -1);
 }
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static void
 l3e_propagate_from_guest(struct vcpu *v,
                          guest_l3e_t *gl3e,
@@ -912,7 +852,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                   fetch_type_names[ft], gl3e->l3, sl3p->l3);
     ASSERT(sflags != -1);
 }
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
 
 static void
 l2e_propagate_from_guest(struct vcpu *v, 
@@ -1081,9 +1021,6 @@ shadow_write_entries(void *d, void *s, i
         safe_write_entry(dst++, src++);
 
     if ( map != NULL ) sh_unmap_domain_page(map);
-
-    /* XXX TODO:
-     * Update min/max field in page_info struct of this mfn */
 }
 
 static inline int
@@ -1195,9 +1132,7 @@ static int shadow_set_l4e(struct vcpu *v
     }
     return flags;
 }
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static int shadow_set_l3e(struct vcpu *v, 
                           shadow_l3e_t *sl3e, 
                           shadow_l3e_t new_sl3e, 
@@ -1224,28 +1159,6 @@ static int shadow_set_l3e(struct vcpu *v
     shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
     flags |= SHADOW_SET_CHANGED;
 
-#if GUEST_PAGING_LEVELS == 3 
-    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
-     * the linear pagetable entries of its l2s, and may also be copied
-     * to a low memory location to make it fit in CR3.  Report that we
-     * need to resync those copies (we can't wait for the guest to flush
-     * the TLB because it might be an increase in rights). */
-    {
-        struct vcpu *vcpu;
-
-        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
-        for_each_vcpu(v->domain, vcpu)
-        {
-            if (info->vcpus & (1 << vcpu->vcpu_id))
-            {
-                // Remember that this flip/update needs to occur.
-                vcpu->arch.shadow.pae_flip_pending = 1;
-                flags |= SHADOW_SET_L3PAE_RECOPY;
-            }
-        }
-    }
-#endif
-
     if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
     {
         /* We lost a reference to an old mfn. */
@@ -1260,7 +1173,7 @@ static int shadow_set_l3e(struct vcpu *v
     }
     return flags;
 }
-#endif /* GUEST_PAGING_LEVELS >= 3 */ 
+#endif /* GUEST_PAGING_LEVELS >= 4 */ 
 
 static int shadow_set_l2e(struct vcpu *v, 
                           shadow_l2e_t *sl2e, 
@@ -1535,51 +1448,7 @@ do {                                    
 
 #endif /* different kinds of l2 */
 
-#if GUEST_PAGING_LEVELS == 3
-
-/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
-#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
-do {                                                                    \
-    int _i;                                                             \
-    for ( _i = 0; _i < 4; _i++ )                                        \
-    {                                                                   \
-        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
-            {_code}                                                     \
-        if ( _done ) break;                                             \
-        _sl3e++;                                                        \
-        increment_ptr_to_guest_entry(_gl3p);                            \
-    }                                                                   \
-} while (0)
-
-/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
-#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
-do {                                                                    \
-    int _i, _j, _k, __done = 0;                                         \
-    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
-           == PGC_SH_l3_pae_shadow);                                   \
-    /* The subshadows are split, 64 on each page of the shadow */       \
-    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
-    {                                                                   \
-        void *_sp = sh_map_domain_page(_sl3mfn);                       \
-        for ( _i = 0; _i < 64; _i++ )                                   \
-        {                                                               \
-            /* Every second 32-byte region is a bookkeeping entry */    \
-            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
-            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
-                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
-                                        ({ __done = (_done); __done; }), \
-                                        _code);                         \
-            else                                                        \
-                for ( _k = 0 ; _k < 4 ; _k++ )                          \
-                    increment_ptr_to_guest_entry(_gl3p);                \
-            if ( __done ) break;                                        \
-        }                                                               \
-        sh_unmap_domain_page(_sp);                                     \
-        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
-    }                                                                   \
-} while (0)
-
-#elif GUEST_PAGING_LEVELS == 4
+#if GUEST_PAGING_LEVELS == 4
 
 /* 64-bit l3: touch all entries */
 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
@@ -1711,8 +1580,8 @@ void sh_install_xen_entries_in_l2h(struc
     
     /* We don't set up a linear mapping here because we can't until this
      * l2h is installed in an l3e.  sh_update_linear_entries() handles
-     * the linear mappings when the l3 is loaded.  We zero them here, just as
-     * a safety measure.
+     * the linear mappings when CR3 (and so the fourth l3e) is loaded.  
+     * We zero them here, just as a safety measure.
      */
     for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
@@ -1739,37 +1608,6 @@ void sh_install_xen_entries_in_l2h(struc
     }
     
     sh_unmap_domain_page(sl2e);
-}
-
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
-{
-    shadow_l3e_t *sl3e;
-    guest_l3e_t *gl3e = v->arch.guest_vtable;
-    shadow_l3e_t new_sl3e;
-    gfn_t l2gfn;
-    mfn_t l2gmfn, l2smfn;
-    int r;
-
-    ASSERT(!shadow_mode_external(v->domain));
-    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
-    l2gfn = guest_l3e_get_gfn(gl3e[3]);
-    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
-    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
-    if ( !valid_mfn(l2smfn) )
-    {
-        /* must remove write access to this page before shadowing it */
-        // XXX -- should check to see whether this is better with level==0 or
-        // level==2...
-        if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
-            flush_tlb_mask(v->domain->domain_dirty_cpumask);
- 
-        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
-    }
-    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
-                             ft_prefetch);
-    sl3e = sh_map_domain_page(sl3mfn);
-    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
-    sh_unmap_domain_page(sl3e);
 }
 #endif
 
@@ -1827,8 +1665,6 @@ void sh_install_xen_entries_in_l2(struct
 
 
 
-
-
 /**************************************************************************/
 /* Create a shadow of a given guest page.
  */
@@ -1839,7 +1675,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
     SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
                   mfn_x(gmfn), shadow_type, mfn_x(smfn));
 
-    if ( shadow_type != PGC_SH_guest_root_type )
+    if ( shadow_type != PGC_SH_l2_32_shadow 
+         && shadow_type != PGC_SH_l2_pae_shadow 
+         && shadow_type != PGC_SH_l2h_pae_shadow 
+         && shadow_type != PGC_SH_l4_64_shadow )
         /* Lower-level shadow, not yet linked form a higher level */
         mfn_to_page(smfn)->up = 0;
 
@@ -1853,8 +1692,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
             sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
 #endif
 #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
-        case PGC_SH_l3_shadow:
-            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
         case PGC_SH_l2h_shadow:
             sh_install_xen_entries_in_l2h(v, smfn); break;
 #endif
@@ -1988,20 +1825,16 @@ static shadow_l4e_t * shadow_get_and_cre
                                                 mfn_t *sl4mfn)
 {
     /* There is always a shadow of the top level table.  Get it. */
-    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
     /* Reading the top level table is always valid. */
     return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
 }
-#endif /* GUEST_PAGING_LEVELS >= 4 */
-
-
-#if GUEST_PAGING_LEVELS >= 3
+
 static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
                                                 walk_t *gw, 
                                                 mfn_t *sl3mfn,
                                                 fetch_type_t ft)
 {
-#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
     mfn_t sl4mfn;
     shadow_l4e_t *sl4e;
     if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
@@ -2032,19 +1865,8 @@ static shadow_l3e_t * shadow_get_and_cre
     }
     /* Now follow it down a level.  Guaranteed to succeed. */
     return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
-#else /* PAE... */
-    /* There is always a shadow of the top level table.  Get it. */
-    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
-    /* This next line is important: the shadow l3 table is in an 8k
-     * shadow and we need to return the right mfn of the pair. This call
-     * will set it for us as a side-effect. */
-    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
-    ASSERT(v->arch.shadow_vtable);
-    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
-        + shadow_l3_table_offset(gw->va);
+}
 #endif /* GUEST_PAGING_LEVELS >= 4 */
-}
-#endif /* GUEST_PAGING_LEVELS >= 3 */
 
 
 static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
@@ -2052,7 +1874,7 @@ static shadow_l2e_t * shadow_get_and_cre
                                                 mfn_t *sl2mfn,
                                                 fetch_type_t ft)
 {
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     shadow_l3e_t *sl3e;
     if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
@@ -2080,17 +1902,22 @@ static shadow_l2e_t * shadow_get_and_cre
                                  *sl2mfn, &new_sl3e, ft);
         r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);
-#if GUEST_PAGING_LEVELS == 3 
-        /* Need to sync up the linear maps, as we are about to use them */
-        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
-        sh_pae_recopy(v->domain);
-#endif
     }
     /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#elif GUEST_PAGING_LEVELS == 3 /* PAE... */
+    /* We never demand-shadow PAE l3es: they are only created in
+     * sh_update_cr3().  Check if the relevant sl3e is present. */
+    shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 
+        + shadow_l3_linear_offset(gw->va);
+    if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 
+        return NULL;
+    *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+    ASSERT(valid_mfn(*sl2mfn));
     return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
 #else /* 32bit... */
     /* There is always a shadow of the top level table.  Get it. */
-    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
     /* This next line is important: the guest l2 has a 16k
      * shadow, we need to return the right mfn of the four. This
      * call will set it for us as a side-effect. */
@@ -2213,9 +2040,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
     /* Put the memory back in the pool */
     shadow_free(v->domain, smfn);
 }
-#endif    
-
-#if GUEST_PAGING_LEVELS >= 3
+
 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
 {
     shadow_l3e_t *sl3e;
@@ -2230,10 +2055,6 @@ void sh_destroy_l3_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 3
-    /* Take this shadow off the list of root shadows */
-    list_del_init(&mfn_to_page(smfn)->list);
-#endif
 
     /* Decrement refcounts of all the old entries */
     sl3mfn = smfn; 
@@ -2247,53 +2068,8 @@ void sh_destroy_l3_shadow(struct vcpu *v
     /* Put the memory back in the pool */
     shadow_free(v->domain, smfn);
 }
-#endif    
-
-
-#if GUEST_PAGING_LEVELS == 3
-static void sh_destroy_l3_subshadow(struct vcpu *v, 
-                                     shadow_l3e_t *sl3e)
-/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
-{
-    int i;
-    mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT);
-    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
-    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
-        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
-            shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn);
-}
-#endif
-
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
-/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
-{
-    int i, j;
-    struct pae_l3_bookkeeping *bk;
-    
-    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
-           == PGC_SH_l3_pae_shadow);
-    /* The subshadows are split, 64 on each page of the shadow */
-    for ( i = 0; i < 2; i++ ) 
-    {
-        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
-        for ( j = 0; j < 64; j++ )
-        {
-            /* Every second 32-byte region is a bookkeeping entry */
-            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
-            if ( bk->pinned )
-                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
-            /* Check whether we've just freed the whole shadow */
-            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
-            {
-                sh_unmap_domain_page(p);
-                return;
-            }
-        }
-        sh_unmap_domain_page(p);
-    }
-}
-#endif
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
 
 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
 {
@@ -2311,7 +2087,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
     gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
     delete_shadow_status(v, gmfn, t, smfn);
     shadow_demote(v, gmfn, t);
-#if GUEST_PAGING_LEVELS == 2
+#if (GUEST_PAGING_LEVELS == 2) || (GUEST_PAGING_LEVELS == 3)
     /* Take this shadow off the list of root shadows */
     list_del_init(&mfn_to_page(smfn)->list);
 #endif
@@ -2421,31 +2197,14 @@ void sh_unhook_32b_mappings(struct vcpu 
 
 #elif GUEST_PAGING_LEVELS == 3
 
-void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
-/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
-{
-    shadow_l3e_t *sl3e;
-    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
-        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
-            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
-            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
-                 == PGC_SH_l2h_pae_shadow ) 
-            {
-                /* High l2: need to pick particular l2es to unhook */
-                shadow_l2e_t *sl2e;
-                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
-                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
-                });
-            }
-            else
-            {
-                /* Normal l2: can safely unhook the whole l3e */
-                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
-            }
-        }
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
+/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
+{
+    shadow_l2e_t *sl2e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
     });
-    /* We've changed PAE L3 entries: must sync up various copies of them */
-    sh_pae_recopy(v->domain);
 }
 
 #elif GUEST_PAGING_LEVELS == 4
@@ -2523,9 +2282,8 @@ static int validate_gl4e(struct vcpu *v,
     result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
     return result;
 }
-#endif // GUEST_PAGING_LEVELS >= 4
-
-#if GUEST_PAGING_LEVELS >= 3
+
+
 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
 {
     shadow_l3e_t new_sl3e;
@@ -2535,16 +2293,6 @@ static int validate_gl3e(struct vcpu *v,
     int result = 0;
 
     perfc_incrc(shadow_validate_gl3e_calls);
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    {
-        /* If we've updated a subshadow which is unreferenced then 
-           we don't care what value is being written - bail. */
-        struct pae_l3_bookkeeping *info = sl3p_to_info(se); 
-        if(!info->refcount)
-            return result; 
-    }
-#endif
 
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
@@ -2559,16 +2307,9 @@ static int validate_gl3e(struct vcpu *v,
                              sl2mfn, &new_sl3e, ft_prefetch);
     result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
 
-#if GUEST_PAGING_LEVELS == 3
-    /* We have changed a PAE l3 entry: need to sync up the possible copies 
-     * of it */
-    if ( result & SHADOW_SET_L3PAE_RECOPY )
-        sh_pae_recopy(v->domain);
-#endif
-
     return result;
 }
-#endif // GUEST_PAGING_LEVELS >= 3
+#endif // GUEST_PAGING_LEVELS >= 4
 
 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
 {
@@ -2755,12 +2496,12 @@ sh_map_and_validate_gl3e(struct vcpu *v,
 sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
                           void *new_gl3p, u32 size)
 {
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
     return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
                                 PGC_SH_l3_shadow, 
                                 shadow_l3_index, 
                                 validate_gl3e);
-#else // ! GUEST_PAGING_LEVELS >= 3
+#else // ! GUEST_PAGING_LEVELS >= 4
     SHADOW_PRINTK("called in wrong paging mode!\n");
     BUG();
     return 0;
@@ -2822,10 +2563,10 @@ static inline void check_for_early_unsha
     {
         u32 flags = mfn_to_page(gmfn)->shadow_flags;
         mfn_t smfn;
-        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+        if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
         {
             perfc_incrc(shadow_early_unshadow);
-            sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
+            sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
             return;
         }
         /* SHF_unhooked_mappings is set to make sure we only unhook
@@ -2840,9 +2581,14 @@ static inline void check_for_early_unsha
                 smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
                 shadow_unhook_mappings(v, smfn);
             }
-            if ( flags & SHF_L3_PAE ) 
+            if ( flags & SHF_L2_PAE ) 
             {
-                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_pae_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L2H_PAE ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2h_pae_shadow);
                 shadow_unhook_mappings(v, smfn);
             }
             if ( flags & SHF_L4_64 ) 
@@ -3134,7 +2880,6 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
-    sh_log_mmio(v, gpa);
     handle_mmio(va, gpa);
     return EXCRET_fault_fixed;
 
@@ -3183,8 +2928,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
             return 0;
     }
 #elif SHADOW_PAGING_LEVELS == 3
-    if ( !(shadow_l3e_get_flags(
-          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
+    if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)])
            & _PAGE_PRESENT) )
         // no need to flush anything if there's no SL2...
         return 0;
@@ -3247,34 +2991,6 @@ sh_gva_to_gpa(struct vcpu *v, unsigned l
     else
         return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
 }
-
-
-// XXX -- should this be in this file?
-//        Or should it be moved to shadow-common.c?
-//
-/* returns a lowmem machine address of the copied HVM L3 root table
- * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
- * otherwise blank out any entries with reserved bits in them.  */
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-static unsigned long
-hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
-{
-    int i, f;
-    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
-    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
-    for ( i = 0; i < 4; i++ )
-    {
-        f = l3e_get_flags(l3tab[i]);
-        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
-            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
-        else
-            new_l3e = l3e_empty();
-        safe_write_entry(&copy[i], &new_l3e);
-    }
-    return __pa(copy);
-}
-#endif
 
 
 static inline void
@@ -3330,7 +3046,7 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v == current ) 
         {
             __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
         } 
         else
@@ -3338,7 +3054,7 @@ sh_update_linear_entries(struct vcpu *v)
             l4_pgentry_t *ml4e;
             ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
             ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
             sh_unmap_domain_page(ml4e);
         }
@@ -3379,13 +3095,8 @@ sh_update_linear_entries(struct vcpu *v)
             sh_unmap_domain_page(ml4e);
         }
 
-#if GUEST_PAGING_LEVELS == 2
         /* Shadow l3 tables are made up by update_cr3 */
-        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
-        /* Always safe to use shadow_vtable, because it's globally mapped */
-        sl3e = v->arch.shadow_vtable;
-#endif
+        sl3e = v->arch.shadow.l3table;
 
         for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         {
@@ -3424,14 +3135,14 @@ sh_update_linear_entries(struct vcpu *v)
 #if GUEST_PAGING_LEVELS == 2
         /* Shadow l3 tables were built by update_cr3 */
         if ( shadow_mode_external(d) )
-            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+            shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
         else
             BUG(); /* PV 2-on-3 is not supported yet */
         
 #else /* GUEST_PAGING_LEVELS == 3 */
         
-        /* Always safe to use *_vtable, because they're globally mapped */
-        shadow_l3e = v->arch.shadow_vtable;
+        shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
+        /* Always safe to use guest_vtable, because it's globally mapped */
         guest_l3e = v->arch.guest_vtable;
 
 #endif /* GUEST_PAGING_LEVELS */
@@ -3510,7 +3221,7 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v == current ) 
         {
             __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
         } 
         else
@@ -3518,7 +3229,7 @@ sh_update_linear_entries(struct vcpu *v)
             l2_pgentry_t *ml2e;
             ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
             ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
-                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
                              __PAGE_HYPERVISOR);
             sh_unmap_domain_page(ml2e);
         }
@@ -3530,69 +3241,7 @@ sh_update_linear_entries(struct vcpu *v)
 }
 
 
-// XXX -- should this be in this file?
-//        Or should it be moved to shadow-common.c?
-//
-#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-void sh_pae_recopy(struct domain *d)
-/* Called whenever we write to the l3 entries of a PAE pagetable which 
- * is currently in use.  Each vcpu that is using the table needs to 
- * resync its copies of the l3s in linear maps and any low-memory
- * copies it might have made for fitting into 32bit CR3.
- * Since linear maps are also resynced when we change CR3, we don't
- * need to worry about changes to PAE l3es that are not currently in use.*/
-{
-    struct vcpu *v;
-    cpumask_t flush_mask = CPU_MASK_NONE;
-    ASSERT(shadow_lock_is_acquired(d));
-    
-    for_each_vcpu(d, v)
-    {
-        if ( !v->arch.shadow.pae_flip_pending ) 
-            continue;
-
-        cpu_set(v->processor, flush_mask);
-        
-        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
-
-        /* This vcpu has a copy in its linear maps */
-        sh_update_linear_entries(v);
-        if ( hvm_guest(v) )
-        {
-            /* This vcpu has a copy in its HVM PAE l3 */
-            v->arch.hvm_vcpu.hw_cr3 = 
-                hvm_pae_copy_root(v, v->arch.shadow_vtable,
-                                  !shadow_vcpu_mode_translate(v));
-        }
-#if CONFIG_PAGING_LEVELS == 3
-        else 
-        {
-            /* This vcpu might have copied the l3 to below 4GB */
-            if ( v->arch.cr3 >> PAGE_SHIFT 
-                 != pagetable_get_pfn(v->arch.shadow_table) )
-            {
-                /* Recopy to where that copy is. */
-                int i;
-                l3_pgentry_t *dst, *src;
-                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
-                src = v->arch.shadow_vtable;
-                for ( i = 0 ; i < 4 ; i++ ) 
-                    safe_write_entry(dst + i, src + i);
-            }
-        }
-#endif
-        v->arch.shadow.pae_flip_pending = 0;        
-    }
-
-    flush_tlb_mask(flush_mask);
-}
-#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
-
-
-/* removes:
- *     vcpu->arch.guest_vtable
- *     vcpu->arch.shadow_table
- *     vcpu->arch.shadow_vtable
+/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[].
  * Does all appropriate management/bookkeeping/refcounting/etc...
  */
 static void
@@ -3600,6 +3249,7 @@ sh_detach_old_tables(struct vcpu *v)
 {
     struct domain *d = v->domain;
     mfn_t smfn;
+    int i = 0;
 
     ////
     //// vcpu->arch.guest_vtable
@@ -3620,56 +3270,80 @@ sh_detach_old_tables(struct vcpu *v)
     }
 
     ////
-    //// vcpu->arch.shadow_table
+    //// vcpu->arch.shadow_table[]
     ////
-    smfn = pagetable_get_mfn(v->arch.shadow_table);
-    if ( mfn_x(smfn) )
-    {
-        ASSERT(v->arch.shadow_vtable);
+
 
 #if GUEST_PAGING_LEVELS == 3
-        // PAE guests do not (necessarily) use an entire page for their
-        // 4-entry L3s, so we have to deal with them specially.
-        //
-        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
-#else
-        sh_put_ref(v, smfn, 0);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-        {
-            struct pae_l3_bookkeeping *info =
-                sl3p_to_info(v->arch.shadow_vtable);
-            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
-            clear_bit(v->vcpu_id, &info->vcpus);
-        }
-#endif
-        v->arch.shadow_table = pagetable_null();
-    }
-
-    ////
-    //// vcpu->arch.shadow_vtable
-    ////
-    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
-         v->arch.shadow_vtable )
-    {
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        /* A: so sh_update_linear_entries can operate on other vcpus */
-        sh_unmap_domain_page_global(v->arch.shadow_vtable);
-        v->arch.shadow_vtable = NULL;
-    }
-}
+    /* PAE guests have four shadow_table entries */
+    for ( i = 0 ; i < 4 ; i++ )
+#endif
+    {
+        smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
+        if ( mfn_x(smfn) )
+            sh_put_ref(v, smfn, 0);
+        v->arch.shadow_table[i] = pagetable_null();
+    }
+}
+
+/* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+static void
+sh_set_toplevel_shadow(struct vcpu *v, 
+                       int slot,
+                       mfn_t gmfn, 
+                       unsigned int root_type) 
+{
+    mfn_t smfn = get_shadow_status(v, gmfn, root_type);
+    struct domain *d = v->domain;
+    ASSERT(pagetable_is_null(v->arch.shadow_table[slot]));
+    if ( valid_mfn(smfn) )
+    {
+        /* Pull this root shadow to the front of the list of roots. */
+        list_del(&mfn_to_page(smfn)->list);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    else
+    {
+        /* This guest MFN is a pagetable.  Must revoke write access. */
+        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+        /* Make sure there's enough free shadow memory. */
+        shadow_prealloc(d, SHADOW_MAX_ORDER); 
+        /* Shadow the page. */
+        smfn = sh_make_shadow(v, gmfn, root_type);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    ASSERT(valid_mfn(smfn));
+    
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    /* Once again OK to unhook entries from this table if we see fork/exit */
+    ASSERT(sh_mfn_is_a_page_table(gmfn));
+    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+    /* Take a ref to this page: it will be released in sh_detach_old_tables. */
+    sh_get_ref(smfn, 0);
+    sh_pin(smfn);
+
+    /* Done.  Install it */
+    SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n",
+                  GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
+                  mfn_x(gmfn), mfn_x(smfn));
+    v->arch.shadow_table[slot] = pagetable_from_mfn(smfn);
+}
+
 
 static void
 sh_update_cr3(struct vcpu *v)
-/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+/* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
- * HVM guests should also set hvm_get_guest_cntl_reg(v, 3)...
+ * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
+ * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
  */
 {
     struct domain *d = v->domain;
-    mfn_t gmfn, smfn;
+    mfn_t gmfn;
 #if GUEST_PAGING_LEVELS == 3
     u32 guest_idx=0;
 #endif
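
[Note: sh_set_toplevel_shadow() gathers into one helper the work sh_update_cr3() used to do inline for its single root: find an existing root shadow and pull it to the head of the toplevel_shadows list, or else revoke write access to the guest frame, preallocate shadow memory and make a new shadow; then take a reference, pin it, and install it in shadow_table[slot]. A minimal stand-alone model of that find-or-create-and-reference pattern follows; all names below are illustrative, not Xen's.

/* Stand-alone sketch (not Xen code) of the find-or-create-and-pin pattern
 * that sh_set_toplevel_shadow() applies per root slot. */
#include <stdio.h>
#include <stdlib.h>

struct root {
    unsigned long gmfn;     /* guest frame being shadowed */
    int refs;               /* held while installed in a slot */
    struct root *next;      /* most-recently-used list */
};

static struct root *roots;  /* head = most recently installed */

static struct root *set_toplevel(unsigned long gmfn)
{
    struct root **pp, *r;

    /* Look for an existing root shadow and pull it off the list. */
    for ( pp = &roots; (r = *pp) != NULL; pp = &r->next )
        if ( r->gmfn == gmfn )
        {
            *pp = r->next;
            break;
        }

    /* Otherwise create one (Xen would first revoke write access to the
     * guest frame and preallocate shadow memory here). */
    if ( r == NULL )
    {
        r = calloc(1, sizeof(*r));
        r->gmfn = gmfn;
    }

    r->next = roots;        /* re-insert at the head of the list */
    roots = r;
    r->refs++;              /* released again by the detach path */
    return r;
}

int main(void)
{
    set_toplevel(0x1000);
    set_toplevel(0x2000);
    set_toplevel(0x1000);   /* reuses and re-references the first root */
    printf("head gmfn=%#lx refs=%d\n", roots->gmfn, roots->refs);
    return 0;
}

Factoring this out matters because PAE guests now call it up to four times per CR3 switch, once per guest l3 slot, as the next hunk shows.]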
@@ -3770,159 +3444,102 @@ sh_update_cr3(struct vcpu *v)
 #endif
 
     ////
-    //// vcpu->arch.shadow_table
+    //// vcpu->arch.shadow_table[]
     ////
-    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
-    if ( valid_mfn(smfn) )
-    {
-        /* Pull this root shadow to the front of the list of roots. */
-        list_del(&mfn_to_page(smfn)->list);
-        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
-    }
-    else
-    {
-        /* This guest MFN is a pagetable.  Must revoke write access. */
-        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
-             != 0 )
-            flush_tlb_mask(d->domain_dirty_cpumask); 
-        /* Make sure there's enough free shadow memory. */
-        shadow_prealloc(d, SHADOW_MAX_ORDER); 
-        /* Shadow the page. */
-        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
-        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
-    }
-    ASSERT(valid_mfn(smfn));
-    v->arch.shadow_table = pagetable_from_mfn(smfn);
-
-#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    /* Once again OK to unhook entries from this table if we see fork/exit */
-    ASSERT(sh_mfn_is_a_page_table(gmfn));
-    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
-#endif
-
-
-    ////
-    //// vcpu->arch.shadow_vtable
-    ////
-    if ( shadow_mode_external(d) )
-    {
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-        mfn_t adjusted_smfn = smfn;
-        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        v->arch.shadow_vtable =
-            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
-            shadow_idx;
+
+#if GUEST_PAGING_LEVELS == 2
+    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l2_shadow);
+#elif GUEST_PAGING_LEVELS == 3
+    /* PAE guests have four shadow_table entries, based on the 
+     * current values of the guest's four l3es. */
+    {
+        int i;
+        guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable;
+        for ( i = 0; i < 4; i++ ) 
+        {
+            ASSERT(pagetable_is_null(v->arch.shadow_table[i]));
+            if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
+            {
+                gfn_t gl2gfn = guest_l3e_get_gfn(gl3e[i]);
+                mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+                if ( valid_mfn(gl2mfn) )                
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
+                                           ? PGC_SH_l2h_shadow 
+                                           : PGC_SH_l2_shadow);
+            }
+        }
+    }
+#elif GUEST_PAGING_LEVELS == 4
+    sh_set_toplevel_shadow(v, 0, gmfn, PGC_SH_l4_shadow);
 #else
-        // Q: why does this need to use (un)map_domain_page_*global* ?
-        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
-#endif
-    }
-    else
-    {
-#if SHADOW_PAGING_LEVELS == 4
-        v->arch.shadow_vtable = __sh_linear_l4_table;
-#elif GUEST_PAGING_LEVELS == 3
-        // XXX - why does this need a global map?
-        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#error This should never happen 
+#endif
+
+#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+#endif
+
+    /// 
+    /// v->arch.shadow.l3table
+    ///
+#if SHADOW_PAGING_LEVELS == 3
+        {
+            mfn_t smfn;
+            int i;
+            for ( i = 0; i < 4; i++ )
+            {
+#if GUEST_PAGING_LEVELS == 2
+                /* 2-on-3: make a PAE l3 that points at the four-page l2 */
+                smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
 #else
-        v->arch.shadow_vtable = __sh_linear_l2_table;
-#endif
-    }
-
-#if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    // Now that shadow_vtable is in place, check that the sl3e[3] is properly
-    // shadowed and installed in PAE PV guests...
-    if ( !shadow_mode_external(d) &&
-         !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
-           _PAGE_PRESENT) )
-    {
-        sh_install_xen_entries_in_l3(v, gmfn, smfn);
-    }
-#endif
-
-    ////
-    //// Take a ref to the new shadow table, and pin it.
-    ////
-    //
-    // This ref is logically "held" by v->arch.shadow_table entry itself.
-    // Release the old ref.
-    //
-#if GUEST_PAGING_LEVELS == 3
-    // PAE guests do not (necessarily) use an entire page for their
-    // 4-entry L3s, so we have to deal with them specially.
-    //
-    // XXX - might want to revisit this if/when we do multiple compilation for
-    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
-    //       subshadows.
-    //
-    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
-    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
-#else
-    sh_get_ref(smfn, 0);
-    sh_pin(smfn);
-#endif
-
-#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
-    // PAE 3-on-3 shadows have to keep track of which vcpu's are using
-    // which l3 subshadow, in order handle the SHADOW_SET_L3PAE_RECOPY
-    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
-    // in the code for more info.
-    //
-    {
-        struct pae_l3_bookkeeping *info =
-            sl3p_to_info(v->arch.shadow_vtable);
-        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
-        set_bit(v->vcpu_id, &info->vcpus);
-    }
-#endif
-
-    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
-                      __func__, gmfn, smfn);
+                /* 3-on-3: make a PAE l3 that points at the four l2 pages */
+                smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
+#endif
+                v->arch.shadow.l3table[i] = 
+                    (mfn_x(smfn) == 0) 
+                    ? shadow_l3e_empty()
+                    : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
+            }
+        }
+#endif /* SHADOW_PAGING_LEVELS == 3 */
+
 
     ///
-    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+    /// v->arch.cr3
     ///
     if ( shadow_mode_external(d) )
     {
-        ASSERT(hvm_guest(v));
         make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
-
-#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
-#if SHADOW_PAGING_LEVELS != 3
-#error unexpected combination of GUEST and SHADOW paging levels
-#endif
-        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
-        {
-            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
-            int i;
-
-            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
-                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
-            for (i = 0; i < 4; i++)
-            {
-                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
-                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
-            }
-        }
-#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
-        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
-         * If paging is disabled, clear l3e reserved bits; otherwise 
-         * remove entries that have reserved bits set. */
-        v->arch.hvm_vcpu.hw_cr3 =
-            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
-                              !shadow_vcpu_mode_translate(v));
-#else
-        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
-        v->arch.hvm_vcpu.hw_cr3 =
-            pagetable_get_paddr(v->arch.shadow_table);
-#endif
     }
     else // not shadow_mode_external...
     {
         /* We don't support PV except guest == shadow == config levels */
         BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
-        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+#if SHADOW_PAGING_LEVELS == 3
+        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated.
+         * Don't use make_cr3 because (a) we know it's below 4GB, and
+         * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */
+        ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL);
+        v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table);
+#else
+        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
+        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
+#endif
+    }
+
+
+    ///
+    /// v->arch.hvm_vcpu.hw_cr3
+    ///
+    if ( shadow_mode_external(d) )
+    {
+        ASSERT(hvm_guest(v));
+#if SHADOW_PAGING_LEVELS == 3
+        /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
+        v->arch.hvm_vcpu.hw_cr3 = virt_to_maddr(&v->arch.shadow.l3table);
+#else
+        /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
+        v->arch.hvm_vcpu.hw_cr3 = pagetable_get_paddr(v->arch.shadow_table[0]);
+#endif
     }
 
     /* Fix up the linear pagetable mappings */
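
[Note: for 3-level shadows the per-vcpu v->arch.shadow.l3table now plays the role previously split between shadow_vtable and hvm_lowmem_l3tab: its four entries point either at the four consecutive pages of a single 4-page l2 shadow (2-on-3) or at the four separate l2 shadows (3-on-3), and its machine address goes straight into v->arch.cr3 or hw_cr3. A stand-alone sketch of how those four entries are fabricated; the l3e layout is simplified to a present bit and the frame numbers are made up.

/* Stand-alone sketch (not Xen code) of fabricating the four PAE l3 entries
 * from the root shadows. */
#include <stdint.h>
#include <stdio.h>

#define PRESENT 1ull

static uint64_t l3e_from_mfn(uint64_t mfn)
{
    return mfn ? ((mfn << 12) | PRESENT) : 0;   /* 0 models an empty entry */
}

int main(void)
{
    uint64_t l3table[4];
    uint64_t shadow_table[4] = { 0x5000, 0x6000, 0, 0x7000 }; /* mfns, 0 = none */
    int two_on_three = 0;   /* set to 1 for a 32-bit guest on PAE shadows */
    int i;

    for ( i = 0; i < 4; i++ )
    {
        /* 2-on-3: one 4-page l2 shadow, so entry i points at its page 0 + i.
         * 3-on-3: four separate l2 shadows, one per guest l3 slot. */
        uint64_t mfn = two_on_three ? shadow_table[0] + i : shadow_table[i];
        l3table[i] = l3e_from_mfn(mfn);
    }

    for ( i = 0; i < 4; i++ )
        printf("l3table[%d] = %#llx\n", i, (unsigned long long)l3table[i]);
    return 0;
}

Because the real table is a 32-byte block inside the vcpu structure, the PV path above only has to check (with the ASSERT) that its machine address fits in a PAE CR3 before writing v->arch.cr3 directly.]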
@@ -3950,7 +3567,6 @@ static int sh_guess_wrmap(struct vcpu *v
 
 
     /* Carefully look in the shadow linear map for the l1e we expect */
-    if ( v->arch.shadow_vtable == NULL ) return 0;
 #if GUEST_PAGING_LEVELS >= 4
     sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
     if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
@@ -3959,7 +3575,7 @@ static int sh_guess_wrmap(struct vcpu *v
     if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
         return 0;
 #elif GUEST_PAGING_LEVELS == 3
-    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 
+    sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 
         + shadow_l3_linear_offset(vaddr);
     if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
         return 0;
@@ -3988,6 +3604,7 @@ int sh_remove_write_access(struct vcpu *
     shadow_l1e_t *sl1e;
     int done = 0;
     int flags;
+    mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */
     
     SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
     {
@@ -3997,6 +3614,10 @@ int sh_remove_write_access(struct vcpu *
              && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
         {
             shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 
+            /* Remember the last shadow that we shot a writeable mapping in */
+            v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
+#endif
             if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
                   & PGT_count_mask) == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
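
[Note: the SHOPT_WRITABLE_HEURISTIC lines added above record, in last_writeable_pte_smfn, the l1 shadow in which a writeable mapping was last found and removed, so that a later search for a stray writeable mapping can try that shadow first before falling back to a brute-force walk. A toy model of the remember-the-last-hit idea, with a plain array standing in for the set of l1 shadows; everything here is illustrative.

/* Stand-alone sketch (not Xen code) of the "remember where we last shot a
 * writeable mapping" heuristic. */
#include <stdio.h>

#define NTABLES 8

static int writeable_in[NTABLES];     /* 1 if table i still maps the page RW */
static unsigned int last_hit;         /* models last_writeable_pte_smfn */

static int remove_writeable(void)
{
    unsigned int i;

    /* Try the remembered table first ... */
    if ( writeable_in[last_hit] )
    {
        writeable_in[last_hit] = 0;
        return 1;
    }
    /* ... and only then fall back to the brute-force scan. */
    for ( i = 0; i < NTABLES; i++ )
        if ( writeable_in[i] )
        {
            writeable_in[i] = 0;
            last_hit = i;
            return 1;
        }
    return 0;
}

int main(void)
{
    writeable_in[5] = 1;
    printf("removed: %d, remembered table: %u\n", remove_writeable(), last_hit);
    return 0;
}
]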
@@ -4044,13 +3665,11 @@ void sh_clear_shadow_entry(struct vcpu *
     case PGC_SH_l2h_shadow:
 #endif
         shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
     case PGC_SH_l3_shadow:
         shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
-#if GUEST_PAGING_LEVELS >= 4
     case PGC_SH_l4_shadow:
         shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
-#endif
 #endif
     default: BUG(); /* Called with the wrong kind of shadow. */
     }
@@ -4081,7 +3700,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
     return done;
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
 /* Remove all mappings of this l2 shadow from this l3 shadow */
 {
@@ -4104,7 +3723,6 @@ int sh_remove_l2_shadow(struct vcpu *v, 
     return done;
 }
 
-#if GUEST_PAGING_LEVELS >= 4
 int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
 /* Remove all mappings of this l3 shadow from this l4 shadow */
 {
@@ -4127,7 +3745,6 @@ int sh_remove_l3_shadow(struct vcpu *v, 
     return done;
 }
 #endif /* 64bit guest */ 
-#endif /* PAE guest */
 
 /**************************************************************************/
 /* Handling HVM guest writes to pagetables  */
@@ -4448,7 +4065,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
     return 0;
 }
 
-#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
 int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
 {
     guest_l3e_t *gl3e, *gp;
@@ -4486,9 +4103,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
     sh_unmap_domain_page(gp);
     return 0;
 }
-#endif /* GUEST_PAGING_LEVELS >= 3 */
-
-#if GUEST_PAGING_LEVELS >= 4
+
 int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
 {
     guest_l4e_t *gl4e, *gp;
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h    Wed Oct 18 14:36:20 2006 +0100
@@ -49,10 +49,6 @@ extern void
 extern void 
 SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
     struct vcpu *v, mfn_t smfn);
-
-extern void
-SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
-    (struct vcpu *v, mfn_t smfn);
 
 extern void 
 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Oct 18 14:36:20 2006 +0100
@@ -178,77 +178,6 @@ extern void shadow_audit_p2m(struct doma
 
 
 /******************************************************************************
- * Mechanism for double-checking the optimized pagefault path: this
- * structure contains a record of actions taken by the fault handling
- * code.  In paranoid mode, the fast-path code fills out one of these
- * structures (but doesn't take any actual action) and then the normal 
- * path fills in another.  When the fault handler finishes, the 
- * two are compared */
-
-#ifdef SHADOW_OPTIMIZATION_PARANOIA
-
-typedef struct shadow_action_log sh_log_t;
-struct shadow_action_log {
-    paddr_t ad[CONFIG_PAGING_LEVELS];  /* A & D bits propagated here */
-    paddr_t mmio;                      /* Address of an mmio operation */
-    int rv;                            /* Result of the fault handler */
-};
-
-/* There are two logs, one for the fast path, one for the normal path */
-enum sh_log_type { log_slow = 0, log_fast= 1 };
-
-/* Alloc and zero the logs */
-static inline void sh_init_log(struct vcpu *v) 
-{
-    if ( unlikely(!v->arch.shadow.action_log) ) 
-        v->arch.shadow.action_log = xmalloc_array(sh_log_t, 2);
-    ASSERT(v->arch.shadow.action_log);
-    memset(v->arch.shadow.action_log, 0, 2 * sizeof (sh_log_t));
-}
-
-/* Log an A&D-bit update */
-static inline void sh_log_ad(struct vcpu *v, paddr_t e, unsigned int level)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].ad[level] = e;
-}
-
-/* Log an MMIO address */
-static inline void sh_log_mmio(struct vcpu *v, paddr_t m)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].mmio = m;
-}
-
-/* Log the result */
-static inline void sh_log_rv(struct vcpu *v, int rv)
-{
-    v->arch.shadow.action_log[v->arch.shadow.action_index].rv = rv;
-}
-
-/* Set which mode we're in */
-static inline void sh_set_log_mode(struct vcpu *v, enum sh_log_type t) 
-{
-    v->arch.shadow.action_index = t;
-}
-
-/* Know not to take action, because we're only checking the mechanism */
-static inline int sh_take_no_action(struct vcpu *v) 
-{
-    return (v->arch.shadow.action_index == log_fast);
-}
-
-#else /* Non-paranoid mode: these logs do not exist */
-
-#define sh_init_log(_v) do { (void)(_v); } while(0)
-#define sh_set_log_mode(_v,_t) do { (void)(_v); } while(0)
-#define sh_log_ad(_v,_e,_l) do { (void)(_v),(void)(_e),(void)(_l); } while (0)
-#define sh_log_mmio(_v,_m) do { (void)(_v),(void)(_m); } while (0)
-#define sh_log_rv(_v,_r) do { (void)(_v),(void)(_r); } while (0)
-#define sh_take_no_action(_v) (((void)(_v)), 0)
-
-#endif /* SHADOW_OPTIMIZATION_PARANOIA */
-
-
-/******************************************************************************
  * Macro for dealing with the naming of the internal names of the
  * shadow code's external entry points.
  */
@@ -336,13 +265,9 @@ void shadow_convert_to_log_dirty(struct 
  * non-Xen mappings in this top-level shadow mfn */
 void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);
 
-/* Re-sync copies of PAE shadow L3 tables if they have been changed */
-void sh_pae_recopy(struct domain *d);
-
 /* Install the xen mappings in various flavours of shadow */
 void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
 void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
-void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn);
 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
 
 
diff -r bd207697f0c7 -r 5c029fda79dc xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Wed Oct 18 14:36:20 2006 +0100
@@ -215,8 +215,7 @@ static inline shadow_l4e_t shadow_l4e_fr
      shadow_l1_linear_offset(SH_LINEAR_PT_VIRT_START)); \
 })
 
-// shadow linear L3 and L4 tables only exist in 4 level paging...
-#if SHADOW_PAGING_LEVELS == 4
+#if SHADOW_PAGING_LEVELS >= 4
 #define sh_linear_l3_table(v) ({ \
     ASSERT(current == (v)); \
     ((shadow_l3e_t *) \
@@ -386,7 +385,6 @@ static inline guest_l4e_t guest_l4e_from
 #define PGC_SH_fl1_shadow PGC_SH_fl1_pae_shadow
 #define PGC_SH_l2_shadow  PGC_SH_l2_pae_shadow
 #define PGC_SH_l2h_shadow PGC_SH_l2h_pae_shadow
-#define PGC_SH_l3_shadow  PGC_SH_l3_pae_shadow
 #else
 #define PGC_SH_l1_shadow  PGC_SH_l1_64_shadow
 #define PGC_SH_fl1_shadow PGC_SH_fl1_64_shadow
@@ -404,14 +402,6 @@ valid_gfn(gfn_t m)
 {
     return VALID_GFN(gfn_x(m));
 }
-
-#if GUEST_PAGING_LEVELS == 2
-#define PGC_SH_guest_root_type PGC_SH_l2_32_shadow
-#elif GUEST_PAGING_LEVELS == 3
-#define PGC_SH_guest_root_type PGC_SH_l3_pae_shadow
-#else
-#define PGC_SH_guest_root_type PGC_SH_l4_64_shadow
-#endif
 
 /* Translation between mfns and gfns */
 static inline mfn_t
@@ -490,8 +480,6 @@ struct shadow_walk_t
 #define sh_map_and_validate_gl1e   INTERNAL_NAME(sh_map_and_validate_gl1e)
 #define sh_destroy_l4_shadow       INTERNAL_NAME(sh_destroy_l4_shadow)
 #define sh_destroy_l3_shadow       INTERNAL_NAME(sh_destroy_l3_shadow)
-#define sh_destroy_l3_subshadow    INTERNAL_NAME(sh_destroy_l3_subshadow)
-#define sh_unpin_all_l3_subshadows INTERNAL_NAME(sh_unpin_all_l3_subshadows)
 #define sh_destroy_l2_shadow       INTERNAL_NAME(sh_destroy_l2_shadow)
 #define sh_destroy_l1_shadow       INTERNAL_NAME(sh_destroy_l1_shadow)
 #define sh_unhook_32b_mappings     INTERNAL_NAME(sh_unhook_32b_mappings)
@@ -533,115 +521,6 @@ struct shadow_walk_t
                               SHADOW_PAGING_LEVELS)
 
 
-#if GUEST_PAGING_LEVELS == 3
-/*
- * Accounting information stored in the shadow of PAE Guest L3 pages.
- * Because these "L3 pages" are only 32-bytes, it is inconvenient to keep
- * various refcounts, etc., on the page_info of their page.  We provide extra
- * bookkeeping space in the shadow itself, and this is the structure
- * definition for that bookkeeping information.
- */
-struct pae_l3_bookkeeping {
-    u32 vcpus;                  /* bitmap of which vcpus are currently storing
-                                 * copies of this 32-byte page */
-    u32 refcount;               /* refcount for this 32-byte page */
-    u8 pinned;                  /* is this 32-byte page pinned or not? */
-};
-
-// Convert a shadow entry pointer into a pae_l3_bookkeeping pointer.
-#define sl3p_to_info(_ptr) ((struct pae_l3_bookkeeping *)         \
-                            (((unsigned long)(_ptr) & ~31) + 32))
-
-static void sh_destroy_l3_subshadow(struct vcpu *v, 
-                                     shadow_l3e_t *sl3e);
-
-/* Increment a subshadow ref
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_get_ref_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-    /* First ref to the subshadow takes a ref to the full shadow */
-    if ( bk->refcount == 0 ) 
-        sh_get_ref(smfn, 0);
-    if ( unlikely(++(bk->refcount) == 0) )
-    {
-        SHADOW_PRINTK("shadow l3 subshadow ref overflow, smfn=%" SH_PRI_mfn " sh=%p\n", 
-                       mfn_x(smfn), sl3e);
-        domain_crash_synchronous();
-    }
-}
-
-/* Decrement a subshadow ref.
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the 
- * entire shadow to disappear, so the caller must immediately unmap 
- * the pointer after calling. */ 
-static inline void sh_put_ref_l3_subshadow(struct vcpu *v, 
-                                            shadow_l3e_t *sl3e,
-                                            mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk;
-
-    bk = sl3p_to_info(sl3e);
-
-    ASSERT(bk->refcount > 0);
-    if ( --(bk->refcount) == 0 )
-    {
-        /* Need to destroy this subshadow */
-        sh_destroy_l3_subshadow(v, sl3e);
-        /* Last ref to the subshadow had a ref to the full shadow */
-        sh_put_ref(v, smfn, 0);
-    }
-}
-
-/* Pin a subshadow 
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow. */
-static inline void sh_pin_l3_subshadow(shadow_l3e_t *sl3e, mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( !bk->pinned )
-    {
-        bk->pinned = 1;
-        sh_get_ref_l3_subshadow(sl3e, smfn);
-    }
-}
-
-/* Unpin a sub-shadow. 
- * Called with a pointer to the subshadow, and the mfn of the
- * *first* page of the overall shadow.  Calling this may cause the 
- * entire shadow to disappear, so the caller must immediately unmap 
- * the pointer after calling. */ 
-static inline void sh_unpin_l3_subshadow(struct vcpu *v, 
-                                          shadow_l3e_t *sl3e,
-                                          mfn_t smfn)
-{
-    struct pae_l3_bookkeeping *bk = sl3p_to_info(sl3e);
-
-#if 0
-    debugtrace_printk("%s smfn=%05lx offset=%ld\n",
-                      __func__, mfn_x(smfn),
-                      ((unsigned long)sl3e & ~PAGE_MASK) / 64);
-#endif
-
-    if ( bk->pinned )
-    {
-        bk->pinned = 0;
-        sh_put_ref_l3_subshadow(v, sl3e, smfn);
-    }
-}
-
-#endif /* GUEST_PAGING_LEVELS == 3 */
-
 #if SHADOW_PAGING_LEVELS == 3
 #define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
 #endif
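
[Note: the MFN_FITS_IN_HVM_CR3 test retained just above encodes a simple bound: with 4KB pages, a machine frame number below 2^20 means the frame's physical address (mfn << 12) still fits in the 32 bits a 32-bit guest can load into CR3. A quick stand-alone check of that arithmetic; PAGE_SHIFT and the helper name are written out here for illustration.

/* Stand-alone check (not Xen code) of the MFN_FITS_IN_HVM_CR3 arithmetic. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static int mfn_fits_in_hvm_cr3(uint64_t mfn)
{
    return !(mfn >> 20);
}

int main(void)
{
    uint64_t mfn = (1ULL << 20) - 1;            /* highest frame that fits */
    printf("mfn %#llx -> paddr %#llx, fits: %d\n",
           (unsigned long long)mfn,
           (unsigned long long)(mfn << PAGE_SHIFT),
           mfn_fits_in_hvm_cr3(mfn));
    return 0;
}
]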
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/domain.h      Wed Oct 18 14:36:20 2006 +0100
@@ -134,18 +134,20 @@ struct pae_l3_cache { };
 #endif
 
 struct shadow_vcpu {
+#if CONFIG_PAGING_LEVELS >= 3
+    /* PAE guests: per-vcpu shadow top-level table */
+    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
+#endif
     /* Pointers to mode-specific entry points. */
     struct shadow_paging_mode *mode;
     /* Last MFN that we emulated a write to. */
     unsigned long last_emulated_mfn;
+    /* MFN of the last shadow that we shot a writeable mapping in */
+    unsigned long last_writeable_pte_smfn;
     /* HVM guest: paging enabled (CR0.PG)?  */
     unsigned int translate_enabled:1;
     /* Emulated fault needs to be propagated to guest? */
     unsigned int propagate_fault:1;
-#if CONFIG_PAGING_LEVELS >= 3
-    /* Shadow update requires this PAE cpu to recopy/install its L3 table. */
-    unsigned int pae_flip_pending:1;
-#endif
 };
 
 struct arch_vcpu
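
[Note: the new per-vcpu l3table replaces the hvm_lowmem_l3tab removed from struct hvm_vcpu below: four PAE l3 entries embedded directly in the vcpu, aligned to 32 bytes because a PAE top level is a 32-byte block whose base address CR3 carries with its low 5 bits reserved. A stand-alone illustration of the layout, not Xen's real structures.

/* Stand-alone model (not Xen code) of the per-vcpu PAE top level: four
 * 64-bit entries in one 32-byte-aligned block. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t l3_pgentry_t;

struct shadow_vcpu_model {
    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
};

int main(void)
{
    struct shadow_vcpu_model v = { { 0 } };
    /* PAE CR3 ignores the low 5 address bits, so the table must be
     * 32-byte aligned for its address to be representable. */
    printf("l3table at %p, 32-byte aligned: %s\n",
           (void *)v.l3table,
           ((uintptr_t)v.l3table & 31) == 0 ? "yes" : "no");
    return 0;
}

Keeping the table inside struct shadow_vcpu is also what lets sh_update_cr3() drop the old sh_map_domain_page_global() bookkeeping for shadow_vtable.]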
@@ -190,13 +192,12 @@ struct arch_vcpu
     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
     /* guest_table holds a ref to the page, and also a type-count unless
      * shadow refcounts are in use */
-    pagetable_t shadow_table;           /* (MFN) shadow of guest */
+    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                     /* (MA) value to install in HW CR3 */
 
-    void *guest_vtable;                 /* virtual address of pagetable */
-    void *shadow_vtable;                /* virtual address of shadow_table */
-    root_pgentry_t *monitor_vtable;            /* virtual address of monitor_table */
+    void *guest_vtable;                 /* virtual addr of pagetable */
+    root_pgentry_t *monitor_vtable;            /* virtual addr of monitor_table */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Oct 18 14:36:20 2006 +0100
@@ -41,11 +41,6 @@ struct hvm_vcpu {
 
     int                 xen_port;
 
-#if CONFIG_PAGING_LEVELS >= 3
-    l3_pgentry_t hvm_lowmem_l3tab[4]
-    __attribute__((__aligned__(32)));
-#endif
-
     /* Flags */
     int                 flag_dr_dirty;
 
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Wed Oct 18 14:36:20 2006 +0100
@@ -114,15 +114,14 @@ struct page_info
 #define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
 #define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
 #define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH_max_shadow    (13U<<28)
-#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
-#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_l1_64_shadow   (8U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow  (9U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow  (10U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow  (11U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow  (12U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow    (12U<<28)
+#define PGC_SH_p2m_table     (13U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (14U<<28) /* in use as a monitor table */
 #define PGC_SH_unused        (15U<<28)
 
 #define PGC_SH_type_mask     (15U<<28)
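
[Note: with PGC_SH_l3_pae_shadow gone, the remaining shadow types pack back down so the 4-bit field still tops out at PGC_SH_unused == 15. A stand-alone sketch of reading such a packed type field back out of a page's flags word; the shift macro and the decode helper are illustrative, Xen open-codes the <<28.

/* Stand-alone sketch (not Xen code) of decoding a 4-bit shadow type packed
 * into the top bits of a 32-bit flags word, mirroring the renumbered
 * PGC_SH_* values above. */
#include <stdint.h>
#include <stdio.h>

#define SH_TYPE_SHIFT   28
#define SH_TYPE_MASK    (15U << SH_TYPE_SHIFT)
#define SH_L2_64_SHADOW (10U << SH_TYPE_SHIFT)

static unsigned int sh_type(uint32_t flags)
{
    return (flags & SH_TYPE_MASK) >> SH_TYPE_SHIFT;
}

int main(void)
{
    uint32_t flags = SH_L2_64_SHADOW | 0x3;   /* type bits plus low flag bits */
    printf("shadow type index = %u\n", sh_type(flags));   /* prints 10 */
    return 0;
}
]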
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Wed Oct 18 14:36:20 2006 +0100
@@ -71,6 +71,7 @@ PERFCOUNTER_CPU(shadow_writeable_h_2,  "
 PERFCOUNTER_CPU(shadow_writeable_h_2,  "shadow writeable: 32pae w2k3")
 PERFCOUNTER_CPU(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
 PERFCOUNTER_CPU(shadow_writeable_h_4,  "shadow writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow_writeable_h_5,  "shadow writeable: 32b linux high")
 PERFCOUNTER_CPU(shadow_writeable_bf,   "shadow writeable brute-force")
 PERFCOUNTER_CPU(shadow_mappings,       "shadow removes all mappings")
 PERFCOUNTER_CPU(shadow_mappings_bf,    "shadow rm-mappings brute-force")
diff -r bd207697f0c7 -r 5c029fda79dc xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Wed Oct 18 13:43:35 2006 +0100
+++ b/xen/include/asm-x86/shadow.h      Wed Oct 18 14:36:20 2006 +0100
@@ -72,7 +72,6 @@
 #define SHADOW_SET_CHANGED            0x1
 #define SHADOW_SET_FLUSH              0x2
 #define SHADOW_SET_ERROR              0x4
-#define SHADOW_SET_L3PAE_RECOPY       0x8
 
 // How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
 #ifdef __x86_64__
@@ -406,7 +405,6 @@ shadow_update_cr3(struct vcpu *v)
  * for HVM guests, arch.monitor_table and hvm's guest CR3.
  *
  * Update ref counts to shadow tables appropriately.
- * For PAE, relocate L3 entries, if necessary, into low memory.
  */
 static inline void update_cr3(struct vcpu *v)
 {
@@ -549,13 +547,13 @@ shadow_remove_all_shadows_and_parents(st
  * Unshadow it, and recursively unshadow pages that reference it. */
 
 /* Remove all shadows of the guest mfn. */
-extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
 static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
 {
     int was_locked = shadow_lock_is_acquired(v->domain);
     if ( !was_locked )
         shadow_lock(v->domain);
-    sh_remove_shadows(v, gmfn, 1);
+    sh_remove_shadows(v, gmfn, 0, 1);
     if ( !was_locked )
         shadow_unlock(v->domain);
 }
@@ -587,7 +585,6 @@ shadow_guest_physmap_remove_page(struct 
 #define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
 #define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
 #define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
-#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
 #define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
 #define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
 #define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog