
[Xen-changelog] [xen-unstable] [XEN] Rename shadow2 to shadow and move the various source



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxxx
# Node ID fab84f9c0ce6d5d1fb89b31ef071148f781ae5a6
# Parent  5b9ff5e8653aa37d812a26526440fc4976e10b7a
[XEN] Rename shadow2 to shadow and move the various source
files into a sensible directory hierarchy.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/shadow2-common.c         | 3407 -------------------------
 xen/arch/x86/shadow2.c                | 4492 ----------------------------------
 xen/include/asm-x86/page-guest32.h    |  105 
 xen/include/asm-x86/shadow2-multi.h   |  116 
 xen/include/asm-x86/shadow2-private.h |  593 ----
 xen/include/asm-x86/shadow2-types.h   |  692 -----
 xen/include/asm-x86/shadow2.h         |  626 ----
 tools/libxc/xc_hvm_build.c            |    2 
 xen/arch/x86/Makefile                 |   21 
 xen/arch/x86/domain.c                 |   46 
 xen/arch/x86/domain_build.c           |    8 
 xen/arch/x86/domctl.c                 |    2 
 xen/arch/x86/hvm/hvm.c                |    6 
 xen/arch/x86/hvm/platform.c           |    4 
 xen/arch/x86/hvm/svm/svm.c            |   28 
 xen/arch/x86/hvm/vmx/vmcs.c           |    4 
 xen/arch/x86/hvm/vmx/vmx.c            |   20 
 xen/arch/x86/mm.c                     |  142 -
 xen/arch/x86/mm/Makefile              |    1 
 xen/arch/x86/mm/shadow/Makefile       |   15 
 xen/arch/x86/mm/shadow/common.c       | 3407 +++++++++++++++++++++++++
 xen/arch/x86/mm/shadow/multi.c        | 4492 ++++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/shadow/multi.h        |  116 
 xen/arch/x86/mm/shadow/page-guest32.h |  105 
 xen/arch/x86/mm/shadow/private.h      |  593 ++++
 xen/arch/x86/mm/shadow/types.h        |  692 +++++
 xen/arch/x86/traps.c                  |    8 
 xen/include/asm-x86/domain.h          |   18 
 xen/include/asm-x86/mm.h              |   82 
 xen/include/asm-x86/perfc_defn.h      |  102 
 xen/include/asm-x86/shadow.h          |  614 ++++
 31 files changed, 10257 insertions(+), 10302 deletions(-)

diff -r 5b9ff5e8653a -r fab84f9c0ce6 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Sun Aug 27 06:56:01 2006 +0100
+++ b/tools/libxc/xc_hvm_build.c        Mon Aug 28 12:09:36 2006 +0100
@@ -441,7 +441,7 @@ static int xc_hvm_build_internal(int xc_
         goto error_out;
     }
 
-    /* HVM domains must be put into shadow2 mode at the start of day */
+    /* HVM domains must be put into shadow mode at the start of day */
     if ( xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_ENABLE,
                            NULL, 0, NULL, 
                            XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  |
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/Makefile     Mon Aug 28 12:09:36 2006 +0100
@@ -2,6 +2,7 @@ subdir-y += cpu
 subdir-y += cpu
 subdir-y += genapic
 subdir-y += hvm
+subdir-y += mm
 subdir-y += oprofile
 
 subdir-$(x86_32) += x86_32
@@ -41,23 +42,6 @@ obj-y += usercopy.o
 obj-y += usercopy.o
 obj-y += x86_emulate.o
 
-ifneq ($(pae),n)
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s3.o shadow2_g3_on_s3.o
-else
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s2.o
-endif
-
-obj-$(x86_64) += shadow2-common.o shadow2_g4_on_s4.o shadow2_g3_on_s3.o \
-                 shadow2_g2_on_s3.o
-
-guest_levels  = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow2_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
-                -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
-
-shadow2_%.o: shadow2.c $(HDRS) Makefile
-       $(CC) $(CFLAGS) $(call shadow2_defns,$(@F)) -c $< -o $@
-
 obj-$(crash_debug) += gdbstub.o
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
@@ -86,9 +70,6 @@ boot/mkelf32: boot/mkelf32.c
 boot/mkelf32: boot/mkelf32.c
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
-shadow_guest32.o: shadow.c
-shadow_guest32pae.o: shadow.c
-
 .PHONY: clean
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
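
The pattern rules deleted above compile shadow2.c several times, once per (guest levels, shadow levels) pair: the levels are decoded from the object file name (e.g. shadow2_g2_on_s3.o) and turned into -DGUEST_PAGING_LEVELS / -DSHADOW_PAGING_LEVELS defines. The relocated rules in xen/arch/x86/mm/shadow/Makefile are not shown in this excerpt but presumably do the same for multi.c. As a purely illustrative sketch (not the actual contents of shadow2.c/multi.c), a single translation unit can specialise on those defines like this:

    /* Hypothetical sketch only: one source file built once per (guest, shadow)
     * level pair, with the Makefile supplying -DGUEST_PAGING_LEVELS=<g> and
     * -DSHADOW_PAGING_LEVELS=<s> for an object named shadow2_g<g>_on_s<s>.o. */
    #if GUEST_PAGING_LEVELS == 2
    typedef unsigned int guest_intpte_t;        /* 2-level guests use 32-bit PTEs */
    #else
    typedef unsigned long long guest_intpte_t;  /* PAE/long-mode guests use 64-bit PTEs */
    #endif

    int sketch_guest_paging_levels(void)  { return GUEST_PAGING_LEVELS; }
    int sketch_shadow_paging_levels(void) { return SHADOW_PAGING_LEVELS; }
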
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/domain.c     Mon Aug 28 12:09:36 2006 +0100
@@ -200,12 +200,12 @@ int arch_domain_create(struct domain *d)
 
 #endif /* __x86_64__ */
 
-    shadow2_lock_init(d);
-    for ( i = 0; i <= SHADOW2_MAX_ORDER; i++ )
-        INIT_LIST_HEAD(&d->arch.shadow2.freelists[i]);
-    INIT_LIST_HEAD(&d->arch.shadow2.p2m_freelist);
-    INIT_LIST_HEAD(&d->arch.shadow2.p2m_inuse);
-    INIT_LIST_HEAD(&d->arch.shadow2.toplevel_shadows);
+    shadow_lock_init(d);
+    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
+        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
+    INIT_LIST_HEAD(&d->arch.shadow.toplevel_shadows);
 
     if ( !is_idle_domain(d) )
     {
@@ -236,7 +236,7 @@ int arch_domain_create(struct domain *d)
 
 void arch_domain_destroy(struct domain *d)
 {
-    shadow2_final_teardown(d);
+    shadow_final_teardown(d);
 
     free_xenheap_pages(
         d->arch.mm_perdomain_pt,
@@ -342,10 +342,10 @@ int arch_set_info_guest(
         }
     }    
 
-    /* Shadow2: make sure the domain has enough shadow memory to
+    /* Shadow: make sure the domain has enough shadow memory to
      * boot another vcpu */
-    if ( shadow2_mode_enabled(d) 
-         && d->arch.shadow2.total_pages < shadow2_min_acceptable_pages(d) )
+    if ( shadow_mode_enabled(d) 
+         && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
     {
         destroy_gdt(v);
         return -ENOMEM;
@@ -357,8 +357,8 @@ int arch_set_info_guest(
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
-    if ( shadow2_mode_enabled(d) )
-        shadow2_update_paging_modes(v);
+    if ( shadow_mode_enabled(d) )
+        shadow_update_paging_modes(v);
 
     update_cr3(v);
 
@@ -936,11 +936,11 @@ void domain_relinquish_resources(struct 
     for_each_vcpu ( d, v )
     {
         /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling,
-         * or sh2_update_paging_modes()) */
+         * or sh_update_paging_modes()) */
         pfn = pagetable_get_pfn(v->arch.guest_table);
         if ( pfn != 0 )
         {
-            if ( shadow2_mode_refcounts(d) )
+            if ( shadow_mode_refcounts(d) )
                 put_page(mfn_to_page(pfn));
             else
                 put_page_and_type(mfn_to_page(pfn));
@@ -962,7 +962,7 @@ void domain_relinquish_resources(struct 
         hvm_relinquish_guest_resources(d);
 
     /* Tear down shadow mode stuff. */
-    shadow2_teardown(d);
+    shadow_teardown(d);
 
     /*
      * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
@@ -981,18 +981,18 @@ void domain_relinquish_resources(struct 
 
 void arch_dump_domain_info(struct domain *d)
 {
-    if ( shadow2_mode_enabled(d) )
-    {
-        printk("    shadow2 mode: ");
-        if ( d->arch.shadow2.mode & SHM2_enable )
+    if ( shadow_mode_enabled(d) )
+    {
+        printk("    shadow mode: ");
+        if ( d->arch.shadow.mode & SHM2_enable )
             printk("enabled ");
-        if ( shadow2_mode_refcounts(d) )
+        if ( shadow_mode_refcounts(d) )
             printk("refcounts ");
-        if ( shadow2_mode_log_dirty(d) )
+        if ( shadow_mode_log_dirty(d) )
             printk("log_dirty ");
-        if ( shadow2_mode_translate(d) )
+        if ( shadow_mode_translate(d) )
             printk("translate ");
-        if ( shadow2_mode_external(d) )
+        if ( shadow_mode_external(d) )
             printk("external ");
         printk("\n");
     }
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Mon Aug 28 12:09:36 2006 +0100
@@ -679,8 +679,8 @@ int construct_dom0(struct domain *d,
         (void)alloc_vcpu(d, i, i);
 
     /* Set up CR3 value for write_ptbase */
-    if ( shadow2_mode_enabled(v->domain) )
-        shadow2_update_paging_modes(v);
+    if ( shadow_mode_enabled(v->domain) )
+        shadow_update_paging_modes(v);
     else
         update_cr3(v);
 
@@ -791,8 +791,8 @@ int construct_dom0(struct domain *d,
     new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
 
     if ( opt_dom0_shadow )
-        if ( shadow2_test_enable(d) == 0 ) 
-            shadow2_update_paging_modes(v);
+        if ( shadow_test_enable(d) == 0 ) 
+            shadow_update_paging_modes(v);
 
     if ( supervisor_mode_kernel )
     {
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/domctl.c     Mon Aug 28 12:09:36 2006 +0100
@@ -39,7 +39,7 @@ long arch_do_domctl(
         d = find_domain_by_id(domctl->domain);
         if ( d != NULL )
         {
-            ret = shadow2_domctl(d, &domctl->u.shadow_op, u_domctl);
+            ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
             put_domain(d);
             copy_to_guest(u_domctl, domctl, 1);
         } 
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Aug 28 12:09:36 2006 +0100
@@ -384,8 +384,8 @@ int hvm_copy(void *buf, unsigned long va
         if (count > size)
             count = size;
 
-        gfn = shadow2_gva_to_gfn(v, vaddr);
-        mfn = mfn_x(sh2_vcpu_gfn_to_mfn(v, gfn));
+        gfn = shadow_gva_to_gfn(v, vaddr);
+        mfn = mfn_x(sh_vcpu_gfn_to_mfn(v, gfn));
 
         if (mfn == INVALID_MFN)
             return 0;
@@ -539,7 +539,7 @@ void hvm_do_hypercall(struct cpu_user_re
         return;
     }
 
-    if ( current->arch.shadow2.mode->guest_levels == 4 )
+    if ( current->arch.shadow.mode->guest_levels == 4 )
     {
         pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
                                                        pregs->rsi,
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/hvm/platform.c       Mon Aug 28 12:09:36 2006 +0100
@@ -721,7 +721,7 @@ void send_pio_req(struct cpu_user_regs *
 
     if (pvalid) {
         if (hvm_paging_enabled(current))
-            p->u.data = shadow2_gva_to_gpa(current, value);
+            p->u.data = shadow_gva_to_gpa(current, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
@@ -771,7 +771,7 @@ void send_mmio_req(
 
     if (pvalid) {
         if (hvm_paging_enabled(v))
-            p->u.data = shadow2_gva_to_gpa(v, value);
+            p->u.data = shadow_gva_to_gpa(v, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Aug 28 12:09:36 2006 +0100
@@ -29,7 +29,7 @@
 #include <xen/domain_page.h>
 #include <asm/current.h>
 #include <asm/io.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -746,10 +746,10 @@ static void svm_final_setup_guest(struct
     if ( v != d->vcpu[0] )
         return;
 
-    if ( !shadow2_mode_external(d) )
+    if ( !shadow_mode_external(d) )
     {
         DPRINTK("Can't init HVM for dom %u vcpu %u: "
-                "not in shadow2 external mode\n", d->domain_id, v->vcpu_id);
+                "not in shadow external mode\n", d->domain_id, v->vcpu_id);
         domain_crash(d);
     }
 
@@ -914,7 +914,7 @@ static int svm_do_page_fault(unsigned lo
                 va, eip, (unsigned long)regs->error_code);
 //#endif
 
-    result = shadow2_fault(va, regs); 
+    result = shadow_fault(va, regs); 
 
     if( result ) {
         /* Let's make sure that the Guest TLB is flushed */
@@ -1562,7 +1562,7 @@ static int svm_set_cr0(unsigned long val
         v->arch.guest_table = pagetable_from_pfn(mfn);
         if ( old_base_mfn )
             put_page(mfn_to_page(old_base_mfn));
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
                     (unsigned long) (mfn << PAGE_SHIFT));
@@ -1588,14 +1588,14 @@ static int svm_set_cr0(unsigned long val
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             return 0;
         }
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
         vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
         /* we should take care of this kind of situation */
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
         vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
@@ -1706,7 +1706,7 @@ static int mov_to_cr(int gpreg, int cr, 
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow2_update_cr3(v);
+            shadow_update_cr3(v);
         }
         else 
         {
@@ -1771,7 +1771,7 @@ static int mov_to_cr(int gpreg, int cr, 
                 v->arch.guest_table = pagetable_from_pfn(mfn);
                 if ( old_base_mfn )
                     put_page(mfn_to_page(old_base_mfn));
-                shadow2_update_paging_modes(v);
+                shadow_update_paging_modes(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                             (unsigned long) (mfn << PAGE_SHIFT));
@@ -1808,7 +1808,7 @@ static int mov_to_cr(int gpreg, int cr, 
         if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
         {
             set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-            shadow2_update_paging_modes(v);
+            shadow_update_paging_modes(v);
         }
         break;
     }
@@ -2149,7 +2149,7 @@ void svm_handle_invlpg(const short invlp
 
     /* Overkill, we may not this */
     set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-    shadow2_invlpg(v, g_vaddr);
+    shadow_invlpg(v, g_vaddr);
 }
 
 
@@ -2520,7 +2520,7 @@ void walk_shadow_and_guest_pt(unsigned l
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long gpa;
 
-    gpa = shadow2_gva_to_gpa(current, gva);
+    gpa = shadow_gva_to_gpa(current, gva);
     printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
     if( !svm_paging_enabled(v) || mmio_space(gpa) )
         return;
@@ -2591,7 +2591,7 @@ asmlinkage void svm_vmexit_handler(struc
         if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 
         {
             if (svm_paging_enabled(v) && 
-                !mmio_space(shadow2_gva_to_gpa(current, vmcb->exitinfo2)))
+                !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
             {
                 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
                        "I1=%llx,I2=%llx,INT=%llx, "
@@ -2601,7 +2601,7 @@ asmlinkage void svm_vmexit_handler(struc
                        (unsigned long long) vmcb->exitinfo1,
                        (unsigned long long) vmcb->exitinfo2,
                        (unsigned long long) vmcb->exitintinfo.bytes,
-                       (unsigned long long) shadow2_gva_to_gpa(current, vmcb->exitinfo2));
+                       (unsigned long long) shadow_gva_to_gpa(current, vmcb->exitinfo2));
             }
             else 
             {
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Aug 28 12:09:36 2006 +0100
@@ -35,7 +35,7 @@
 #include <xen/event.h>
 #include <xen/kernel.h>
 #include <xen/keyhandler.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 
 static int vmcs_size;
 static int vmcs_order;
@@ -272,7 +272,7 @@ static void vmx_do_launch(struct vcpu *v
     error |= __vmwrite(GUEST_TR_BASE, 0);
     error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
 
-    shadow2_update_paging_modes(v);
+    shadow_update_paging_modes(v);
     printk("%s(): GUEST_CR3<=%08lx, HOST_CR3<=%08lx\n",
            __func__, v->arch.hvm_vcpu.hw_cr3, v->arch.cr3);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Aug 28 12:09:36 2006 +0100
@@ -40,7 +40,7 @@
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vmx/cpu.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 #include <public/sched.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/vpic.h>
@@ -66,10 +66,10 @@ static int vmx_initialize_guest_resource
     if ( v->vcpu_id != 0 )
         return 1;
 
-    if ( !shadow2_mode_external(d) )
+    if ( !shadow_mode_external(d) )
     {
         DPRINTK("Can't init HVM for dom %u vcpu %u: "
-                "not in shadow2 external mode\n", 
+                "not in shadow external mode\n", 
                 d->domain_id, v->vcpu_id);
         domain_crash(d);
     }
@@ -865,7 +865,7 @@ static int vmx_do_page_fault(unsigned lo
     }
 #endif
 
-    result = shadow2_fault(va, regs);
+    result = shadow_fault(va, regs);
 
     TRACE_VMEXIT (2,result);
 #if 0
@@ -1039,7 +1039,7 @@ static void vmx_vmexit_do_invlpg(unsigne
      * We do the safest things first, then try to update the shadow
      * copying from guest
      */
-    shadow2_invlpg(v, va);
+    shadow_invlpg(v, va);
 }
 
 
@@ -1301,7 +1301,7 @@ vmx_world_restore(struct vcpu *v, struct
 
  skip_cr3:
 
-    shadow2_update_paging_modes(v);
+    shadow_update_paging_modes(v);
     if (!vmx_paging_enabled(v))
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
     else
@@ -1504,7 +1504,7 @@ static int vmx_set_cr0(unsigned long val
         v->arch.guest_table = pagetable_from_pfn(mfn);
         if (old_base_mfn)
             put_page(mfn_to_page(old_base_mfn));
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                     (unsigned long) (mfn << PAGE_SHIFT));
@@ -1577,7 +1577,7 @@ static int vmx_set_cr0(unsigned long val
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
     }
 
     return 1;
@@ -1662,7 +1662,7 @@ static int mov_to_cr(int gp, int cr, str
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow2_update_cr3(v);
+            shadow_update_cr3(v);
         } else {
             /*
              * If different, make a shadow. Check if the PDBR is valid
@@ -1755,7 +1755,7 @@ static int mov_to_cr(int gp, int cr, str
          * all TLB entries except global entries.
          */
         if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
-            shadow2_update_paging_modes(v);
+            shadow_update_paging_modes(v);
         break;
     }
     default:
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/mm.c Mon Aug 28 12:09:36 2006 +0100
@@ -454,12 +454,12 @@ int map_ldt_shadow_page(unsigned int off
 
     res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
 
-    if ( !res && unlikely(shadow2_mode_refcounts(d)) )
-    {
-        shadow2_lock(d);
-        shadow2_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
+    if ( !res && unlikely(shadow_mode_refcounts(d)) )
+    {
+        shadow_lock(d);
+        shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
         res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
-        shadow2_unlock(d);
+        shadow_unlock(d);
     }
 
     if ( unlikely(!res) )
@@ -527,7 +527,7 @@ get_linear_pagetable(
     struct page_info *page;
     unsigned long pfn;
 
-    ASSERT( !shadow2_mode_refcounts(d) );
+    ASSERT( !shadow_mode_refcounts(d) );
 
     if ( (root_get_flags(re) & _PAGE_RW) )
     {
@@ -602,12 +602,12 @@ get_page_from_l1e(
         d = dom_io;
     }
 
-    /* Foreign mappings into guests in shadow2 external mode don't
+    /* Foreign mappings into guests in shadow external mode don't
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
     okay = (((l1e_get_flags(l1e) & _PAGE_RW) && 
-             !(unlikely(shadow2_mode_external(d) && (d != current->domain))))
+             !(unlikely(shadow_mode_external(d) && (d != current->domain))))
             ? get_page_and_type(page, d, PGT_writable_page)
             : get_page(page, d));
     if ( !okay )
@@ -771,9 +771,9 @@ void put_page_from_l1e(l1_pgentry_t l1e,
     }
 
     /* Remember we didn't take a type-count of foreign writable mappings
-     * to shadow2 external domains */
+     * to shadow external domains */
     if ( (l1e_get_flags(l1e) & _PAGE_RW) && 
-         !(unlikely((e != d) && shadow2_mode_external(e))) )
+         !(unlikely((e != d) && shadow_mode_external(e))) )
     {
         put_page_and_type(page);
     }
@@ -830,7 +830,7 @@ static int alloc_l1_table(struct page_in
     l1_pgentry_t  *pl1e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
     pl1e = map_domain_page(pfn);
 
@@ -883,7 +883,7 @@ static int create_pae_xen_mappings(l3_pg
      *     a. alloc_l3_table() calls this function and this check will fail
      *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
      *
-     * XXX -- this needs revisiting for shadow2_mode_refcount()==true...
+     * XXX -- this needs revisiting for shadow_mode_refcount()==true...
      */
     page = l3e_get_page(l3e3);
     BUG_ON(page->u.inuse.type_info & PGT_pinned);
@@ -1007,7 +1007,7 @@ static int alloc_l2_table(struct page_in
     l2_pgentry_t  *pl2e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
     
     pl2e = map_domain_page(pfn);
 
@@ -1059,7 +1059,7 @@ static int alloc_l3_table(struct page_in
     l3_pgentry_t  *pl3e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
 #ifdef CONFIG_X86_PAE
     /*
@@ -1120,7 +1120,7 @@ static int alloc_l4_table(struct page_in
     unsigned long vaddr;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
     {
@@ -1234,8 +1234,8 @@ static inline int update_l1e(l1_pgentry_
                              struct vcpu *v)
 {
     int rv = 1;
-    if ( unlikely(shadow2_mode_enabled(v->domain)) )
-        shadow2_lock(v->domain);
+    if ( unlikely(shadow_mode_enabled(v->domain)) )
+        shadow_lock(v->domain);
 #ifndef PTE_UPDATE_WITH_CMPXCHG
     rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
 #else
@@ -1266,10 +1266,10 @@ static inline int update_l1e(l1_pgentry_
         }
     }
 #endif
-    if ( unlikely(shadow2_mode_enabled(v->domain)) )
-    {
-        shadow2_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
-        shadow2_unlock(v->domain);    
+    if ( unlikely(shadow_mode_enabled(v->domain)) )
+    {
+        shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
+        shadow_unlock(v->domain);    
     }
     return rv;
 }
@@ -1339,13 +1339,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
 #endif
 #define UPDATE_ENTRY(_t,_p,_o,_n,_m)  ({                            \
     int rv;                                                         \
-    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
-        shadow2_lock(current->domain);                              \
+    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
+        shadow_lock(current->domain);                              \
     rv = _UPDATE_ENTRY(_t, _p, _o, _n);                             \
-    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
+    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
     {                                                               \
-        shadow2_validate_guest_entry(current, _mfn(_m), (_p));      \
-        shadow2_unlock(current->domain);                            \
+        shadow_validate_guest_entry(current, _mfn(_m), (_p));      \
+        shadow_unlock(current->domain);                            \
     }                                                               \
     rv;                                                             \
 })
@@ -1581,21 +1581,21 @@ void free_page_type(struct page_info *pa
          */
         this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
 
-        if ( unlikely(shadow2_mode_enabled(owner)
-                 && !shadow2_lock_is_acquired(owner)) )
+        if ( unlikely(shadow_mode_enabled(owner)
+                 && !shadow_lock_is_acquired(owner)) )
         {
             /* Raw page tables are rewritten during save/restore. */
-            if ( !shadow2_mode_translate(owner) )
+            if ( !shadow_mode_translate(owner) )
                 mark_dirty(owner, page_to_mfn(page));
 
-            if ( shadow2_mode_refcounts(owner) )
+            if ( shadow_mode_refcounts(owner) )
                 return;
 
             gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
             ASSERT(VALID_M2P(gmfn));
-            shadow2_lock(owner);
-            shadow2_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
-            shadow2_unlock(owner);
+            shadow_lock(owner);
+            shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
+            shadow_unlock(owner);
         }
     }
 
@@ -1760,7 +1760,7 @@ int get_page_type(struct page_info *page
 #endif
                     /* Fixme: add code to propagate va_unknown to subtables. */
                     if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
-                         !shadow2_mode_refcounts(page_get_owner(page)) )
+                         !shadow_mode_refcounts(page_get_owner(page)) )
                         return 0;
                     /* This table is possibly mapped at multiple locations. */
                     nx &= ~PGT_va_mask;
@@ -1810,7 +1810,7 @@ int new_guest_cr3(unsigned long mfn)
     if ( hvm_guest(v) && !hvm_paging_enabled(v) )
         domain_crash_synchronous();
 
-    if ( shadow2_mode_refcounts(d) )
+    if ( shadow_mode_refcounts(d) )
     {
         okay = get_page_from_pagenr(mfn, d);
         if ( unlikely(!okay) )
@@ -1858,7 +1858,7 @@ int new_guest_cr3(unsigned long mfn)
 
     if ( likely(old_base_mfn != 0) )
     {
-        if ( shadow2_mode_refcounts(d) )
+        if ( shadow_mode_refcounts(d) )
             put_page(mfn_to_page(old_base_mfn));
         else
             put_page_and_type(mfn_to_page(old_base_mfn));
@@ -2043,7 +2043,7 @@ int do_mmuext_op(
             type = PGT_root_page_table;
 
         pin_page:
-            if ( shadow2_mode_refcounts(FOREIGNDOM) )
+            if ( shadow_mode_refcounts(FOREIGNDOM) )
                 break;
 
             okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
@@ -2065,7 +2065,7 @@ int do_mmuext_op(
             break;
 
         case MMUEXT_UNPIN_TABLE:
-            if ( shadow2_mode_refcounts(d) )
+            if ( shadow_mode_refcounts(d) )
                 break;
 
             if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
@@ -2078,11 +2078,11 @@ int do_mmuext_op(
             {
                 put_page_and_type(page);
                 put_page(page);
-                if ( shadow2_mode_enabled(d) )
+                if ( shadow_mode_enabled(d) )
                 {
-                    shadow2_lock(d);
-                    shadow2_remove_all_shadows(v, _mfn(mfn));
-                    shadow2_unlock(d);
+                    shadow_lock(d);
+                    shadow_remove_all_shadows(v, _mfn(mfn));
+                    shadow_unlock(d);
                 }
             }
             else
@@ -2125,8 +2125,8 @@ int do_mmuext_op(
             break;
     
         case MMUEXT_INVLPG_LOCAL:
-            if ( !shadow2_mode_enabled(d) 
-                 || shadow2_invlpg(v, op.arg1.linear_addr) != 0 )
+            if ( !shadow_mode_enabled(d) 
+                 || shadow_invlpg(v, op.arg1.linear_addr) != 0 )
                 local_flush_tlb_one(op.arg1.linear_addr);
             break;
 
@@ -2173,7 +2173,7 @@ int do_mmuext_op(
             unsigned long ptr  = op.arg1.linear_addr;
             unsigned long ents = op.arg2.nr_ents;
 
-            if ( shadow2_mode_external(d) )
+            if ( shadow_mode_external(d) )
             {
                 MEM_LOG("ignoring SET_LDT hypercall from external "
                         "domain %u", d->domain_id);
@@ -2319,7 +2319,7 @@ int do_mmu_update(
             case PGT_l3_page_table:
             case PGT_l4_page_table:
             {
-                if ( shadow2_mode_refcounts(d) )
+                if ( shadow_mode_refcounts(d) )
                 {
                     DPRINTK("mmu update on shadow-refcounted domain!");
                     break;
@@ -2372,16 +2372,16 @@ int do_mmu_update(
                 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
                     break;
 
-                if ( unlikely(shadow2_mode_enabled(d)) )
-                    shadow2_lock(d);
+                if ( unlikely(shadow_mode_enabled(d)) )
+                    shadow_lock(d);
 
                 *(intpte_t *)va = req.val;
                 okay = 1;
 
-                if ( unlikely(shadow2_mode_enabled(d)) )
+                if ( unlikely(shadow_mode_enabled(d)) )
                 {
-                    shadow2_validate_guest_entry(v, _mfn(mfn), va);
-                    shadow2_unlock(d);
+                    shadow_validate_guest_entry(v, _mfn(mfn), va);
+                    shadow_unlock(d);
                 }
 
                 put_page_type(page);
@@ -2405,8 +2405,8 @@ int do_mmu_update(
                 break;
             }
 
-            if ( shadow2_mode_translate(FOREIGNDOM) )
-                shadow2_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
+            if ( shadow_mode_translate(FOREIGNDOM) )
+                shadow_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
             else 
                 set_gpfn_from_mfn(mfn, gpfn);
             okay = 1;
@@ -2492,7 +2492,7 @@ static int create_grant_pte_mapping(
         goto failed;
     } 
 
-    if ( !shadow2_mode_refcounts(d) )
+    if ( !shadow_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
     put_page_type(page);
@@ -2590,7 +2590,7 @@ static int create_grant_va_mapping(
                     l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
         return GNTST_general_error;
 
-    if ( !shadow2_mode_refcounts(d) )
+    if ( !shadow_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
     return GNTST_okay;
@@ -2714,10 +2714,10 @@ int do_update_va_mapping(unsigned long v
 
     perfc_incrc(calls_to_update_va);
 
-    if ( unlikely(!__addr_ok(va) && !shadow2_mode_external(d)) )
+    if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
         return -EINVAL;
 
-    if ( unlikely(shadow2_mode_refcounts(d)) )
+    if ( unlikely(shadow_mode_refcounts(d)) )
     {
         DPRINTK("Grant op on a shadow-refcounted domain\n");
         return -EINVAL; 
@@ -2725,11 +2725,11 @@ int do_update_va_mapping(unsigned long v
 
     LOCK_BIGLOCK(d);
 
-    if ( likely(rc == 0) && unlikely(shadow2_mode_enabled(d)) )
+    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
     {
         if ( unlikely(this_cpu(percpu_mm_info).foreign &&
-                      (shadow2_mode_translate(d) ||
-                       shadow2_mode_translate(
+                      (shadow_mode_translate(d) ||
+                       shadow_mode_translate(
                            this_cpu(percpu_mm_info).foreign))) )
         {
             /*
@@ -2770,8 +2770,8 @@ int do_update_va_mapping(unsigned long v
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            if ( !shadow2_mode_enabled(d) 
-                 || (shadow2_invlpg(current, va) != 0) ) 
+            if ( !shadow_mode_enabled(d) 
+                 || (shadow_invlpg(current, va) != 0) ) 
                 local_flush_tlb_one(va);
             break;
         case UVMF_ALL:
@@ -3006,7 +3006,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
             break;
         }
 
-        if ( !shadow2_mode_translate(d) || (mfn == 0) )
+        if ( !shadow_mode_translate(d) || (mfn == 0) )
         {
             put_domain(d);
             return -EINVAL;
@@ -3196,21 +3196,21 @@ static int ptwr_emulated_update(
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
     if ( do_cmpxchg )
     {
-        if ( shadow2_mode_enabled(d) )
-            shadow2_lock(d);
+        if ( shadow_mode_enabled(d) )
+            shadow_lock(d);
         ol1e = l1e_from_intpte(old);
         if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
         {
-            if ( shadow2_mode_enabled(d) )
-                shadow2_unlock(d);
+            if ( shadow_mode_enabled(d) )
+                shadow_unlock(d);
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
             return X86EMUL_CMPXCHG_FAILED;
         }
-        if ( unlikely(shadow2_mode_enabled(v->domain)) )
-        {
-            shadow2_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
-            shadow2_unlock(v->domain);    
+        if ( unlikely(shadow_mode_enabled(v->domain)) )
+        {
+            shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
+            shadow_unlock(v->domain);    
         }
     }
     else
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/arch/x86/traps.c      Mon Aug 28 12:09:36 2006 +0100
@@ -870,8 +870,8 @@ static int fixup_page_fault(unsigned lon
 
     if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
     {
-        if ( shadow2_mode_external(d) && guest_mode(regs) )
-            return shadow2_fault(addr, regs);
+        if ( shadow_mode_external(d) && guest_mode(regs) )
+            return shadow_fault(addr, regs);
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
@@ -890,8 +890,8 @@ static int fixup_page_fault(unsigned lon
          ptwr_do_page_fault(d, addr, regs) )
         return EXCRET_fault_fixed;
 
-    if ( shadow2_mode_enabled(d) )
-        return shadow2_fault(addr, regs);
+    if ( shadow_mode_enabled(d) )
+        return shadow_fault(addr, regs);
 
     return 0;
 }
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/include/asm-x86/domain.h      Mon Aug 28 12:09:36 2006 +0100
@@ -59,10 +59,10 @@ extern void hypercall_page_initialise(st
 
 struct shadow_domain {
     u32               mode;  /* flags to control shadow operation */
-    spinlock_t        lock;  /* shadow2 domain lock */
+    spinlock_t        lock;  /* shadow domain lock */
     int               locker; /* processor which holds the lock */
     const char       *locker_function; /* Func that took it */
-    struct list_head  freelists[SHADOW2_MAX_ORDER + 1]; 
+    struct list_head  freelists[SHADOW_MAX_ORDER + 1]; 
     struct list_head  p2m_freelist;
     struct list_head  p2m_inuse;
     struct list_head  toplevel_shadows;
@@ -70,10 +70,10 @@ struct shadow_domain {
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages in p2m map */
 
-    /* Shadow2 hashtable */
-    struct shadow2_hash_entry *hash_table;
-    struct shadow2_hash_entry *hash_freelist;
-    struct shadow2_hash_entry *hash_allocations;
+    /* Shadow hashtable */
+    struct shadow_hash_entry *hash_table;
+    struct shadow_hash_entry *hash_freelist;
+    struct shadow_hash_entry *hash_allocations;
     int hash_walking;  /* Some function is walking the hash table */
 
     /* Shadow log-dirty bitmap */
@@ -107,7 +107,7 @@ struct arch_domain
     /* Shadow-translated guest: Pseudophys base address of reserved area. */
     unsigned long first_reserved_pfn;
 
-    struct shadow_domain shadow2;
+    struct shadow_domain shadow;
 
     /* Shadow translated domain: P2M mapping */
     pagetable_t phys_table;
@@ -135,7 +135,7 @@ struct pae_l3_cache { };
 
 struct shadow_vcpu {
     /* Pointers to mode-specific entry points. */
-    struct shadow2_paging_mode *mode;
+    struct shadow_paging_mode *mode;
     /* Last MFN that we emulated a write to. */
     unsigned long last_emulated_mfn;
     /* HVM guest: paging enabled (CR0.PG)?  */
@@ -201,7 +201,7 @@ struct arch_vcpu
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
 
-    struct shadow_vcpu shadow2;
+    struct shadow_vcpu shadow;
 } __cacheline_aligned;
 
 /* shorthands to improve code legibility */
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Mon Aug 28 12:09:36 2006 +0100
@@ -22,7 +22,7 @@ struct page_info
     /* Each frame can be threaded onto a doubly-linked list. */
     union {
         struct list_head list;
-        /* Shadow2 uses this field as an up-pointer in lower-level shadows */
+        /* Shadow uses this field as an up-pointer in lower-level shadows */
         paddr_t up;
     };
 
@@ -59,7 +59,7 @@ struct page_info
         /* Only used on guest pages with a shadow.
          * Guest pages with a shadow must have a non-zero type count, so this
          * does not conflict with the tlbflush timestamp. */
-        u32 shadow2_flags;
+        u32 shadow_flags;
 
         // XXX -- we expect to add another field here, to be used for min/max
         // purposes, which is only used for shadow pages.
@@ -76,7 +76,7 @@ struct page_info
 #define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
 #define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */
 
-#ifndef SHADOW2
+#ifndef SHADOW
 #define PGT_l1_shadow       PGT_l1_page_table
 #define PGT_l2_shadow       PGT_l2_page_table
 #define PGT_l3_shadow       PGT_l3_page_table
@@ -117,7 +117,7 @@ struct page_info
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
 
-#ifndef SHADOW2
+#ifndef SHADOW
 #ifdef __x86_64__
 #define PGT_high_mfn_shift  52
 #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
@@ -132,7 +132,7 @@ struct page_info
 #define PGT_score_shift     23
 #define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
 #endif
-#endif /* SHADOW2 */
+#endif /* SHADOW */
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
@@ -146,38 +146,38 @@ struct page_info
  /* 29-bit count of references to this frame. */
 #define PGC_count_mask      ((1U<<29)-1)
 
-/* shadow2 uses the count_info on shadow pages somewhat differently */
-/* NB: please coordinate any changes here with the SH2F's in shadow2.h */
-#define PGC_SH2_none           (0U<<28) /* on the shadow2 free list */
-#define PGC_SH2_min_shadow     (1U<<28)
-#define PGC_SH2_l1_32_shadow   (1U<<28) /* shadowing a 32-bit L1 guest page */
-#define PGC_SH2_fl1_32_shadow  (2U<<28) /* L1 shadow for a 32b 4M superpage */
-#define PGC_SH2_l2_32_shadow   (3U<<28) /* shadowing a 32-bit L2 guest page */
-#define PGC_SH2_l1_pae_shadow  (4U<<28) /* shadowing a pae L1 page */
-#define PGC_SH2_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
-#define PGC_SH2_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
-#define PGC_SH2_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH2_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH2_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH2_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH2_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH2_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH2_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH2_max_shadow    (13U<<28)
-#define PGC_SH2_p2m_table     (14U<<28) /* in use as the p2m table */
-#define PGC_SH2_monitor_table (15U<<28) /* in use as a monitor table */
-#define PGC_SH2_unused        (15U<<28)
-
-#define PGC_SH2_type_mask     (15U<<28)
-#define PGC_SH2_type_shift          28
-
-#define PGC_SH2_pinned         (1U<<27)
-
-#define _PGC_SH2_log_dirty          26
-#define PGC_SH2_log_dirty      (1U<<26)
+/* shadow uses the count_info on shadow pages somewhat differently */
+/* NB: please coordinate any changes here with the SHF's in shadow.h */
+#define PGC_SH_none           (0U<<28) /* on the shadow free list */
+#define PGC_SH_min_shadow     (1U<<28)
+#define PGC_SH_l1_32_shadow   (1U<<28) /* shadowing a 32-bit L1 guest page */
+#define PGC_SH_fl1_32_shadow  (2U<<28) /* L1 shadow for a 32b 4M superpage */
+#define PGC_SH_l2_32_shadow   (3U<<28) /* shadowing a 32-bit L2 guest page */
+#define PGC_SH_l1_pae_shadow  (4U<<28) /* shadowing a pae L1 page */
+#define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
+#define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
+#define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
+#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
+#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow    (13U<<28)
+#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_unused        (15U<<28)
+
+#define PGC_SH_type_mask     (15U<<28)
+#define PGC_SH_type_shift          28
+
+#define PGC_SH_pinned         (1U<<27)
+
+#define _PGC_SH_log_dirty          26
+#define PGC_SH_log_dirty      (1U<<26)
 
 /* 26 bit ref count for shadow pages */
-#define PGC_SH2_count_mask    ((1U<<26) - 1)
+#define PGC_SH_count_mask    ((1U<<26) - 1)
 
 /* We trust the slab allocator in slab.c, and our use of it. */
 #define PageSlab(page)     (1)
@@ -201,9 +201,9 @@ static inline u32 pickle_domptr(struct d
 
 /* The order of the largest allocation unit we use for shadow pages */
 #if CONFIG_PAGING_LEVELS == 2
-#define SHADOW2_MAX_ORDER 0 /* Only ever need 4k allocations */
+#define SHADOW_MAX_ORDER 0 /* Only ever need 4k allocations */
 #else  
-#define SHADOW2_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
 #endif
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
@@ -227,7 +227,7 @@ extern int shadow_remove_all_write_acces
 extern int shadow_remove_all_write_access(
     struct domain *d, unsigned long gmfn, unsigned long mfn);
 extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
-extern int _shadow2_mode_refcounts(struct domain *d);
+extern int _shadow_mode_refcounts(struct domain *d);
 
 static inline void put_page(struct page_info *page)
 {
@@ -259,7 +259,7 @@ static inline int get_page(struct page_i
              unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
              unlikely(d != _domain) )                /* Wrong owner? */
         {
-            if ( !_shadow2_mode_refcounts(domain) )
+            if ( !_shadow_mode_refcounts(domain) )
                 DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" 
                         PRtype_info "\n",
                         page_to_mfn(page), domain, unpickle_domptr(d),
@@ -345,11 +345,11 @@ int check_descriptor(struct desc_struct 
 
 
 #define mfn_to_gmfn(_d, mfn)                            \
-    ( (shadow2_mode_translate(_d))                      \
+    ( (shadow_mode_translate(_d))                      \
       ? get_gpfn_from_mfn(mfn)                          \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(sh2_gfn_to_mfn(_d, gpfn))
+#define gmfn_to_mfn(_d, gpfn)  mfn_x(sh_gfn_to_mfn(_d, gpfn))
 
 
 /*
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Mon Aug 28 12:09:36 2006 +0100
@@ -30,59 +30,59 @@ PERFCOUNTER_CPU(exception_fixed,        
 PERFCOUNTER_CPU(exception_fixed,        "pre-exception fixed")
 
 
-/* Shadow2 counters */
-PERFCOUNTER_CPU(shadow2_alloc,          "calls to shadow2_alloc")
-PERFCOUNTER_CPU(shadow2_alloc_tlbflush, "shadow2_alloc flushed TLBs")
+/* Shadow counters */
+PERFCOUNTER_CPU(shadow_alloc,          "calls to shadow_alloc")
+PERFCOUNTER_CPU(shadow_alloc_tlbflush, "shadow_alloc flushed TLBs")
 
 /* STATUS counters do not reset when 'P' is hit */
-PERFSTATUS(shadow2_alloc_count,         "number of shadow pages in use")
-PERFCOUNTER_CPU(shadow2_free,           "calls to shadow2_free")
-PERFCOUNTER_CPU(shadow2_prealloc_1,     "shadow2 recycles old shadows")
-PERFCOUNTER_CPU(shadow2_prealloc_2,     "shadow2 recycles in-use shadows")
-PERFCOUNTER_CPU(shadow2_linear_map_failed, "shadow2 hit read-only linear map")
-PERFCOUNTER_CPU(shadow2_a_update,       "shadow2 A bit update")
-PERFCOUNTER_CPU(shadow2_ad_update,      "shadow2 A&D bit update")
-PERFCOUNTER_CPU(shadow2_fault,          "calls to shadow2_fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_bad_gfn, "shadow2_fault guest bad gfn")
-PERFCOUNTER_CPU(shadow2_fault_bail_not_present, 
-                                        "shadow2_fault guest not-present")
-PERFCOUNTER_CPU(shadow2_fault_bail_nx,  "shadow2_fault guest NX fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_ro_mapping, "shadow2_fault guest R/W fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_user_supervisor, 
-                                        "shadow2_fault guest U/S fault")
-PERFCOUNTER_CPU(shadow2_fault_emulate_read, "shadow2_fault emulates a read")
-PERFCOUNTER_CPU(shadow2_fault_emulate_write, "shadow2_fault emulates a write")
-PERFCOUNTER_CPU(shadow2_fault_emulate_failed, "shadow2_fault emulator fails")
-PERFCOUNTER_CPU(shadow2_fault_mmio,     "shadow2_fault handled as mmio")
-PERFCOUNTER_CPU(shadow2_fault_fixed,    "shadow2_fault fixed fault")
-PERFCOUNTER_CPU(shadow2_ptwr_emulate,   "shadow2 causes ptwr to emulate")
-PERFCOUNTER_CPU(shadow2_validate_gl1e_calls, "calls to shadow2_validate_gl1e")
-PERFCOUNTER_CPU(shadow2_validate_gl2e_calls, "calls to shadow2_validate_gl2e")
-PERFCOUNTER_CPU(shadow2_validate_gl3e_calls, "calls to shadow2_validate_gl3e")
-PERFCOUNTER_CPU(shadow2_validate_gl4e_calls, "calls to shadow2_validate_gl4e")
-PERFCOUNTER_CPU(shadow2_hash_lookups,   "calls to shadow2_hash_lookup")
-PERFCOUNTER_CPU(shadow2_hash_lookup_head, "shadow2 hash hit in bucket head")
-PERFCOUNTER_CPU(shadow2_hash_lookup_miss, "shadow2 hash misses")
-PERFCOUNTER_CPU(shadow2_get_shadow_status, "calls to get_shadow_status")
-PERFCOUNTER_CPU(shadow2_hash_inserts,   "calls to shadow2_hash_insert")
-PERFCOUNTER_CPU(shadow2_hash_deletes,   "calls to shadow2_hash_delete")
-PERFCOUNTER_CPU(shadow2_writeable,      "shadow2 removes write access")
-PERFCOUNTER_CPU(shadow2_writeable_h_1,  "shadow2 writeable: 32b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_2,  "shadow2 writeable: 32pae w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_3,  "shadow2 writeable: 64b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_4,  "shadow2 writeable: 32b linux low")
-PERFCOUNTER_CPU(shadow2_writeable_bf,   "shadow2 writeable brute-force")
-PERFCOUNTER_CPU(shadow2_mappings,       "shadow2 removes all mappings")
-PERFCOUNTER_CPU(shadow2_mappings_bf,    "shadow2 rm-mappings brute-force")
-PERFCOUNTER_CPU(shadow2_early_unshadow, "shadow2 unshadows for fork/exit")
-PERFCOUNTER_CPU(shadow2_early_unshadow_top, "shadow2 unhooks for fork/exit")
-PERFCOUNTER_CPU(shadow2_unshadow,       "shadow2 unshadows a page")
-PERFCOUNTER_CPU(shadow2_up_pointer,     "shadow2 unshadow by up-pointer")
-PERFCOUNTER_CPU(shadow2_unshadow_bf,    "shadow2 unshadow brute-force")
-PERFCOUNTER_CPU(shadow2_get_page_fail,  "shadow2_get_page_from_l1e failed")
-PERFCOUNTER_CPU(shadow2_guest_walk,     "shadow2 walks guest tables")
-PERFCOUNTER_CPU(shadow2_walk_cache_hit, "shadow2 walk-cache hits")
-PERFCOUNTER_CPU(shadow2_walk_cache_miss, "shadow2 walk-cache misses")
+PERFSTATUS(shadow_alloc_count,         "number of shadow pages in use")
+PERFCOUNTER_CPU(shadow_free,           "calls to shadow_free")
+PERFCOUNTER_CPU(shadow_prealloc_1,     "shadow recycles old shadows")
+PERFCOUNTER_CPU(shadow_prealloc_2,     "shadow recycles in-use shadows")
+PERFCOUNTER_CPU(shadow_linear_map_failed, "shadow hit read-only linear map")
+PERFCOUNTER_CPU(shadow_a_update,       "shadow A bit update")
+PERFCOUNTER_CPU(shadow_ad_update,      "shadow A&D bit update")
+PERFCOUNTER_CPU(shadow_fault,          "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
+PERFCOUNTER_CPU(shadow_fault_bail_not_present, 
+                                        "shadow_fault guest not-present")
+PERFCOUNTER_CPU(shadow_fault_bail_nx,  "shadow_fault guest NX fault")
+PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
+PERFCOUNTER_CPU(shadow_fault_bail_user_supervisor, 
+                                        "shadow_fault guest U/S fault")
+PERFCOUNTER_CPU(shadow_fault_emulate_read, "shadow_fault emulates a read")
+PERFCOUNTER_CPU(shadow_fault_emulate_write, "shadow_fault emulates a write")
+PERFCOUNTER_CPU(shadow_fault_emulate_failed, "shadow_fault emulator fails")
+PERFCOUNTER_CPU(shadow_fault_mmio,     "shadow_fault handled as mmio")
+PERFCOUNTER_CPU(shadow_fault_fixed,    "shadow_fault fixed fault")
+PERFCOUNTER_CPU(shadow_ptwr_emulate,   "shadow causes ptwr to emulate")
+PERFCOUNTER_CPU(shadow_validate_gl1e_calls, "calls to shadow_validate_gl1e")
+PERFCOUNTER_CPU(shadow_validate_gl2e_calls, "calls to shadow_validate_gl2e")
+PERFCOUNTER_CPU(shadow_validate_gl3e_calls, "calls to shadow_validate_gl3e")
+PERFCOUNTER_CPU(shadow_validate_gl4e_calls, "calls to shadow_validate_gl4e")
+PERFCOUNTER_CPU(shadow_hash_lookups,   "calls to shadow_hash_lookup")
+PERFCOUNTER_CPU(shadow_hash_lookup_head, "shadow hash hit in bucket head")
+PERFCOUNTER_CPU(shadow_hash_lookup_miss, "shadow hash misses")
+PERFCOUNTER_CPU(shadow_get_shadow_status, "calls to get_shadow_status")
+PERFCOUNTER_CPU(shadow_hash_inserts,   "calls to shadow_hash_insert")
+PERFCOUNTER_CPU(shadow_hash_deletes,   "calls to shadow_hash_delete")
+PERFCOUNTER_CPU(shadow_writeable,      "shadow removes write access")
+PERFCOUNTER_CPU(shadow_writeable_h_1,  "shadow writeable: 32b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_2,  "shadow writeable: 32pae w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_4,  "shadow writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow_writeable_bf,   "shadow writeable brute-force")
+PERFCOUNTER_CPU(shadow_mappings,       "shadow removes all mappings")
+PERFCOUNTER_CPU(shadow_mappings_bf,    "shadow rm-mappings brute-force")
+PERFCOUNTER_CPU(shadow_early_unshadow, "shadow unshadows for fork/exit")
+PERFCOUNTER_CPU(shadow_early_unshadow_top, "shadow unhooks for fork/exit")
+PERFCOUNTER_CPU(shadow_unshadow,       "shadow unshadows a page")
+PERFCOUNTER_CPU(shadow_up_pointer,     "shadow unshadow by up-pointer")
+PERFCOUNTER_CPU(shadow_unshadow_bf,    "shadow unshadow brute-force")
+PERFCOUNTER_CPU(shadow_get_page_fail,  "shadow_get_page_from_l1e failed")
+PERFCOUNTER_CPU(shadow_guest_walk,     "shadow walks guest tables")
+PERFCOUNTER_CPU(shadow_walk_cache_hit, "shadow walk-cache hits")
+PERFCOUNTER_CPU(shadow_walk_cache_miss, "shadow walk-cache misses")
 
 
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Sun Aug 27 06:56:01 2006 +0100
+++ b/xen/include/asm-x86/shadow.h      Mon Aug 28 12:09:36 2006 +0100
@@ -1,7 +1,9 @@
 /******************************************************************************
  * include/asm-x86/shadow.h
  * 
- * Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,26 +23,608 @@
 #ifndef _XEN_SHADOW_H
 #define _XEN_SHADOW_H
 
-/* This file is just a wrapper around the new Shadow2 header,
- * providing names that must be defined in any shadow implementation. */
-
-#include <asm/shadow2.h>
+#include <public/domctl.h> 
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/flushtlb.h>
 
 /* How to make sure a page is not referred to in a shadow PT */
 /* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ 
 #define shadow_drop_references(_d, _p)                      \
-    shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
 #define shadow_sync_and_drop_references(_d, _p)             \
-    shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-
-/* Whether we are translating the domain's frame numbers for it */
-#define shadow_mode_translate(d)  shadow2_mode_translate(d)
-
-/* ...and  if so, how to add and remove entries in the mapping */
+    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+
+/* How to add and remove entries in the p2m mapping. */
 #define guest_physmap_add_page(_d, _p, _m)                  \
-    shadow2_guest_physmap_add_page((_d), (_p), (_m))
+    shadow_guest_physmap_add_page((_d), (_p), (_m))
 #define guest_physmap_remove_page(_d, _p, _m   )            \
-    shadow2_guest_physmap_remove_page((_d), (_p), (_m))
+    shadow_guest_physmap_remove_page((_d), (_p), (_m))
+
+/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+
+#define SHM2_shift 10
+/* We're in one of the shadow modes */
+#define SHM2_enable    (1U << SHM2_shift)
+/* Refcounts based on shadow tables instead of guest tables */
+#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift)
+/* Enable log dirty mode */
+#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift)
+/* Xen does p2m translation, not guest */
+#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift)
+/* Xen does not steal address space from the domain for its own booking;
+ * requires VT or similar mechanisms */
+#define SHM2_external  (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift)
+
+#define shadow_mode_enabled(_d)   ((_d)->arch.shadow.mode)
+#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts)
+#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty)
+#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate)
+#define shadow_mode_external(_d)  ((_d)->arch.shadow.mode & SHM2_external)
+
+/* Xen traps & emulates all reads of all page table pages:
+ *not yet supported
+ */
+#define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
+
+// flags used in the return value of the shadow_set_lXe() functions...
+#define SHADOW_SET_CHANGED            0x1
+#define SHADOW_SET_FLUSH              0x2
+#define SHADOW_SET_ERROR              0x4
+#define SHADOW_SET_L3PAE_RECOPY       0x8
+
+// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
+#ifdef __x86_64__
+#define pv_32bit_guest(_v) 0 // not yet supported
+#else
+#define pv_32bit_guest(_v) !hvm_guest(_v)
+#endif
+
+/* The shadow lock.
+ *
+ * This lock is per-domain.  It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ * 
+ * Specifically, it protects:
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator 
+ *   - all changes to guest page table pages; if/when the notion of
+ *     out-of-sync pages is added to this code, then the shadow lock is
+ *     protecting all guest page table pages which are not currently
+ *     listed as both guest-writable and out-of-sync...
+ *     XXX -- need to think about this relative to writable page tables.
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages
+ *   - the shadow dirty bit array and count
+ *   - XXX
+ */
+#ifndef CONFIG_SMP
+#error shadow.h currently requires CONFIG_SMP
+#endif
+
+#define shadow_lock_init(_d)                                   \
+    do {                                                        \
+        spin_lock_init(&(_d)->arch.shadow.lock);               \
+        (_d)->arch.shadow.locker = -1;                         \
+        (_d)->arch.shadow.locker_function = "nobody";          \
+    } while (0)
+
+#define shadow_lock_is_acquired(_d)                            \
+    (current->processor == (_d)->arch.shadow.locker)
+
+#define shadow_lock(_d)                                                 \
+    do {                                                                 \
+        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+        {                                                                \
+            printk("Error: shadow lock held by %s\n",                   \
+                   (_d)->arch.shadow.locker_function);                  \
+            BUG();                                                       \
+        }                                                                \
+        spin_lock(&(_d)->arch.shadow.lock);                             \
+        ASSERT((_d)->arch.shadow.locker == -1);                         \
+        (_d)->arch.shadow.locker = current->processor;                  \
+        (_d)->arch.shadow.locker_function = __func__;                   \
+    } while (0)
+
+#define shadow_unlock(_d)                                              \
+    do {                                                                \
+        ASSERT((_d)->arch.shadow.locker == current->processor);        \
+        (_d)->arch.shadow.locker = -1;                                 \
+        (_d)->arch.shadow.locker_function = "nobody";                  \
+        spin_unlock(&(_d)->arch.shadow.lock);                          \
+    } while (0)
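+
+/* Typical usage (illustrative; see e.g. shadow_update_cr3() below):
+ *     shadow_lock(d);
+ *     ... modify shadow state ...
+ *     shadow_unlock(d);
+ * Taking the lock again on the same processor hits the BUG() in
+ * shadow_lock() above. */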
+
+/* 
+ * Levels of self-test and paranoia
+ * XXX should go in config files somewhere?  
+ */
+#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
+#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
+#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
+#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
+#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
+#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW_AUDIT                   0
+#define SHADOW_AUDIT_ENABLE            0
+#else
+#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
+#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
+extern int shadow_audit_enable;
+#endif
+
+/* 
+ * Levels of optimization
+ * XXX should go in config files somewhere?  
+ */
+#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
+#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
+
+#define SHADOW_OPTIMIZATIONS      0x03
+
+
+/* With shadow pagetables, the different kinds of address start
+ * to get confusing.
+ * 
+ * Virtual addresses are what they usually are: the addresses that are used
+ * to access memory while the guest is running.  The MMU translates from
+ * virtual addresses to machine addresses.
+ * 
+ * (Pseudo-)physical addresses are the abstraction of physical memory the
+ * guest uses for allocation and so forth.  For the purposes of this code, 
+ * we can largely ignore them.
+ *
+ * Guest frame numbers (gfns) are the entries that the guest puts in its
+ * pagetables.  For normal paravirtual guests, they are actual frame numbers,
+ * with the translation done by the guest.  
+ * 
+ * Machine frame numbers (mfns) are the entries that the hypervisor puts
+ * in the shadow page tables.
+ *
+ * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
+ * to a "machine frame number, from the guest's perspective", or in other
+ * words, pseudo-physical frame numbers.  However, in the shadow code, the
+ * term "gmfn" means "the mfn of a guest page"; this combines naturally with
+ * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
+ * guest L2 page), etc...
+ */
+
+/* With this defined, we do some ugly things to force the compiler to
+ * give us type safety between mfns and gfns and other integers.
+ * TYPE_SAFE(int, foo) defines a foo_t, and _foo() and foo_x() functions
+ * that translate between int and foo_t.
+ * 
+ * It does have some performance cost because the types now have
+ * a different storage attribute, so we may not want it on all the time. */
+#ifndef NDEBUG
+#define TYPE_SAFETY 1
+#endif
+
+#ifdef TYPE_SAFETY
+#define TYPE_SAFE(_type,_name)                                  \
+typedef struct { _type _name; } _name##_t;                      \
+static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
+static inline _type _name##_x(_name##_t n) { return n._name; }
+#else
+#define TYPE_SAFE(_type,_name)                                          \
+typedef _type _name##_t;                                                \
+static inline _name##_t _##_name(_type n) { return n; }                 \
+static inline _type _name##_x(_name##_t n) { return n; }
+#endif
+
+TYPE_SAFE(unsigned long,mfn)
+#define SH_PRI_mfn "05lx"
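+
+/* For example, with TYPE_SAFETY defined the invocation above gives us a
+ * struct-wrapped mfn_t plus the _mfn()/mfn_x() converters, so
+ *     mfn_t m = _mfn(42);  unsigned long raw = mfn_x(m);   (raw == 42)
+ * round-trips the value, while passing a bare unsigned long where an mfn_t
+ * is expected becomes a compile-time error. */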
+
+static inline int
+valid_mfn(mfn_t m)
+{
+    return VALID_MFN(mfn_x(m));
+}
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+    return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+    return pagetable_from_pfn(mfn_x(mfn));
+}
+
+static inline int
+shadow_vcpu_mode_translate(struct vcpu *v)
+{
+    // Returns true if this VCPU needs to be using the P2M table to translate
+    // between GFNs and MFNs.
+    //
+    // This is true of translated HVM domains on a vcpu which has paging
+    // enabled.  (An HVM vcpu with paging disabled is using the p2m table as
+    // its paging table, so no translation occurs in this case.)
+    //
+    return v->arch.shadow.hvm_paging_enabled;
+}
+
+
+/**************************************************************************/
+/* Mode-specific entry points into the shadow code */
+
+struct x86_emulate_ctxt;
+struct shadow_paging_mode {
+    int           (*page_fault            )(struct vcpu *v, unsigned long va,
+                                            struct cpu_user_regs *regs);
+    int           (*invlpg                )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gpa            )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
+    void          (*update_cr3            )(struct vcpu *v);
+    int           (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    void          (*detach_old_tables     )(struct vcpu *v);
+    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
+                                            void *src, u32 bytes,
+                                            struct x86_emulate_ctxt *ctxt);
+    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
+                                            unsigned long old, 
+                                            unsigned long new,
+                                            unsigned int bytes,
+                                            struct x86_emulate_ctxt *ctxt);
+    int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
+                                            unsigned long old_lo, 
+                                            unsigned long old_hi, 
+                                            unsigned long new_lo,
+                                            unsigned long new_hi,
+                                            struct x86_emulate_ctxt *ctxt);
+    mfn_t         (*make_monitor_table    )(struct vcpu *v);
+    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    int           (*guess_wrmap           )(struct vcpu *v, 
+                                            unsigned long vaddr, mfn_t gmfn);
+#endif
+    /* For outsiders to tell what mode we're in */
+    unsigned int shadow_levels;
+    unsigned int guest_levels;
+};
+
+static inline int shadow_guest_paging_levels(struct vcpu *v)
+{
+    ASSERT(v->arch.shadow.mode != NULL);
+    return v->arch.shadow.mode->guest_levels;
+}
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Turning on shadow test mode */
+int shadow_test_enable(struct domain *d);
+
+/* Handler for shadow control ops: enabling and disabling shadow modes, 
+ * and log-dirty bitmap ops all happen through here. */
+int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+
+/* Call when destroying a domain */
+void shadow_teardown(struct domain *d);
+
+/* Call once all of the references to the domain have gone away */
+void shadow_final_teardown(struct domain *d);
+
+
+/* Mark a page as dirty in the bitmap */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
+static inline void mark_dirty(struct domain *d, unsigned long gmfn)
+{
+    if ( shadow_mode_log_dirty(d) )
+    {
+        shadow_lock(d);
+        sh_do_mark_dirty(d, _mfn(gmfn));
+        shadow_unlock(d);
+    }
+}
+
+/* Internal version, for when the shadow lock is already held */
+static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    ASSERT(shadow_lock_is_acquired(d));
+    if ( shadow_mode_log_dirty(d) )
+        sh_do_mark_dirty(d, gmfn);
+}
+
+static inline int
+shadow_fault(unsigned long va, struct cpu_user_regs *regs)
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults.  Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+{
+    struct vcpu *v = current;
+    perfc_incrc(shadow_fault);
+    return v->arch.shadow.mode->page_fault(v, va, regs);
+}
+
+static inline int
+shadow_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+    return v->arch.shadow.mode->invlpg(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+    return v->arch.shadow.mode->gva_to_gpa(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+    return v->arch.shadow.mode->gva_to_gfn(v, va);
+}
+
+static inline void
+shadow_update_cr3(struct vcpu *v)
+/* Updates all the things that are derived from the guest's CR3. 
+ * Called when the guest changes CR3. */
+{
+    shadow_lock(v->domain);
+    v->arch.shadow.mode->update_cr3(v);
+    shadow_unlock(v->domain);
+}
+
+
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ * 
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ * For PAE, relocate L3 entries, if necessary, into low memory.
+ */
+static inline void update_cr3(struct vcpu *v)
+{
+    unsigned long cr3_mfn=0;
+
+    if ( shadow_mode_enabled(v->domain) )
+    {
+        shadow_update_cr3(v);
+        return;
+    }
+
+#if CONFIG_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+    else
+#endif
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+    make_cr3(v, cr3_mfn);
+}
+
+extern void sh_update_paging_modes(struct vcpu *v);
+
+/* Should be called to initialise paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+static inline void shadow_update_paging_modes(struct vcpu *v)
+{
+    ASSERT(shadow_mode_enabled(v->domain));
+    shadow_lock(v->domain);
+    sh_update_paging_modes(v);
+    shadow_unlock(v->domain);
+}
+
+static inline void
+shadow_detach_old_tables(struct vcpu *v)
+{
+    if ( v->arch.shadow.mode )
+        v->arch.shadow.mode->detach_old_tables(v);
+}
+
+static inline mfn_t
+shadow_make_monitor_table(struct vcpu *v)
+{
+    return v->arch.shadow.mode->make_monitor_table(v);
+}
+
+static inline void
+shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+    v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
+}
+
+/* Validate a pagetable change from the guest and update the shadows. */
+extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+                                        void *new_guest_entry);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
+                                            void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+                                       unsigned int level,
+                                       unsigned long fault_addr);
+
+/* Remove all mappings of the guest mfn from the shadows. 
+ * Returns non-zero if we need to flush TLBs. */
+extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
+/* This is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+
+/* Remove all shadows of the guest mfn. */
+extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+{
+    sh_remove_shadows(v, gmfn, 1);
+}
+
+/* Add a page to a domain */
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                               unsigned long mfn);
+
+/* Remove a page from a domain */
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                  unsigned long mfn);
+
+/*
+ * Definitions for the shadow_flags field in page_info.
+ * These flags are stored on *guest* pages...
+ * Bits 1-13 are encodings for the shadow types.
+ */
+#define PGC_SH_type_to_index(_type) ((_type) >> PGC_SH_type_shift)
+#define SHF_page_type_mask \
+    (((1u << (PGC_SH_type_to_index(PGC_SH_max_shadow) + 1u)) - 1u) - \
+     ((1u << PGC_SH_type_to_index(PGC_SH_min_shadow)) - 1u))
+
+#define SHF_L1_32   (1u << PGC_SH_type_to_index(PGC_SH_l1_32_shadow))
+#define SHF_FL1_32  (1u << PGC_SH_type_to_index(PGC_SH_fl1_32_shadow))
+#define SHF_L2_32   (1u << PGC_SH_type_to_index(PGC_SH_l2_32_shadow))
+#define SHF_L1_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l1_pae_shadow))
+#define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
+#define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
+#define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
+#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
+#define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
+#define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
+#define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))
+#define SHF_L3_64   (1u << PGC_SH_type_to_index(PGC_SH_l3_64_shadow))
+#define SHF_L4_64   (1u << PGC_SH_type_to_index(PGC_SH_l4_64_shadow))
+
+/* Used for hysteresis when automatically unhooking mappings on fork/exit */
+#define SHF_unhooked_mappings (1u<<31)
+
+/* 
+ * Allocation of shadow pages 
+ */
+
+/* Return the minimum acceptable number of shadow pages a domain needs */
+unsigned int shadow_min_acceptable_pages(struct domain *d);
+
+/* Set the pool of shadow pages to the required number of MB.
+ * Input will be rounded up to at least shadow_min_acceptable_pages().
+ * Returns 0 for success, 1 for failure. */
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+    unsigned int pg = d->arch.shadow.total_pages;
+    return ((pg >> (20 - PAGE_SHIFT))
+            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
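+
+/* Worked example (assuming 4kB pages, PAGE_SHIFT == 12): a pool of 300
+ * pages reports as 2MB, since 300 >> 8 == 1 and the remaining 44 pages
+ * round the result up by one. */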
+
+/*
+ * Linked list for chaining entries in the shadow hash table. 
+ */
+struct shadow_hash_entry {
+    struct shadow_hash_entry *next;
+    mfn_t smfn;                 /* MFN of the shadow */
+#ifdef __x86_64__ /* Shorten 'n' so we don't waste a whole word on storing 't' */
+    unsigned long n:56;         /* MFN of guest PT or GFN of guest superpage */
+#else
+    unsigned long n;            /* MFN of guest PT or GFN of guest superpage */
+#endif
+    unsigned char t;            /* shadow type bits, or 0 for empty */
+};
+
+#define SHADOW_HASH_BUCKETS 251
+/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
+
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_CACHE_WALKS
+/* Optimization: cache the results of guest walks.  This helps with MMIO
+ * and emulated writes, which tend to issue very similar walk requests
+ * repeatedly.  We keep the results of the last few walks, and blow
+ * away the cache on guest cr3 write, mode change, or page fault. */
+
+#define SH_WALK_CACHE_ENTRIES 4
+
+/* Rather than cache a guest walk, which would include mapped pointers 
+ * to pages, we cache what a TLB would remember about the walk: the 
+ * permissions and the l1 gfn */
+struct shadow_walk_cache {
+    unsigned long va;           /* The virtual address (or 0 == unused) */
+    unsigned long gfn;          /* The gfn from the effective l1e   */
+    u32 permissions;            /* The aggregated permission bits   */
+};
+#endif
+
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Walk another domain's P2M table, mapping pages as we go */
+extern mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t
+sh_gfn_to_mfn(struct domain *d, unsigned long gfn)
+{
+    if ( !shadow_mode_translate(d) )
+        return _mfn(gfn);
+    else if ( likely(current->domain == d) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    else
+        return sh_gfn_to_mfn_foreign(d, gfn);
+}
+
+// vcpu-specific version of gfn_to_mfn().  This is where we hide the dirty
+// little secret that, for hvm guests with paging disabled, nearly all of the
+// shadow code actually thinks that the guest is running on *untranslated* page
+// tables (which is actually domain->phys_table).
+//
+static inline mfn_t
+sh_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
+{ 
+    if ( !shadow_vcpu_mode_translate(v) )
+        return _mfn(gfn);
+    if ( likely(current->domain == v->domain) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    return sh_gfn_to_mfn_foreign(v->domain, gfn);
+}
+
+static inline unsigned long
+sh_mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+    if ( shadow_mode_translate(d) )
+        return get_gpfn_from_mfn(mfn_x(mfn));
+    else
+        return mfn_x(mfn);
+}
+
+
 
 #endif /* _XEN_SHADOW_H */
 
@@ -49,7 +633,7 @@
  * mode: C
  * c-set-style: "BSD"
  * c-basic-offset: 4
- * tab-width: 4
  * indent-tabs-mode: nil
  * End:
  */
+      
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/Makefile  Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,1 @@
+subdir-y += shadow
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/shadow/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/Makefile   Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,15 @@
+ifneq ($(pae),n)
+obj-$(x86_32) += common.o g2_on_s3.o g3_on_s3.o
+else
+obj-$(x86_32) += common.o g2_on_s2.o
+endif
+
+obj-$(x86_64) += common.o g4_on_s4.o g3_on_s3.o g2_on_s3.o
+
+guest_levels  = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_defns  = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
+                -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
+
+g%.o: multi.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) $(call shadow_defns,$(@F)) -c $< -o $@
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/shadow/common.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,3407 @@
+/******************************************************************************
+ * arch/x86/mm/shadow/common.c
+ *
+ * Shadow code that does not need to be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shadow.h>
+#include "private.h"
+
+#if SHADOW_AUDIT
+int shadow_audit_enable = 0;
+
+static void shadow_audit_key(unsigned char key)
+{
+    shadow_audit_enable = !shadow_audit_enable;
+    printk("%s shadow_audit_enable=%d\n",
+           __func__, shadow_audit_enable);
+}
+
+static int __init shadow_audit_key_init(void)
+{
+    register_keyhandler(
+        'O', shadow_audit_key,  "toggle shadow audits");
+    return 0;
+}
+__initcall(shadow_audit_key_init);
+#endif /* SHADOW_AUDIT */
+
+static void sh_free_log_dirty_bitmap(struct domain *d);
+
+int _shadow_mode_refcounts(struct domain *d)
+{
+    return shadow_mode_refcounts(d);
+}
+
+
+/**************************************************************************/
+/* x86 emulator support for the shadow code
+ */
+
+static int
+sh_x86_emulate_read_std(unsigned long addr,
+                         unsigned long *val,
+                         unsigned int bytes,
+                         struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+    if ( hvm_guest(v) )
+    {
+        *val = 0;
+        // XXX -- this is WRONG.
+        //        It entirely ignores the permissions in the page tables.
+        //        In this case, that is only a user vs supervisor access check.
+        //
+        if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
+        {
+#if 0
+            SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                           v->domain->domain_id, v->vcpu_id, 
+                           addr, *val, bytes);
+#endif
+            return X86EMUL_CONTINUE;
+        }
+
+        /* If we got here, there was nothing mapped here, or a bad GFN 
+         * was mapped here.  This should never happen: we're here because
+         * of a write fault at the end of the instruction we're emulating. */ 
+        SHADOW_PRINTK("read failed to va %#lx\n", addr);
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int
+sh_x86_emulate_write_std(unsigned long addr,
+                          unsigned long val,
+                          unsigned int bytes,
+                          struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                  v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        // XXX -- this is WRONG.
+        //        It entirely ignores the permissions in the page tables.
+        //        In this case, that includes user vs supervisor, and
+        //        write access.
+        //
+        if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
+            return X86EMUL_CONTINUE;
+
+        /* If we got here, there was nothing mapped here, or a bad GFN 
+         * was mapped here.  This should never happen: we're here because
+         * of a write fault at the end of the instruction we're emulating,
+         * which should be handled by sh_x86_emulate_write_emulated. */ 
+        SHADOW_PRINTK("write failed to va %#lx\n", addr);
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int
+sh_x86_emulate_write_emulated(unsigned long addr,
+                               unsigned long val,
+                               unsigned int bytes,
+                               struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                  v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int 
+sh_x86_emulate_cmpxchg_emulated(unsigned long addr,
+                                 unsigned long old,
+                                 unsigned long new,
+                                 unsigned int bytes,
+                                 struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
+                   v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new, 
+                                                    bytes, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int 
+sh_x86_emulate_cmpxchg8b_emulated(unsigned long addr,
+                                   unsigned long old_lo,
+                                   unsigned long old_hi,
+                                   unsigned long new_lo,
+                                   unsigned long new_hi,
+                                   struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx:%lx n:=%#lx:%lx\n",
+                   v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
+                   new_hi, new_lo, ctxt);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo, old_hi,
+                                                      new_lo, new_hi, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+
+struct x86_emulate_ops shadow_emulator_ops = {
+    .read_std           = sh_x86_emulate_read_std,
+    .write_std          = sh_x86_emulate_write_std,
+    .read_emulated      = sh_x86_emulate_read_std,
+    .write_emulated     = sh_x86_emulate_write_emulated,
+    .cmpxchg_emulated   = sh_x86_emulate_cmpxchg_emulated,
+    .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
+};
+
+
+/**************************************************************************/
+/* Code for "promoting" a guest page to the point where the shadow code is
+ * willing to let it be treated as a guest page table.  This generally
+ * involves making sure there are no writable mappings available to the guest
+ * for this page.
+ */
+void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    unsigned long type_info;
+
+    ASSERT(valid_mfn(gmfn));
+
+    /* We should never try to promote a gmfn that has writeable mappings */
+    ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
+
+    // Is the page already shadowed?
+    if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
+    {
+        // No prior shadow exists...
+
+        // Grab a type-ref.  We don't really care if we are racing with another
+        // vcpu or not, or even what kind of type we get; we just want the type
+        // count to be > 0.
+        //
+        do {
+            type_info =
+                page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
+        } while ( !get_page_type(page, type_info) );
+
+        // Now that the type ref is non-zero, we can safely use the
+        // shadow_flags.
+        //
+        page->shadow_flags = 0;
+    }
+
+    ASSERT(!test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+    set_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+}
+
+void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+
+    ASSERT(test_bit(_PGC_page_table, &page->count_info));
+    ASSERT(test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+
+    clear_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+
+    if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+    {
+        // release the extra type ref
+        put_page_type(page);
+
+        // clear the is-a-page-table bit.
+        clear_bit(_PGC_page_table, &page->count_info);
+    }
+}
+
+/**************************************************************************/
+/* Validate a pagetable change from the guest and update the shadows.
+ * Returns a bitmask of SHADOW_SET_* flags. */
+
+static int
+__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
+                               void *entry, u32 size)
+{
+    int result = 0;
+    struct page_info *page = mfn_to_page(gmfn);
+
+    sh_mark_dirty(v->domain, gmfn);
+    
+    // Determine which types of shadows are affected, and update each.
+    //
+    // Always validate L1s before L2s to prevent another cpu with a linear
+    // mapping of this gmfn from seeing a walk that results from 
+    // using the new L2 value and the old L1 value.  (It is OK for such a
+    // guest to see a walk that uses the old L2 value with the new L1 value,
+    // as hardware could behave this way if one level of the pagewalk occurs
+    // before the store, and the next level of the pagewalk occurs after the
+    // store.)
+    //
+    // Ditto for L2s before L3s, etc.
+    //
+
+    if ( !(page->count_info & PGC_page_table) )
+        return 0;  /* Not shadowed at all */
+
+#if CONFIG_PAGING_LEVELS == 2
+    if ( page->shadow_flags & SHF_L1_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
+            (v, gmfn, entry, size);
+#else 
+    if ( page->shadow_flags & SHF_L1_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
+            (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+    if ( page->shadow_flags & SHF_L2_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
+            (v, gmfn, entry, size);
+#else 
+    if ( page->shadow_flags & SHF_L2_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
+            (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3 
+    if ( page->shadow_flags & SHF_L1_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2H_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L3_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
+            (v, gmfn, entry, size);
+#else /* 32-bit non-PAE hypervisor does not support PAE guests */
+    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4 
+    if ( page->shadow_flags & SHF_L1_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L3_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L4_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
+            (v, gmfn, entry, size);
+#else /* 32-bit/PAE hypervisor does not support 64-bit guests */
+    ASSERT((page->shadow_flags 
+            & (SHF_L4_64|SHF_L3_64|SHF_L2_64|SHF_L1_64)) == 0);
+#endif
+
+    return result;
+}
+
+
+int
+shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
+/* This is the entry point from hypercalls. It returns a bitmask of all the 
+ * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
+{
+    int rc;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
+    shadow_audit_tables(v);
+    return rc;
+}
+
+void
+shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+                                void *entry, u32 size)
+/* This is the entry point for emulated writes to pagetables in HVM guests */
+{
+    struct domain *d = v->domain;
+    int rc;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
+    if ( rc & SHADOW_SET_FLUSH )
+    {
+        // Flush everyone except the local processor, which will flush when it
+        // re-enters the HVM guest.
+        //
+        cpumask_t mask = d->domain_dirty_cpumask;
+        cpu_clear(v->processor, mask);
+        flush_tlb_mask(mask);
+    }
+    if ( rc & SHADOW_SET_ERROR ) 
+    {
+        /* This page is probably not a pagetable any more: tear it out of the 
+         * shadows, along with any tables that reference it */
+        shadow_remove_all_shadows_and_parents(v, gmfn);
+    }
+    /* We ignore the other bits: since we are about to change CR3 on
+     * VMENTER we don't need to do any extra TLB flushes. */ 
+}
+
+
+/**************************************************************************/
+/* Memory management for shadow pages. */ 
+
+/* Meaning of the count_info field in shadow pages
+ * ----------------------------------------------
+ * 
+ * A count of all references to this page from other shadow pages and
+ * guest CR3s (a.k.a. v->arch.shadow.table).  
+ *
+ * The top bits hold the shadow type and the pinned bit.  Top-level
+ * shadows are pinned so that they don't disappear when not in a CR3
+ * somewhere.
+ *
+ * We don't need to use get|put_page for this as the updates are all
+ * protected by the shadow lock.  We can't use get|put_page for this
+ * as the size of the count on shadow pages is different from that on
+ * normal guest pages.
+ */
+
+/* Meaning of the type_info field in shadow pages
+ * ----------------------------------------------
+ * 
+ * type_info use depends on the shadow type (from count_info)
+ * 
+ * PGC_SH_none : This page is in the shadow free pool.  type_info holds
+ *                the chunk order for our freelist allocator.
+ *
+ * PGC_SH_l*_shadow : This page is in use as a shadow. type_info 
+ *                     holds the mfn of the guest page being shadowed,
+ *
+ * PGC_SH_fl1_*_shadow : This page is being used to shatter a superpage.
+ *                        type_info holds the gfn being shattered.
+ *
+ * PGC_SH_monitor_table : This page is part of a monitor table.
+ *                         type_info is not used.
+ */
+
+/* Meaning of the _domain field in shadow pages
+ * --------------------------------------------
+ *
+ * In shadow pages, this field will always have its least significant bit
+ * set.  This ensures that all attempts to get_page() will fail (as all
+ * valid pickled domain pointers have a zero for their least significant bit).
+ * Instead, the remaining upper bits are used to record the shadow generation
+ * counter when the shadow was created.
+ */
+
+/* Meaning of the shadow_flags field
+ * ----------------------------------
+ * 
+ * In guest pages that are shadowed, one bit for each kind of shadow they have.
+ * 
+ * In shadow pages, will be used for holding a representation of the populated
+ * entries in this shadow (either a min/max, or a bitmap, or ...)
+ *
+ * In monitor-table pages, holds the level of the particular page (to save
+ * spilling the shadow types into an extra bit by having three types of monitor
+ * page).
+ */
+
+/* Meaning of the list_head struct in shadow pages
+ * -----------------------------------------------
+ *
+ * In free shadow pages, this is used to hold the free-lists of chunks.
+ *
+ * In top-level shadow tables, this holds a linked-list of all top-level
+ * shadows (used for recovering memory and destroying shadows). 
+ *
+ * In lower-level shadows, this holds the physical address of a higher-level
+ * shadow entry that holds a reference to this shadow (or zero).
+ */
+
+/* Allocating shadow pages
+ * -----------------------
+ *
+ * Most shadow pages are allocated singly, but there are two cases where we 
+ * need to allocate multiple pages together.
+ * 
+ * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
+ *    A 32-bit guest l1 table covers 4MB of virtual address space,
+ *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
+ *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
+ *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
+ *    each).  These multi-page shadows are contiguous and aligned; 
+ *    functions for handling offsets into them are defined in shadow.c 
+ *    (shadow_l1_index() etc.)
+ *    
+ * 2: Shadowing PAE top-level pages.  Each guest page that contains
+ *    any PAE top-level pages requires two shadow pages to shadow it.
+ *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
+ *
+ * This table shows the allocation behaviour of the different modes:
+ *
+ * Xen paging      32b  pae  pae  64b  64b  64b
+ * Guest paging    32b  32b  pae  32b  pae  64b
+ * PV or HVM        *   HVM   *   HVM  HVM   * 
+ * Shadow paging   32b  pae  pae  pae  pae  64b
+ *
+ * sl1 size         4k   8k   4k   8k   4k   4k
+ * sl2 size         4k  16k   4k  16k   4k   4k
+ * sl3 size         -    -    8k   -    8k   4k
+ * sl4 size         -    -    -    -    -    4k
+ *
+ * We allocate memory from xen in four-page units and break them down
+ * with a simple buddy allocator.  Can't use the xen allocator to handle
+ * this as it only works for contiguous zones, and a domain's shadow
+ * pool is made of fragments.
+ *
+ * In HVM guests, the p2m table is built out of shadow pages, and we provide 
+ * a function for the p2m management to steal pages, in max-order chunks, from 
+ * the free pool.  We don't provide for giving them back, yet.
+ */
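+
+/* Quick arithmetic behind case 1 above (illustrative): a 32-bit guest l1
+ * has 1024 4-byte entries mapping 1024 * 4kB = 4MB, while a PAE/64-bit l1
+ * has 512 8-byte entries mapping 512 * 4kB = 2MB -- hence two shadow pages
+ * per 32-bit guest l1, and four per 32-bit guest l2. */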
+
+/* Figure out the least acceptable quantity of shadow memory.
+ * The minimum memory requirement for always being able to free up a
+ * chunk of memory is very small -- only three max-order chunks per
+ * vcpu to hold the top level shadows and pages with Xen mappings in them.  
+ *
+ * But for a guest to be guaranteed to successfully execute a single
+ * instruction, we must be able to map a large number (about thirty) VAs
+ * at the same time, which means that to guarantee progress, we must
+ * allow for more than ninety allocated pages per vcpu.  We round that
+ * up to 128 pages, or half a megabyte per vcpu. */
+unsigned int shadow_min_acceptable_pages(struct domain *d) 
+{
+    u32 vcpu_count = 0;
+    struct vcpu *v;
+
+    for_each_vcpu(d, v)
+        vcpu_count++;
+
+    return (vcpu_count * 128);
+}
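+
+/* For example, a 2-vcpu domain gets a floor of 256 pages here, i.e. 1MB of
+ * shadow memory (128 pages == 512kB per vcpu with 4kB pages). */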
+
+/* Using the type_info field to store freelist order */
+#define SH_PFN_ORDER(_p) ((_p)->u.inuse.type_info)
+#define SH_SET_PFN_ORDER(_p, _o)                       \
+ do { (_p)->u.inuse.type_info = (_o); } while (0)
+ 
+
+/* Figure out the order of allocation needed for a given shadow type */
+static inline u32
+shadow_order(u32 shadow_type) 
+{
+#if CONFIG_PAGING_LEVELS > 2
+    static const u32 type_to_order[16] = {
+        0, /* PGC_SH_none           */
+        1, /* PGC_SH_l1_32_shadow   */
+        1, /* PGC_SH_fl1_32_shadow  */
+        2, /* PGC_SH_l2_32_shadow   */
+        0, /* PGC_SH_l1_pae_shadow  */
+        0, /* PGC_SH_fl1_pae_shadow */
+        0, /* PGC_SH_l2_pae_shadow  */
+        0, /* PGC_SH_l2h_pae_shadow */
+        1, /* PGC_SH_l3_pae_shadow  */
+        0, /* PGC_SH_l1_64_shadow   */
+        0, /* PGC_SH_fl1_64_shadow  */
+        0, /* PGC_SH_l2_64_shadow   */
+        0, /* PGC_SH_l3_64_shadow   */
+        0, /* PGC_SH_l4_64_shadow   */
+        2, /* PGC_SH_p2m_table      */
+        0  /* PGC_SH_monitor_table  */
+        };
+    u32 type = (shadow_type & PGC_SH_type_mask) >> PGC_SH_type_shift;
+    return type_to_order[type];
+#else  /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
+    return 0;
+#endif
+}
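+
+/* Illustrative example: a 32-bit guest l2 shadowed on a PAE or 64-bit
+ * hypervisor (PGC_SH_l2_32_shadow) maps to order 2 above, i.e. a 4-page
+ * (16kB) contiguous allocation -- matching the "sl2 ... 16k" entries in
+ * the allocation table earlier in this file. */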
+
+
+/* Do we have a free chunk of at least this order? */
+static inline int chunk_is_available(struct domain *d, int order)
+{
+    int i;
+    
+    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+        if ( !list_empty(&d->arch.shadow.freelists[i]) )
+            return 1;
+    return 0;
+}
+
+/* Dispatcher function: call the per-mode function that will unhook the
+ * non-Xen mappings in this top-level shadow mfn */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    switch ( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift )
+    {
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
+#else
+        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
+#endif
+        break;
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
+        break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
+        break;
+#endif
+    default:
+        SHADOW_PRINTK("top-level shadow has bad type %08lx\n", 
+                       (unsigned long)((pg->count_info & PGC_SH_type_mask)
+                                       >> PGC_SH_type_shift));
+        BUG();
+    }
+}
+
+
+/* Make sure there is at least one chunk of the required order available
+ * in the shadow page pool. This must be called before any calls to
+ * shadow_alloc().  Since this will free existing shadows to make room,
+ * it must be called early enough to avoid freeing shadows that the
+ * caller is currently working on. */
+void shadow_prealloc(struct domain *d, unsigned int order)
+{
+    /* Need a vcpu for calling unpins; for now, since we don't have
+     * per-vcpu shadows, any will do */
+    struct vcpu *v = d->vcpu[0];
+    struct list_head *l, *t;
+    struct page_info *pg;
+    mfn_t smfn;
+
+    if ( chunk_is_available(d, order) ) return; 
+    
+    /* Stage one: walk the list of top-level pages, unpinning them */
+    perfc_incrc(shadow_prealloc_1);
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    {
+        pg = list_entry(l, struct page_info, list);
+        smfn = page_to_mfn(pg);
+
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
+        {
+            /* For PAE, we need to unpin each subshadow on this shadow */
+            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
+        } 
+        else 
+#endif /* 32-bit code always takes this branch */
+        {
+            /* Unpin this top-level shadow */
+            sh_unpin(v, smfn);
+        }
+
+        /* See if that freed up a chunk of appropriate size */
+        if ( chunk_is_available(d, order) ) return;
+    }
+
+    /* Stage two: all shadow pages are in use in hierarchies that are
+     * loaded in cr3 on some vcpu.  Walk them, unhooking the non-Xen
+     * mappings. */
+    perfc_incrc(shadow_prealloc_2);
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+    /* Walk the list from the tail: recently used toplevels have been pulled
+     * to the head */
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    {
+        pg = list_entry(l, struct page_info, list);
+        smfn = page_to_mfn(pg);
+        shadow_unhook_mappings(v, smfn);
+
+        /* Need to flush TLB if we've altered our own tables */
+        if ( !shadow_mode_external(d) 
+             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+            local_flush_tlb();
+        
+        /* See if that freed up a chunk of appropriate size */
+        if ( chunk_is_available(d, order) ) return;
+    }
+    
+    /* Nothing more we can do: all remaining shadows are of pages that
+     * hold Xen mappings for some vcpu.  This should never happen. */
+    SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n"
+                   "  shadow pages total = %u, free = %u, p2m=%u\n",
+                   1 << order, 
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+    BUG();
+}
+
+
+/* Allocate another shadow's worth of (contiguous, aligned) pages,
+ * and fill in the type and backpointer fields of their page_infos. 
+ * Never fails to allocate. */
+mfn_t shadow_alloc(struct domain *d,  
+                    u32 shadow_type,
+                    unsigned long backpointer)
+{
+    struct page_info *pg = NULL;
+    unsigned int order = shadow_order(shadow_type);
+    cpumask_t mask;
+    void *p;
+    int i;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(order <= SHADOW_MAX_ORDER);
+    ASSERT(shadow_type != PGC_SH_none);
+    perfc_incrc(shadow_alloc);
+
+    /* Find smallest order which can satisfy the request. */
+    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+        if ( !list_empty(&d->arch.shadow.freelists[i]) )
+        {
+            pg = list_entry(d->arch.shadow.freelists[i].next, 
+                            struct page_info, list);
+            list_del(&pg->list);
+            
+            /* We may have to halve the chunk a number of times. */
+            while ( i != order )
+            {
+                i--;
+                SH_SET_PFN_ORDER(pg, i);
+                list_add_tail(&pg->list, &d->arch.shadow.freelists[i]);
+                pg += 1 << i;
+            }
+            d->arch.shadow.free_pages -= 1 << order;
+
+            /* Init page info fields and clear the pages */
+            for ( i = 0; i < 1<<order ; i++ ) 
+            {
+                pg[i].u.inuse.type_info = backpointer;
+                pg[i].count_info = shadow_type;
+                pg[i].shadow_flags = 0;
+                INIT_LIST_HEAD(&pg[i].list);
+                /* Before we overwrite the old contents of this page, 
+                 * we need to be sure that no TLB holds a pointer to it. */
+                mask = d->domain_dirty_cpumask;
+                tlbflush_filter(mask, pg[i].tlbflush_timestamp);
+                if ( unlikely(!cpus_empty(mask)) )
+                {
+                    perfc_incrc(shadow_alloc_tlbflush);
+                    flush_tlb_mask(mask);
+                }
+                /* Now safe to clear the page for reuse */
+                p = sh_map_domain_page(page_to_mfn(pg+i));
+                ASSERT(p != NULL);
+                clear_page(p);
+                sh_unmap_domain_page(p);
+                perfc_incr(shadow_alloc_count);
+            }
+            return page_to_mfn(pg);
+        }
+    
+    /* If we get here, we failed to allocate. This should never happen.
+     * It means that we didn't call shadow_prealloc() correctly before
+     * we allocated.  We can't recover by calling prealloc here, because
+     * we might free up higher-level pages that the caller is working on. */
+    SHADOW_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
+    BUG();
+}
+
+
+/* Return some shadow pages to the pool. */
+void shadow_free(struct domain *d, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn); 
+    u32 shadow_type;
+    unsigned long order;
+    unsigned long mask;
+    int i;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    perfc_incrc(shadow_free);
+
+    shadow_type = pg->count_info & PGC_SH_type_mask;
+    ASSERT(shadow_type != PGC_SH_none);
+    ASSERT(shadow_type != PGC_SH_p2m_table);
+    order = shadow_order(shadow_type);
+
+    d->arch.shadow.free_pages += 1 << order;
+
+    for ( i = 0; i < 1<<order; i++ ) 
+    {
+        /* Strip out the type: this is now a free shadow page */
+        pg[i].count_info = 0;
+        /* Remember the TLB timestamp so we will know whether to flush 
+         * TLBs when we reuse the page.  Because the destructors leave the
+         * contents of the pages in place, we can delay TLB flushes until
+         * just before the allocator hands the page out again. */
+        pg[i].tlbflush_timestamp = tlbflush_current_time();
+        perfc_decr(shadow_alloc_count);
+    }
+
+    /* Merge chunks as far as possible. */
+    while ( order < SHADOW_MAX_ORDER )
+    {
+        mask = 1 << order;
+        if ( (mfn_x(page_to_mfn(pg)) & mask) ) {
+            /* Merge with predecessor block? */
+            if ( (((pg-mask)->count_info & PGC_SH_type_mask) != PGT_none) 
+                 || (SH_PFN_ORDER(pg-mask) != order) )
+                break;
+            list_del(&(pg-mask)->list);
+            pg -= mask;
+        } else {
+            /* Merge with successor block? */
+            if ( (((pg+mask)->count_info & PGC_SH_type_mask) != PGT_none)
+                 || (SH_PFN_ORDER(pg+mask) != order) )
+                break;
+            list_del(&(pg+mask)->list);
+        }
+        order++;
+    }
+
+    SH_SET_PFN_ORDER(pg, order);
+    list_add_tail(&pg->list, &d->arch.shadow.freelists[order]);
+}
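+
+/* Buddy-merge example (illustrative): freeing an order-0 shadow page whose
+ * mfn has bit 0 clear looks at its successor (pg + 1); if that page is also
+ * free and of order 0, the two become a single order-1 block, and the loop
+ * then tries to merge again at the next order, up to SHADOW_MAX_ORDER. */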
+
+/* Divert some memory from the pool to be used by the p2m mapping.
+ * This action is irreversible: the p2m mapping only ever grows.
+ * That's OK because the p2m table only exists for external domains,
+ * and those domains can't ever turn off shadow mode.
+ * Also, we only ever allocate a max-order chunk, so as to preserve
+ * the invariant that shadow_prealloc() always works.
+ * Returns 0 iff it can't get a chunk (the caller should then
+ * free up some pages in domheap and call set_sh_allocation);
+ * returns non-zero on success.
+ */
+static int
+shadow_alloc_p2m_pages(struct domain *d)
+{
+    struct page_info *pg;
+    u32 i;
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    if ( d->arch.shadow.total_pages 
+         < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
+        return 0; /* Not enough shadow memory: need to increase it first */
+    
+    pg = mfn_to_page(shadow_alloc(d, PGC_SH_p2m_table, 0));
+    d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
+    d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
+    for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
+    {
+        /* Unlike shadow pages, mark p2m pages as owned by the domain */
+        page_set_owner(&pg[i], d);
+        list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist);
+    }
+    return 1;
+}
+
+// Returns 0 if no memory is available...
+mfn_t
+shadow_alloc_p2m_page(struct domain *d)
+{
+    struct list_head *entry;
+    mfn_t mfn;
+    void *p;
+
+    if ( list_empty(&d->arch.shadow.p2m_freelist) &&
+         !shadow_alloc_p2m_pages(d) )
+        return _mfn(0);
+    entry = d->arch.shadow.p2m_freelist.next;
+    list_del(entry);
+    list_add_tail(entry, &d->arch.shadow.p2m_inuse);
+    mfn = page_to_mfn(list_entry(entry, struct page_info, list));
+    sh_get_ref(mfn, 0);
+    p = sh_map_domain_page(mfn);
+    clear_page(p);
+    sh_unmap_domain_page(p);
+
+    return mfn;
+}
+
+#if CONFIG_PAGING_LEVELS == 3
+static void p2m_install_entry_in_monitors(struct domain *d, 
+                                          l3_pgentry_t *l3e) 
+/* Special case, only used for external-mode domains on PAE hosts:
+ * update the mapping of the p2m table.  Once again, this is trivial in
+ * other paging modes (one top-level entry points to the top-level p2m,
+ * no maintenance needed), but PAE makes life difficult by needing to
+ * copy the eight l3es of the p2m table into eight l2h slots in the
+ * monitor table.  This function makes fresh copies when a p2m l3e
+ * changes. */
+{
+    l2_pgentry_t *ml2e;
+    struct vcpu *v;
+    unsigned int index;
+
+    index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
+    ASSERT(index < MACHPHYS_MBYTES>>1);
+
+    for_each_vcpu(d, v) 
+    {
+        if ( pagetable_get_pfn(v->arch.monitor_table) == 0 ) 
+            continue;
+        ASSERT(shadow_mode_external(v->domain));
+
+        SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
+                      d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
+
+        if ( v == current ) /* OK to use linear map of monitor_table */
+            ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
+        else 
+        {
+            l3_pgentry_t *ml3e;
+            ml3e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
+            ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
+            ml2e += l2_table_offset(RO_MPT_VIRT_START);
+            sh_unmap_domain_page(ml3e);
+        }
+        ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
+        if ( v != current )
+            sh_unmap_domain_page(ml2e);
+    }
+}
+#endif
+
+// Find the next level's P2M entry, checking for out-of-range gfn's...
+// Returns NULL on error.
+//
+static l1_pgentry_t *
+p2m_find_entry(void *table, unsigned long *gfn_remainder,
+                   unsigned long gfn, u32 shift, u32 max)
+{
+    u32 index;
+
+    index = *gfn_remainder >> shift;
+    if ( index >= max )
+    {
+        SHADOW_DEBUG(P2M, "gfn=0x%lx out of range "
+                      "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
+                       gfn, *gfn_remainder, shift, index, max);
+        return NULL;
+    }
+    *gfn_remainder &= (1 << shift) - 1;
+    return (l1_pgentry_t *)table + index;
+}
+
+// Walk one level of the P2M table, allocating a new table if required.
+// Returns 0 on error.
+//
+static int
+p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 
+               unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 
+               u32 max, unsigned long type)
+{
+    l1_pgentry_t *p2m_entry;
+    void *next;
+
+    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
+                                      shift, max)) )
+        return 0;
+
+    if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+    {
+        mfn_t mfn = shadow_alloc_p2m_page(d);
+        if ( mfn_x(mfn) == 0 )
+            return 0;
+        *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+        mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated;
+        mfn_to_page(mfn)->count_info = 1;
+#if CONFIG_PAGING_LEVELS == 3
+        if (type == PGT_l2_page_table)
+        {
+            /* We have written to the p2m l3: need to sync the per-vcpu
+             * copies of it in the monitor tables */
+            p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+        }
+#endif
+        /* The P2M can be shadowed: keep the shadows synced */
+        if ( d->vcpu[0] )
+            (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
+                                                 p2m_entry, sizeof *p2m_entry);
+    }
+    *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
+    next = sh_map_domain_page(*table_mfn);
+    sh_unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+// Returns 0 on error (out of memory)
+int
+shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    // XXX -- this might be able to be faster iff current->domain == d
+    mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
+    void *table = sh_map_domain_page(table_mfn);
+    unsigned long gfn_remainder = gfn;
+    l1_pgentry_t *p2m_entry;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
+        return 0;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    // When using PAE Xen, we only allow 33 bits of pseudo-physical
+    // address in translated guests (i.e. 8 GBytes).  This restriction
+    // comes from wanting to map the P2M table into the 16MB RO_MPT hole
+    // in Xen's address space for translated PV guests.
+    //
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         (CONFIG_PAGING_LEVELS == 3
+                          ? 8
+                          : L3_PAGETABLE_ENTRIES),
+                         PGT_l2_page_table) )
+        return 0;
+#endif
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+        return 0;
+
+    p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                               0, L1_PAGETABLE_ENTRIES);
+    ASSERT(p2m_entry);
+    if ( valid_mfn(mfn) )
+        *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+    else
+        *p2m_entry = l1e_empty();
+
+    /* The P2M can be shadowed: keep the shadows synced */
+    (void) __shadow_validate_guest_entry(d->vcpu[0], table_mfn, 
+                                          p2m_entry, sizeof *p2m_entry);
+
+    sh_unmap_domain_page(table);
+
+    return 1;
+}
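+
+/* Worked example of the arithmetic behind the p2m_next_level() calls
+ * above, assuming the 4-level (long mode) constants: PAGE_SHIFT is 12 and
+ * L2/L3/L4_PAGETABLE_SHIFT are 21/30/39, so the shifts passed in are
+ * 27, 18, 9 and finally 0, each selecting nine bits of the gfn.  The
+ * helper name below is illustrative only and is not used elsewhere. */
+static inline u32 sketch_p2m_index(unsigned long gfn, u32 shift)
+{
+    /* p2m_find_entry() computes "gfn_remainder >> shift" and then strips
+     * the bits it consumed; for 512-entry levels that is the same as
+     * taking nine bits of the gfn directly. */
+    return (gfn >> shift) & 511;
+}
+/* The PAE limit mentioned in the comment above is the same kind of
+ * arithmetic: a 16MB RO_MPT hole holds 16MB / 8 bytes = 2^21 l1 entries,
+ * i.e. 2^21 gfns, and 2^21 * 4kB = 8GB of pseudo-physical space
+ * (33 bits of address). */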
+
+// Allocate a new p2m table for a domain.
+//
+// The structure of the p2m table is that of a pagetable for xen (i.e. it is
+// controlled by CONFIG_PAGING_LEVELS).
+//
+// Returns 0 if p2m table could not be initialized
+//
+static int
+shadow_alloc_p2m_table(struct domain *d)
+{
+    mfn_t p2m_top;
+    struct list_head *entry;
+    unsigned int page_count = 0;
+    
+    SHADOW_PRINTK("allocating p2m table\n");
+    ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0);
+
+    p2m_top = shadow_alloc_p2m_page(d);
+    if ( mfn_x(p2m_top) == 0 )
+        return 0;   /* shadow_alloc_p2m_page() found no memory */
+
+    mfn_to_page(p2m_top)->count_info = 1;
+    mfn_to_page(p2m_top)->u.inuse.type_info =
+#if CONFIG_PAGING_LEVELS == 4
+        PGT_l4_page_table
+#elif CONFIG_PAGING_LEVELS == 3
+        PGT_l3_page_table
+#elif CONFIG_PAGING_LEVELS == 2
+        PGT_l2_page_table
+#endif
+        | 1 | PGT_validated;
+
+    d->arch.phys_table = pagetable_from_mfn(p2m_top);
+
+    SHADOW_PRINTK("populating p2m table\n");
+ 
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        struct page_info *page = list_entry(entry, struct page_info, list);
+        mfn_t mfn = page_to_mfn(page);
+        unsigned long gfn = get_gpfn_from_mfn(mfn_x(mfn));
+        page_count++;
+        if (
+#ifdef __x86_64__
+            (gfn != 0x5555555555555555L)
+#else
+            (gfn != 0x55555555L)
+#endif
+             && gfn != INVALID_M2P_ENTRY
+             && !shadow_set_p2m_entry(d, gfn, mfn) )
+        {
+            SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 
SH_PRI_mfn "\n",
+                           gfn, mfn_x(mfn));
+            return 0;
+        }
+    }
+
+    SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count);
+    return 1;
+}
+
+mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+/* Read another domain's p2m entries */
+{
+    mfn_t mfn;
+    unsigned long addr = gpfn << PAGE_SHIFT;
+    l2_pgentry_t *l2e;
+    l1_pgentry_t *l1e;
+    
+    ASSERT(shadow_mode_translate(d));
+    mfn = pagetable_get_mfn(d->arch.phys_table);
+
+
+#if CONFIG_PAGING_LEVELS > 2
+    if ( gpfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) ) 
+        /* This pfn is higher than the p2m map can hold */
+        return _mfn(INVALID_MFN);
+#endif
+
+
+#if CONFIG_PAGING_LEVELS >= 4
+    { 
+        l4_pgentry_t *l4e = sh_map_domain_page(mfn);
+        l4e += l4_table_offset(addr);
+        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
+        {
+            sh_unmap_domain_page(l4e);
+            return _mfn(INVALID_MFN);
+        }
+        mfn = _mfn(l4e_get_pfn(*l4e));
+        sh_unmap_domain_page(l4e);
+    }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    {
+        l3_pgentry_t *l3e = sh_map_domain_page(mfn);
+        l3e += l3_table_offset(addr);
+        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+        {
+            sh_unmap_domain_page(l3e);
+            return _mfn(INVALID_MFN);
+        }
+        mfn = _mfn(l3e_get_pfn(*l3e));
+        sh_unmap_domain_page(l3e);
+    }
+#endif
+
+    l2e = sh_map_domain_page(mfn);
+    l2e += l2_table_offset(addr);
+    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
+    {
+        sh_unmap_domain_page(l2e);
+        return _mfn(INVALID_MFN);
+    }
+    mfn = _mfn(l2e_get_pfn(*l2e));
+    sh_unmap_domain_page(l2e);
+
+    l1e = sh_map_domain_page(mfn);
+    l1e += l1_table_offset(addr);
+    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+    {
+        sh_unmap_domain_page(l1e);
+        return _mfn(INVALID_MFN);
+    }
+    mfn = _mfn(l1e_get_pfn(*l1e));
+    sh_unmap_domain_page(l1e);
+
+    return mfn;
+}
+
+unsigned long
+shadow_gfn_to_mfn_foreign(unsigned long gpfn)
+{
+    return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn));
+}
+
+
+static void shadow_p2m_teardown(struct domain *d)
+/* Return all the p2m pages to Xen.
+ * We know we don't have any extra mappings to these pages */
+{
+    struct list_head *entry, *n;
+    struct page_info *pg;
+
+    d->arch.phys_table = pagetable_null();
+
+    list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse)
+    {
+        pg = list_entry(entry, struct page_info, list);
+        list_del(entry);
+        /* Should have just the one ref we gave it in alloc_p2m_page() */
+        if ( (pg->count_info & PGC_SH_count_mask) != 1 )
+        {
+            SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+                           pg->count_info, pg->u.inuse.type_info);
+        }
+        ASSERT(page_get_owner(pg) == d);
+        /* Free should not decrement domain's total allocation, since 
+         * these pages were allocated without an owner. */
+        page_set_owner(pg, NULL); 
+        free_domheap_pages(pg, 0);
+        d->arch.shadow.p2m_pages--;
+        perfc_decr(shadow_alloc_count);
+    }
+    list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist)
+    {
+        list_del(entry);
+        pg = list_entry(entry, struct page_info, list);
+        ASSERT(page_get_owner(pg) == d);
+        /* Free should not decrement domain's total allocation. */
+        page_set_owner(pg, NULL); 
+        free_domheap_pages(pg, 0);
+        d->arch.shadow.p2m_pages--;
+        perfc_decr(shadow_alloc_count);
+    }
+    ASSERT(d->arch.shadow.p2m_pages == 0);
+}
+
+/* Set the pool of shadow pages to the required number of pages.
+ * Input will be rounded up to at least shadow_min_acceptable_pages(),
+ * plus space for the p2m table.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int set_sh_allocation(struct domain *d, 
+                                       unsigned int pages,
+                                       int *preempted)
+{
+    struct page_info *pg;
+    unsigned int lower_bound;
+    int j;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    /* Don't allocate less than the minimum acceptable, plus one page per
+     * megabyte of RAM (for the p2m table) */
+    lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
+    if ( pages > 0 && pages < lower_bound )
+        pages = lower_bound;
+    /* Round up to largest block size */
+    pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
+
+    SHADOW_PRINTK("current %i target %i\n", 
+                   d->arch.shadow.total_pages, pages);
+
+    while ( d->arch.shadow.total_pages != pages ) 
+    {
+        if ( d->arch.shadow.total_pages < pages ) 
+        {
+            /* Need to allocate more memory from domheap */
+            pg = alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0); 
+            if ( pg == NULL ) 
+            { 
+                SHADOW_PRINTK("failed to allocate shadow pages.\n");
+                return -ENOMEM;
+            }
+            d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
+            d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
+            for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 
+            {
+                pg[j].u.inuse.type_info = 0;  /* Free page */
+                pg[j].tlbflush_timestamp = 0; /* Not in any TLB */
+            }
+            SH_SET_PFN_ORDER(pg, SHADOW_MAX_ORDER);
+            list_add_tail(&pg->list, 
+                          &d->arch.shadow.freelists[SHADOW_MAX_ORDER]);
+        } 
+        else if ( d->arch.shadow.total_pages > pages ) 
+        {
+            /* Need to return memory to domheap */
+            shadow_prealloc(d, SHADOW_MAX_ORDER);
+            ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER]));
+            pg = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, 
+                            struct page_info, list);
+            list_del(&pg->list);
+            d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
+            d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
+            free_domheap_pages(pg, SHADOW_MAX_ORDER);
+        }
+
+        /* Check to see if we need to yield and try again */
+        if ( preempted && hypercall_preempt_check() )
+        {
+            *preempted = 1;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted)
+/* Hypercall interface to set the shadow memory allocation */
+{
+    unsigned int rv;
+    shadow_lock(d);
+    rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); 
+    SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages,
+                   shadow_get_allocation(d));
+    shadow_unlock(d);
+    return rv;
+}
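+
+/* The unit conversions used just above, as a compilable sketch: with
+ * PAGE_SHIFT == 12, "megabytes << (20 - PAGE_SHIFT)" is megabytes * 256
+ * pages, and set_sh_allocation() then rounds up to a whole number of
+ * max-order blocks.  The helper name is illustrative only and the order
+ * argument stands in for SHADOW_MAX_ORDER. */
+static inline unsigned int sketch_mb_to_shadow_pages(unsigned int megabytes,
+                                                     unsigned int order)
+{
+    unsigned int pages = megabytes << (20 - PAGE_SHIFT);  /* 4kB pages */
+    /* Round up to a multiple of (1 << order), exactly as above. */
+    return (pages + ((1u << order) - 1)) & ~((1u << order) - 1);
+}
+/* e.g. 1MB -> 256 pages; a request that worked out to 513 pages would be
+ * rounded up to 516 with order 2. */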
+
+/**************************************************************************/
+/* Hash table for storing the guest->shadow mappings */
+
+/* Hash function that takes a gfn or mfn, plus another byte of type info */
+typedef u32 key_t;
+static inline key_t sh_hash(unsigned long n, u8 t) 
+{
+    unsigned char *p = (unsigned char *)&n;
+    key_t k = t;
+    int i;
+    for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
+    return k;
+}
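+
+/* A self-contained sketch of the mixing above: it is the classic
+ * shift-add ("sdbm"-style) string hash, applied byte-by-byte to n and
+ * seeded with the type byte; callers then reduce it modulo
+ * SHADOW_HASH_BUCKETS to pick a bucket.  The function name below is
+ * illustrative only. */
+static inline u32 sketch_hash_bucket(unsigned long n, u8 t, u32 nbuckets)
+{
+    unsigned char *p = (unsigned char *)&n;
+    u32 k = t;
+    int i;
+    for ( i = 0; i < sizeof(n); i++ )
+        k = (u32)p[i] + (k << 6) + (k << 16) - k;  /* k = p[i] + k * 65599 */
+    return k % nbuckets;      /* same reduction as lookup/insert/delete */
+}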
+
+#if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
+
+/* Before we get to the mechanism, define a pair of audit functions
+ * that sanity-check the contents of the hash table. */
+static void sh_hash_audit_bucket(struct domain *d, int bucket)
+/* Audit one bucket of the hash table */
+{
+    struct shadow_hash_entry *e, *x;
+    struct page_info *pg;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+    e = &d->arch.shadow.hash_table[bucket];
+    if ( e->t == 0 ) return; /* Bucket is empty */ 
+    while ( e )
+    {
+        /* Empty link? */
+        BUG_ON( e->t == 0 ); 
+        /* Bogus type? */
+        BUG_ON( e->t > (PGC_SH_max_shadow >> PGC_SH_type_shift) );
+        /* Wrong bucket? */
+        BUG_ON( sh_hash(e->n, e->t) % SHADOW_HASH_BUCKETS != bucket ); 
+        /* Duplicate entry? */
+        for ( x = e->next; x; x = x->next )
+            BUG_ON( x->n == e->n && x->t == e->t );
+        /* Bogus MFN? */
+        BUG_ON( !valid_mfn(e->smfn) );
+        pg = mfn_to_page(e->smfn);
+        /* Not a shadow? */
+        BUG_ON( page_get_owner(pg) != 0 );
+        /* Wrong kind of shadow? */
+        BUG_ON( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift 
+                != e->t ); 
+        /* Bad backlink? */
+        BUG_ON( pg->u.inuse.type_info != e->n );
+        if ( e->t != (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+             && e->t != (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+             && e->t != (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift) )
+        {
+            /* Bad shadow flags on guest page? */
+            BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow_flags & (1<<e->t)) );
+        }
+        /* That entry was OK; on we go */
+        e = e->next;
+    }
+}
+
+#else
+#define sh_hash_audit_bucket(_d, _b)
+#endif /* Hashtable bucket audit */
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
+
+static void sh_hash_audit(struct domain *d)
+/* Full audit: audit every bucket in the table */
+{
+    int i;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        sh_hash_audit_bucket(d, i);
+    }
+}
+
+#else
+#define sh_hash_audit(_d)
+#endif /* Hashtable bucket audit */
+
+/* Memory management interface for bucket allocation.
+ * These ought to come out of shadow memory, but at least on 32-bit
+ * machines we are forced to allocate them from xenheap so that we can
+ * address them. */
+static struct shadow_hash_entry *sh_alloc_hash_entry(struct domain *d)
+{
+    struct shadow_hash_entry *extra, *x;
+    int i;
+
+    /* We need to allocate a new node. Ensure the free list is not empty. 
+     * Allocate new entries in units the same size as the original table. */
+    if ( unlikely(d->arch.shadow.hash_freelist == NULL) )
+    {
+        size_t sz = sizeof(void *) + (SHADOW_HASH_BUCKETS * sizeof(*x));
+        extra = xmalloc_bytes(sz);
+
+        if ( extra == NULL )
+        {
+            /* No memory left! */
+            SHADOW_ERROR("xmalloc() failed when allocating hash buckets.\n");
+            domain_crash_synchronous();
+        }
+        memset(extra, 0, sz);
+
+        /* Record the allocation block so it can be correctly freed later. */
+        *((struct shadow_hash_entry **)&extra[SHADOW_HASH_BUCKETS]) = 
+            d->arch.shadow.hash_allocations;
+        d->arch.shadow.hash_allocations = &extra[0];
+
+        /* Thread a free chain through the newly-allocated nodes. */
+        for ( i = 0; i < (SHADOW_HASH_BUCKETS - 1); i++ )
+            extra[i].next = &extra[i+1];
+        extra[i].next = NULL;
+
+        /* Add the new nodes to the free list. */
+        d->arch.shadow.hash_freelist = &extra[0];
+    }
+
+    /* Allocate a new node from the free list. */
+    x = d->arch.shadow.hash_freelist;
+    d->arch.shadow.hash_freelist = x->next;
+    return x;
+}
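+
+/* The layout trick above, made explicit: each xmalloc'd block is
+ * SHADOW_HASH_BUCKETS entries followed by one hidden pointer slot that
+ * chains whole blocks together so shadow_hash_teardown() can free them.
+ * The struct below is only an illustrative picture of that layout:
+ * "&extra[SHADOW_HASH_BUCKETS]" in the allocator is the address of
+ * "next_block" here, and sizeof(struct sketch_hash_block) matches the
+ * "sz" computed above (no padding can intervene, since the entries
+ * themselves contain a pointer). */
+struct sketch_hash_block {
+    struct shadow_hash_entry entries[SHADOW_HASH_BUCKETS];
+    struct shadow_hash_entry *next_block;  /* previously-allocated block */
+};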
+
+static void sh_free_hash_entry(struct domain *d, struct shadow_hash_entry *e)
+{
+    /* Mark the bucket as empty and return it to the free list */
+    e->t = 0; 
+    e->next = d->arch.shadow.hash_freelist;
+    d->arch.shadow.hash_freelist = e;
+}
+
+
+/* Allocate and initialise the table itself.  
+ * Returns 0 for success, 1 for error. */
+static int shadow_hash_alloc(struct domain *d)
+{
+    struct shadow_hash_entry *table;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(!d->arch.shadow.hash_table);
+
+    table = xmalloc_array(struct shadow_hash_entry, SHADOW_HASH_BUCKETS);
+    if ( !table ) return 1;
+    memset(table, 0, 
+           SHADOW_HASH_BUCKETS * sizeof (struct shadow_hash_entry));
+    d->arch.shadow.hash_table = table;
+    return 0;
+}
+
+/* Tear down the hash table and return all memory to Xen.
+ * This function does not care whether the table is populated. */
+static void shadow_hash_teardown(struct domain *d)
+{
+    struct shadow_hash_entry *a, *n;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+
+    /* Return the table itself */
+    xfree(d->arch.shadow.hash_table);
+    d->arch.shadow.hash_table = NULL;
+
+    /* Return any extra allocations */
+    a = d->arch.shadow.hash_allocations;
+    while ( a ) 
+    {
+        /* We stored a linked-list pointer at the end of each allocation */
+        n = *((struct shadow_hash_entry **)(&a[SHADOW_HASH_BUCKETS]));
+        xfree(a);
+        a = n;
+    }
+    d->arch.shadow.hash_allocations = NULL;
+    d->arch.shadow.hash_freelist = NULL;
+}
+
+
+mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, u8 t)
+/* Find an entry in the hash table.  Returns the MFN of the shadow,
+ * or INVALID_MFN if it doesn't exist */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_lookups);
+    key = sh_hash(n, t);
+
+    x = head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+    p = NULL;
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    do
+    {
+        ASSERT(x->t || ((x == head) && (x->next == NULL)));
+
+        if ( x->n == n && x->t == t )
+        {
+            /* Pull-to-front if 'x' isn't already the head item */
+            if ( unlikely(x != head) )
+            {
+                if ( unlikely(d->arch.shadow.hash_walking != 0) )
+                    /* Can't reorder: someone is walking the hash chains */
+                    return x->smfn;
+                else 
+                {
+                    /* Delete 'x' from list and reinsert after head. */
+                    p->next = x->next;
+                    x->next = head->next;
+                    head->next = x;
+                    
+                    /* Swap 'x' contents with head contents. */
+                    SWAP(head->n, x->n);
+                    SWAP(head->t, x->t);
+                    SWAP(head->smfn, x->smfn);
+                }
+            }
+            else
+            {
+                perfc_incrc(shadow_hash_lookup_head);
+            }
+            return head->smfn;
+        }
+
+        p = x;
+        x = x->next;
+    }
+    while ( x != NULL );
+
+    perfc_incrc(shadow_hash_lookup_miss);
+    return _mfn(INVALID_MFN);
+}
+
+void shadow_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Put a mapping (n,t)->smfn into the hash table */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x, *head;
+    key_t key;
+    
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_inserts);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* If the bucket is empty then insert the new page as the head item. */
+    if ( head->t == 0 )
+    {
+        head->n = n;
+        head->t = t;
+        head->smfn = smfn;
+        ASSERT(head->next == NULL);
+    }
+    else 
+    {
+        /* Insert a new entry directly after the head item. */
+        x = sh_alloc_hash_entry(d);
+        x->n = n; 
+        x->t = t;
+        x->smfn = smfn;
+        x->next = head->next;
+        head->next = x;
+    }
+    
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
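+
+/* A hedged sketch of how the lookup/insert pair above is typically used
+ * by the per-type code: "t" is a shadow type index already shifted down
+ * by PGC_SH_type_shift, and a miss is signalled by an invalid mfn.  The
+ * make_the_shadow() step is purely illustrative here.
+ *
+ *     smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);
+ *     if ( !valid_mfn(smfn) )
+ *     {
+ *         smfn = make_the_shadow(v, gmfn, t);    // illustrative only
+ *         shadow_hash_insert(v, mfn_x(gmfn), t, smfn);
+ *     }
+ */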
+
+void shadow_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Excise the mapping (n,t)->smfn from the hash table */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_deletes);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* Match on head item? */
+    if ( head->n == n && head->t == t )
+    {
+        if ( (x = head->next) != NULL )
+        {
+            /* Overwrite head with contents of following node. */
+            head->n = x->n;
+            head->t = x->t;
+            head->smfn = x->smfn;
+
+            /* Delete following node. */
+            head->next = x->next;
+            sh_free_hash_entry(d, x);
+        }
+        else
+        {
+            /* This bucket is now empty. Initialise the head node. */
+            head->t = 0;
+        }
+    }
+    else 
+    {
+        /* Not at the head; need to walk the chain */
+        p = head;
+        x = head->next; 
+        
+        while(1)
+        {
+            ASSERT(x); /* We can't have hit the end, since our target is
+                        * still in the chain somewhere... */
+            if ( x->n == n && x->t == t )
+            {
+                /* Delete matching node. */
+                p->next = x->next;
+                sh_free_hash_entry(d, x);
+                break;
+            }
+            p = x;
+            x = x->next;
+        }
+    }
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
+
+typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
+
+static void hash_foreach(struct vcpu *v, 
+                         unsigned int callback_mask, 
+                         hash_callback_t callbacks[], 
+                         mfn_t callback_mfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0;
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_walking == 0);
+    d->arch.shadow.hash_walking = 1;
+
+    callback_mask &= ~1; /* Never attempt to call back on empty buckets */
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = &d->arch.shadow.hash_table[i]; x; x = x->next )
+        {
+            if ( callback_mask & (1 << x->t) ) 
+            {
+                ASSERT(x->t <= 15);
+                ASSERT(callbacks[x->t] != NULL);
+                if ( (done = callbacks[x->t](v, x->smfn, callback_mfn)) != 0 )
+                    break;
+            }
+        }
+        if ( done ) break; 
+    }
+    d->arch.shadow.hash_walking = 0; 
+}
+
+
+/**************************************************************************/
+/* Destroy a shadow page: simple dispatcher to call the per-type destructor
+ * which will decrement refcounts appropriately and return memory to the 
+ * free pool. */
+
+void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    u32 t = pg->count_info & PGC_SH_type_mask;
+
+
+    SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
+
+    /* Double-check, if we can, that the shadowed page belongs to this
+     * domain, (by following the back-pointer). */
+    ASSERT(t == PGC_SH_fl1_32_shadow  ||  
+           t == PGC_SH_fl1_pae_shadow ||  
+           t == PGC_SH_fl1_64_shadow  || 
+           t == PGC_SH_monitor_table  || 
+           (page_get_owner(mfn_to_page(_mfn(pg->u.inuse.type_info))) 
+            == v->domain)); 
+
+    /* The down-shifts here are so that the switch statement is on nice
+     * small numbers that the compiler will enjoy */
+    switch ( t >> PGC_SH_type_shift )
+    {
+#if CONFIG_PAGING_LEVELS == 2
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn); 
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
+        break;
+#else /* PAE or 64bit */
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGC_SH_l1_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l2_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l3_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
+        break;
+#endif
+    default:
+        SHADOW_PRINTK("tried to destroy shadow of bad type %08lx\n", 
+                       (unsigned long)t);
+        BUG();
+    }    
+}
+
+/**************************************************************************/
+/* Remove all writeable mappings of a guest frame from the shadow tables 
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access.*/
+
+int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 
+                                unsigned int level,
+                                unsigned long fault_addr)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+    struct page_info *pg = mfn_to_page(gmfn);
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* Only remove writable mappings if we are doing shadow refcounts.
+     * In guest refcounting, we trust Xen to already be restricting
+     * all the writes to the guest page tables, so we do not need to
+     * do more. */
+    if ( !shadow_mode_refcounts(v->domain) )
+        return 0;
+
+    /* Early exit if it's already a pagetable, or otherwise not writeable */
+    if ( sh_mfn_is_a_page_table(gmfn) 
+         || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 0;
+
+    perfc_incrc(shadow_writeable);
+
+    /* If this isn't a "normal" writeable page, the domain is trying to 
+     * put pagetables in special memory of some kind.  We can't allow that. */
+    if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
+    {
+        SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %" 
+                      PRtype_info "\n",
+                      mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
+        domain_crash(v->domain);
+    }
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    if ( v == current && level != 0 )
+    {
+        unsigned long gfn;
+        /* Heuristic: there is likely to be only one writeable mapping,
+         * and that mapping is likely to be in the current pagetable,
+         * either in the guest's linear map (linux, windows) or in a
+         * magic slot used to map high memory regions (linux HIGHPTE) */
+
+#define GUESS(_a, _h) do {                                              \
+            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )          \
+                perfc_incrc(shadow_writeable_h_ ## _h);                \
+            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
+                return 1;                                               \
+        } while (0)
+
+        
+        /* Linux lowmem: first 1GB is mapped 1-to-1 above 0xC0000000 */
+        if ( v == current 
+             && (gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x40000000 )
+            GUESS(0xC0000000 + (gfn << PAGE_SHIFT), 4);
+
+        if ( v->arch.shadow.mode->guest_levels == 2 )
+        {
+            if ( level == 1 )
+                /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
+                GUESS(0xC0000000UL + (fault_addr >> 10), 1);
+        }
+#if CONFIG_PAGING_LEVELS >= 3
+        else if ( v->arch.shadow.mode->guest_levels == 3 )
+        {
+            /* 32bit PAE w2k3: linear map at 0xC0000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
+            case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
+            }
+        }
+#if CONFIG_PAGING_LEVELS >= 4
+        else if ( v->arch.shadow.mode->guest_levels == 4 )
+        {
+            /* 64bit w2k3: linear map at 0x0000070000000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
+            case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
+            case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
+            }
+        }
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+
+#undef GUESS
+
+    }
+#endif
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_writeable_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
+    {
+        SHADOW_ERROR("can't find all writeable mappings of mfn %lx: "
+                      "%lu left\n", mfn_x(gmfn),
+                      (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
+        domain_crash(v->domain);
+    }
+    
+    /* We killed at least one writeable mapping, so must flush TLBs. */
+    return 1;
+}
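+
+/* Why the GUESS() shifts above are what they are: if a guest keeps a
+ * linear map of its pagetables at some base address, the level-1 entry
+ * covering virtual address va lives at
+ *
+ *     base + (va >> PAGE_SHIFT) * entry_size
+ *
+ * For 4-byte entries (2-level guests) that is base + ((va >> 12) * 4),
+ * and base + (va >> 10) lands inside the same 4-byte entry; for 8-byte
+ * entries (PAE and 64-bit) the corresponding guesses are va >> 9 for l1,
+ * va >> 18 for l2 (2MB per l2e) and va >> 27 for l3 (1GB per l3e).  The
+ * low two or three bits that differ are harmless, because guess_wrmap()
+ * only cares which page the guessed address falls in.  The base
+ * addresses themselves are OS-specific guesses, as noted above. */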
+
+
+
+/**************************************************************************/
+/* Remove all mappings of a guest frame from the shadow tables.
+ * Returns non-zero if we need to flush TLBs. */
+
+int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    int expected_count;
+
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+
+    perfc_incrc(shadow_mappings);
+    if ( (page->count_info & PGC_count_mask) == 0 )
+        return 0;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* XXX TODO: 
+     * Heuristics for finding the (probably) single mapping of this gmfn */
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_mappings_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
+    if ( (page->count_info & PGC_count_mask) != expected_count )
+    {
+        /* Don't complain if we're in HVM and there's one extra mapping: 
+         * The qemu helper process has an untyped mapping of this dom's RAM */
+        if ( !(shadow_mode_external(v->domain)
+               && (page->count_info & PGC_count_mask) <= 2
+               && (page->u.inuse.type_info & PGT_count_mask) == 0) )
+        {
+            SHADOW_ERROR("can't find all mappings of mfn %lx: "
+                          "c=%08x t=%08lx\n", mfn_x(gmfn), 
+                          page->count_info, page->u.inuse.type_info);
+        }
+    }
+
+    /* We killed at least one mapping, so must flush TLBs. */
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Remove all shadows of a guest frame from the shadow tables */
+
+static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
+/* Follow this shadow's up-pointer, if it has one, and remove the reference
+ * found there.  Returns 1 if that was the only reference to this shadow */
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    mfn_t pmfn;
+    void *vaddr;
+    int rc;
+
+    ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
+    ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
+    
+    if (pg->up == 0) return 0;
+    pmfn = _mfn(pg->up >> PAGE_SHIFT);
+    ASSERT(valid_mfn(pmfn));
+    vaddr = sh_map_domain_page(pmfn);
+    ASSERT(vaddr);
+    vaddr += pg->up & (PAGE_SIZE-1);
+    ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
+    
+    /* Is this the only reference to this shadow? */
+    rc = ((pg->count_info & PGC_SH_count_mask) == 1) ? 1 : 0;
+
+    /* Blank the offending entry */
+    switch ((pg->count_info & PGC_SH_type_mask)) 
+    {
+    case PGC_SH_l1_32_shadow:
+    case PGC_SH_l2_32_shadow:
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
+#else
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
+#endif
+        break;
+#if CONFIG_PAGING_LEVELS >=3
+    case PGC_SH_l1_pae_shadow:
+    case PGC_SH_l2_pae_shadow:
+    case PGC_SH_l2h_pae_shadow:
+    case PGC_SH_l3_pae_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
+        break;
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow:
+    case PGC_SH_l2_64_shadow:
+    case PGC_SH_l3_64_shadow:
+    case PGC_SH_l4_64_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
+        break;
+#endif
+#endif
+    default: BUG(); /* Some weird unknown shadow type */
+    }
+    
+    sh_unmap_domain_page(vaddr);
+    if ( rc )
+        perfc_incrc(shadow_up_pointer);
+    else
+        perfc_incrc(shadow_unshadow_bf);
+
+    return rc;
+}
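+
+/* The "up" back-pointer decoded above packs the machine address of the
+ * (first) shadow entry that references this shadow into a single word:
+ *
+ *     up = (mfn_x(parent_smfn) << PAGE_SHIFT) | byte_offset_of_entry;
+ *
+ * so "up >> PAGE_SHIFT" recovers the parent shadow's mfn and
+ * "up & (PAGE_SIZE-1)" the offset of the referencing entry within it,
+ * which is exactly how the function above locates and blanks that entry.
+ * (This is a sketch of the encoding as used here; the field is written
+ * when the referencing entry takes its reference to the shadow.) */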
+
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
+/* Remove the shadows of this guest page.  
+ * If all != 0, find all shadows, if necessary by walking the tables.
+ * Otherwise, just try the (much faster) heuristics, which will remove 
+ * at most one reference to each shadow of the page. */
+{
+    struct page_info *pg;
+    mfn_t smfn;
+    u32 sh_flags;
+    unsigned char t;
+
+    /* Dispatch table for getting per-type functions: each level must
+     * be called with the function to remove a lower-level shadow. */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+        NULL, /* l1_32   */
+        NULL, /* fl1_32  */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32   */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32   */
+#endif
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
+#else 
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#endif
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64   */
+#else
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+#endif
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    /* Another lookup table, for choosing which mask to use */
+    static unsigned int masks[16] = {
+        0, /* none    */
+        1 << (PGC_SH_l2_32_shadow >> PGC_SH_type_shift), /* l1_32   */
+        0, /* fl1_32  */
+        0, /* l2_32   */
+        ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
+         | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
+        0, /* fl1_pae */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
+        0, /* l3_pae  */
+        1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
+        0, /* fl1_64  */
+        1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
+        1 << (PGC_SH_l4_64_shadow >> PGC_SH_type_shift), /* l3_64   */
+        0, /* l4_64   */
+        0, /* p2m     */
+        0  /* unused  */
+    };
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    pg = mfn_to_page(gmfn);
+
+    /* Bale out now if the page is not shadowed */
+    if ( (pg->count_info & PGC_page_table) == 0 )
+        return;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
+
+    /* Search for this shadow in all appropriate shadows */
+    perfc_incrc(shadow_unshadow);
+    sh_flags = pg->shadow_flags;
+
+    /* Lower-level shadows need to be excised from upper-level shadows.
+     * This call to hash_foreach() looks dangerous but is in fact OK: each
+     * call will remove at most one shadow, and terminate immediately when
+     * it does remove it, so we never walk the hash after doing a deletion.  */
+#define DO_UNSHADOW(_type) do {                                 \
+    t = (_type) >> PGC_SH_type_shift;                          \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);              \
+    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )       \
+        hash_foreach(v, masks[t], callbacks, smfn);             \
+} while (0)
+
+    /* Top-level shadows need to be unpinned */
+#define DO_UNPIN(_type) do {                                             \
+    t = (_type) >> PGC_SH_type_shift;                                   \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
+    if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
+        sh_unpin(v, smfn);                                              \
+    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
+        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
+} while (0)
+
+    if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
+    if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
+    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
+    if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
+    if ( sh_flags & SHF_L3_64 )   DO_UNSHADOW(PGC_SH_l3_64_shadow);
+    if ( sh_flags & SHF_L4_64 )   DO_UNPIN(PGC_SH_l4_64_shadow);
+#endif
+#endif
+
+#undef DO_UNSHADOW
+#undef DO_UNPIN
+
+
+#if CONFIG_PAGING_LEVELS > 2
+    /* We may have caused some PAE l3 entries to change: need to 
+     * fix up the copies of them in various places */
+    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
+        sh_pae_recopy(v->domain);
+#endif
+
+    /* If that didn't catch the shadows, something is wrong */
+    if ( all && (pg->count_info & PGC_page_table) )
+    {
+        SHADOW_ERROR("can't find all shadows of mfn %05lx 
(shadow_flags=%08x)\n",
+                      mfn_x(gmfn), pg->shadow_flags);
+        domain_crash(v->domain);
+    }
+}
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
+/* Even harsher: this is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+{
+    shadow_remove_all_shadows(v, gmfn);
+    /* XXX TODO:
+     * Rework this hashtable walker to return a linked-list of all 
+     * the shadows it modified, then do breadth-first recursion 
+     * to find the way up to higher-level tables and unshadow them too. 
+     *
+     * The current code (just tearing down each page's shadows as we
+     * detect that it is not a pagetable) is correct, but very slow. 
+     * It means extra emulated writes and slows down removal of mappings. */
+}
+
+/**************************************************************************/
+
+void sh_update_paging_modes(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
+    mfn_t old_guest_table;
+
+    ASSERT(shadow_lock_is_acquired(d));
+
+    // Valid transitions handled by this function:
+    // - For PV guests:
+    //     - after a shadow mode has been changed
+    // - For HVM guests:
+    //     - after a shadow mode has been changed
+    //     - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
+    //
+
+    // Avoid determining the current shadow mode for uninitialized CPUs, as
+    // we cannot yet tell whether it is an HVM or PV domain.
+    //
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        printk("%s: postponing determination of shadow mode\n", __func__);
+        return;
+    }
+
+    // First, tear down any old shadow tables held by this vcpu.
+    //
+    shadow_detach_old_tables(v);
+
+    if ( !hvm_guest(v) )
+    {
+        ///
+        /// PV guest
+        ///
+#if CONFIG_PAGING_LEVELS == 4
+        if ( pv_32bit_guest(v) )
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
+        else
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+#elif CONFIG_PAGING_LEVELS == 3
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#elif CONFIG_PAGING_LEVELS == 2
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#else
+#error unexpected paging mode
+#endif
+    }
+    else
+    {
+        ///
+        /// HVM guest
+        ///
+        ASSERT(shadow_mode_translate(d));
+        ASSERT(shadow_mode_external(d));
+
+        v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v);
+        if ( !v->arch.shadow.hvm_paging_enabled )
+        {
+            
+            /* Set v->arch.guest_table to use the p2m map, and choose
+             * the appropriate shadow mode */
+            old_guest_table = pagetable_get_mfn(v->arch.guest_table);
+#if CONFIG_PAGING_LEVELS == 2
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#elif CONFIG_PAGING_LEVELS == 3 
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#else /* CONFIG_PAGING_LEVELS == 4 */
+            { 
+                l4_pgentry_t *l4e; 
+                /* Use the start of the first l3 table as a PAE l3 */
+                ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+                l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+                ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+                v->arch.guest_table =
+                    pagetable_from_pfn(l4e_get_pfn(l4e[0]));
+                sh_unmap_domain_page(l4e);
+            }
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#endif
+            /* Fix up refcounts on guest_table */
+            get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
+            if ( mfn_x(old_guest_table) != 0 )
+                put_page(mfn_to_page(old_guest_table));
+        }
+        else
+        {
+#ifdef __x86_64__
+            if ( hvm_long_mode_enabled(v) )
+            {
+                // long mode guest...
+                v->arch.shadow.mode =
+                    &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
+            }
+            else
+#endif
+                if ( hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PAE )
+                {
+#if CONFIG_PAGING_LEVELS >= 3
+                    // 32-bit PAE mode guest...
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
+#else
+                    SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
+                    domain_crash(d);
+                    return;
+#endif
+                }
+                else
+                {
+                    // 32-bit 2 level guest...
+#if CONFIG_PAGING_LEVELS >= 3
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
+#else
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
+#endif
+                }
+        }
+
+        if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
+        {
+            mfn_t mmfn = shadow_make_monitor_table(v);
+            v->arch.monitor_table = pagetable_from_mfn(mmfn);
+            v->arch.monitor_vtable = sh_map_domain_page(mmfn);
+        } 
+
+        if ( v->arch.shadow.mode != old_mode )
+        {
+            SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
+                           "(was g=%u s=%u)\n",
+                           d->domain_id, v->vcpu_id, 
+                           v->arch.shadow.mode->guest_levels,
+                           v->arch.shadow.mode->shadow_levels,
+                           old_mode ? old_mode->guest_levels : 0,
+                           old_mode ? old_mode->shadow_levels : 0);
+            if ( old_mode &&
+                 (v->arch.shadow.mode->shadow_levels !=
+                  old_mode->shadow_levels) )
+            {
+                /* Need to make a new monitor table for the new mode */
+                mfn_t new_mfn, old_mfn;
+
+                if ( v != current ) 
+                {
+                    SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
+                                  "this HVM vcpu's (d=%u v=%u) paging mode!\n",
+                                  current->domain->domain_id, current->vcpu_id,
+                                  v->domain->domain_id, v->vcpu_id);
+                    domain_crash(v->domain);
+                    return;
+                }
+
+                sh_unmap_domain_page(v->arch.monitor_vtable);
+                old_mfn = pagetable_get_mfn(v->arch.monitor_table);
+                v->arch.monitor_table = pagetable_null();
+                new_mfn = v->arch.shadow.mode->make_monitor_table(v);
+                v->arch.monitor_table = pagetable_from_mfn(new_mfn);
+                v->arch.monitor_vtable = sh_map_domain_page(new_mfn);
+                SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
+                               mfn_x(new_mfn));
+
+                /* Don't be running on the old monitor table when we 
+                 * pull it down!  Switch CR3, and warn the HVM code that
+                 * its host cr3 has changed. */
+                make_cr3(v, mfn_x(new_mfn));
+                write_ptbase(v);
+                hvm_update_host_cr3(v);
+                old_mode->destroy_monitor_table(v, old_mfn);
+            }
+        }
+
+        // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
+        //        These are HARD: think about the case where two CPU's have
+        //        different values for CR4.PSE and CR4.PGE at the same time.
+        //        This *does* happen, at least for CR4.PGE...
+    }
+
+    v->arch.shadow.mode->update_cr3(v);
+}
+
+/**************************************************************************/
+/* Turning on and off shadow features */
+
+static void sh_new_mode(struct domain *d, u32 new_mode)
+/* Inform all the vcpus that the shadow mode has been changed */
+{
+    struct vcpu *v;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d != current->domain);
+    d->arch.shadow.mode = new_mode;
+    if ( new_mode & SHM2_translate ) 
+        shadow_audit_p2m(d);
+    for_each_vcpu(d, v)
+        sh_update_paging_modes(v);
+}
+
+static int shadow_enable(struct domain *d, u32 mode)
+/* Turn on "permanent" shadow features: external, translate, refcount.
+ * Can only be called once on a domain, and these features cannot be
+ * disabled. 
+ * Returns 0 for success, -errno for failure. */
+{    
+    unsigned int old_pages;
+    int rv = 0;
+
+    mode |= SHM2_enable;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    /* Sanity check the arguments */
+    if ( (d == current->domain) ||
+         shadow_mode_enabled(d) ||
+         ((mode & SHM2_external) && !(mode & SHM2_translate)) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    // XXX -- eventually would like to require that all memory be allocated
+    // *after* shadow_enabled() is called...  So here, we would test to make
+    // sure that d->page_list is empty.
+#if 0
+    spin_lock(&d->page_alloc_lock);
+    if ( !list_empty(&d->page_list) )
+    {
+        spin_unlock(&d->page_alloc_lock);
+        rv = -EINVAL;
+        goto out;
+    }
+    spin_unlock(&d->page_alloc_lock);
+#endif
+
+    /* Init the shadow memory allocation if the user hasn't done so */
+    old_pages = d->arch.shadow.total_pages;
+    if ( old_pages == 0 )
+        if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
+        {
+            set_sh_allocation(d, 0, NULL);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Init the hash table */
+    if ( shadow_hash_alloc(d) != 0 )
+    {
+        set_sh_allocation(d, old_pages, NULL);            
+        rv = -ENOMEM;
+        goto out;
+    }
+
+    /* Init the P2M table */
+    if ( mode & SHM2_translate )
+        if ( !shadow_alloc_p2m_table(d) )
+        {
+            shadow_hash_teardown(d);
+            set_sh_allocation(d, old_pages, NULL);
+            shadow_p2m_teardown(d);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Update the bits */
+    sh_new_mode(d, mode);
+    shadow_audit_p2m(d);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return rv;
+}
+
+void shadow_teardown(struct domain *d)
+/* Destroy the shadow pagetables of this domain and free its shadow memory.
+ * Should only be called for dying domains. */
+{
+    struct vcpu *v;
+    mfn_t mfn;
+
+    ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+    ASSERT(d != current->domain);
+
+    if ( !shadow_lock_is_acquired(d) )
+        shadow_lock(d); /* Keep various asserts happy */
+
+    if ( shadow_mode_enabled(d) )
+    {
+        /* Release the shadow and monitor tables held by each vcpu */
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+            if ( shadow_mode_external(d) )
+            {
+                mfn = pagetable_get_mfn(v->arch.monitor_table);
+                if ( valid_mfn(mfn) && (mfn_x(mfn) != 0) )
+                    shadow_destroy_monitor_table(v, mfn);
+                v->arch.monitor_table = pagetable_null();
+            }
+        }
+    }
+
+    if ( d->arch.shadow.total_pages != 0 )
+    {
+        SHADOW_PRINTK("teardown of domain %u starts."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        /* Destroy all the shadows and release memory to domheap */
+        set_sh_allocation(d, 0, NULL);
+        /* Release the hash table back to xenheap */
+        if (d->arch.shadow.hash_table) 
+            shadow_hash_teardown(d);
+        /* Release the log-dirty bitmap of dirtied pages */
+        sh_free_log_dirty_bitmap(d);
+        /* Should not have any more memory held */
+        SHADOW_PRINTK("teardown done."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        ASSERT(d->arch.shadow.total_pages == 0);
+    }
+
+    /* We leave the "permanent" shadow modes enabled, but clear the
+     * log-dirty mode bit.  We don't want any more mark_dirty()
+     * calls now that we've torn down the bitmap */
+    d->arch.shadow.mode &= ~SHM2_log_dirty;
+
+    shadow_unlock(d);
+}
+
+void shadow_final_teardown(struct domain *d)
+/* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
+{
+
+    SHADOW_PRINTK("dom %u final teardown starts."
+                   "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+
+    /* Double-check that the domain didn't have any shadow memory.  
+     * It is possible for a domain that never got domain_kill()ed
+     * to get here with its shadow allocation intact. */
+    if ( d->arch.shadow.total_pages != 0 )
+        shadow_teardown(d);
+
+    /* It is now safe to pull down the p2m map. */
+    if ( d->arch.shadow.p2m_pages != 0 )
+        shadow_p2m_teardown(d);
+
+    SHADOW_PRINTK("dom %u final teardown done."
+                   "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+}
+
+static int shadow_one_bit_enable(struct domain *d, u32 mode)
+/* Turn on a single shadow mode feature */
+{
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Sanity check the call */
+    if ( d == current->domain || (d->arch.shadow.mode & mode) )
+    {
+        return -EINVAL;
+    }
+
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* Init the shadow memory allocation and the hash table */
+        if ( set_sh_allocation(d, 1, NULL) != 0 
+             || shadow_hash_alloc(d) != 0 )
+        {
+            set_sh_allocation(d, 0, NULL);
+            return -ENOMEM;
+        }
+    }
+
+    /* Update the bits */
+    sh_new_mode(d, d->arch.shadow.mode | mode);
+
+    return 0;
+}
+
+static int shadow_one_bit_disable(struct domain *d, u32 mode) 
+/* Turn off a single shadow mode feature */
+{
+    struct vcpu *v;
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Sanity check the call */
+    if ( d == current->domain || !(d->arch.shadow.mode & mode) )
+    {
+        return -EINVAL;
+    }
+
+    /* Update the bits */
+    sh_new_mode(d, d->arch.shadow.mode & ~mode);
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* Get this domain off shadows */
+        SHADOW_PRINTK("un-shadowing of domain %u starts."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+#if CONFIG_PAGING_LEVELS == 4
+            if ( !(v->arch.flags & TF_kernel_mode) )
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
+            else
+#endif
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
+
+        }
+
+        /* Pull down the memory allocation */
+        if ( set_sh_allocation(d, 0, NULL) != 0 )
+        {
+            // XXX - How can this occur?
+            //       Seems like a bug to return an error now that we've
+            //       disabled the relevant shadow mode.
+            //
+            return -ENOMEM;
+        }
+        shadow_hash_teardown(d);
+        SHADOW_PRINTK("un-shadowing of domain %u done."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+    }
+
+    return 0;
+}
+
+/* Enable/disable ops for the "test" and "log-dirty" modes */
+int shadow_test_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_enabled(d) )
+    {
+        SHADOW_ERROR("Don't support enabling test mode"
+                      "on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_enable);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+int shadow_test_disable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+    ret = shadow_one_bit_disable(d, SHM2_enable);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+static int
+sh_alloc_log_dirty_bitmap(struct domain *d)
+{
+    ASSERT(d->arch.shadow.dirty_bitmap == NULL);
+    d->arch.shadow.dirty_bitmap_size =
+        (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) &
+        ~(BITS_PER_LONG - 1);
+    d->arch.shadow.dirty_bitmap =
+        xmalloc_array(unsigned long,
+                      d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG);
+    if ( d->arch.shadow.dirty_bitmap == NULL )
+    {
+        d->arch.shadow.dirty_bitmap_size = 0;
+        return -ENOMEM;
+    }
+    memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8);
+
+    return 0;
+}
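
The rounding in sh_alloc_log_dirty_bitmap just pads the bitmap to a whole
number of unsigned longs before carving it into array elements.  A minimal
stand-alone sketch of the same arithmetic (the max_pfn value is an arbitrary
assumption, not taken from this changeset):

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG ((unsigned long)sizeof(unsigned long) * CHAR_BIT)

int main(void)
{
    unsigned long max_pfn = 0x12345;  /* assumed guest size, ~291MB of RAM */
    /* Round up to a multiple of BITS_PER_LONG, as the allocator above does */
    unsigned long bits = (max_pfn + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);

    printf("bitmap: %lu bits = %lu longs = %lu bytes\n",
           bits, bits / BITS_PER_LONG, bits / 8);
    return 0;
}
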
+
+static void
+sh_free_log_dirty_bitmap(struct domain *d)
+{
+    d->arch.shadow.dirty_bitmap_size = 0;
+    if ( d->arch.shadow.dirty_bitmap )
+    {
+        xfree(d->arch.shadow.dirty_bitmap);
+        d->arch.shadow.dirty_bitmap = NULL;
+    }
+}
+
+static int shadow_log_dirty_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_log_dirty(d) )
+    {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if ( shadow_mode_enabled(d) )
+    {
+        SHADOW_ERROR("Don't (yet) support enabling log-dirty"
+                      "on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = sh_alloc_log_dirty_bitmap(d);
+    if ( ret != 0 )
+    {
+        sh_free_log_dirty_bitmap(d);
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_log_dirty);
+    if ( ret != 0 )
+        sh_free_log_dirty_bitmap(d);
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return ret;
+}
+
+static int shadow_log_dirty_disable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+    ret = shadow_one_bit_disable(d, SHM2_log_dirty);
+    if ( !shadow_mode_log_dirty(d) )
+        sh_free_log_dirty_bitmap(d);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+/**************************************************************************/
+/* P2M map manipulations */
+
+static void
+sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
+{
+    struct vcpu *v;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+    SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn);
+    //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn);
+
+    shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
+    if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+    shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+    set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+}
+
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                  unsigned long mfn)
+{
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+    sh_p2m_remove_page(d, gfn, mfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);    
+}
+
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                               unsigned long mfn)
+{
+    struct vcpu *v;
+    unsigned long ogfn;
+    mfn_t omfn;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+
+    SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    omfn = sh_gfn_to_mfn(d, gfn);
+    if ( valid_mfn(omfn) )
+    {
+        /* Get rid of the old mapping, especially any shadows */
+        shadow_remove_all_shadows_and_parents(v, omfn);
+        if ( shadow_remove_all_mappings(v, omfn) )
+            flush_tlb_mask(d->domain_dirty_cpumask);
+        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+    }        
+
+    ogfn = sh_mfn_to_gfn(d, _mfn(mfn));
+    if (
+#ifdef __x86_64__
+        (ogfn != 0x5555555555555555L)
+#else
+        (ogfn != 0x55555555L)
+#endif
+        && (ogfn != INVALID_M2P_ENTRY)
+        && (ogfn != gfn) )
+    {
+        /* This machine frame is already mapped at another physical address */
+        SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
+                       mfn, ogfn, gfn);
+        if ( valid_mfn(omfn = sh_gfn_to_mfn(d, ogfn)) ) 
+        {
+            SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", 
+                           ogfn , mfn_x(omfn));
+            if ( mfn_x(omfn) == mfn ) 
+                sh_p2m_remove_page(d, ogfn, mfn);
+        }
+    }
+
+    shadow_set_p2m_entry(d, gfn, _mfn(mfn));
+    set_gpfn_from_mfn(mfn, gfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);
+}
+
+/**************************************************************************/
+/* Log-dirty mode support */
+
+/* Convert a shadow to log-dirty mode. */
+void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
+{
+    BUG();
+}
+
+
+/* Read a domain's log-dirty bitmap and stats.  
+ * If the operation is a CLEAN, clear the bitmap and stats as well. */
+static int shadow_log_dirty_op(
+    struct domain *d, struct xen_domctl_shadow_op *sc)
+{
+    int i, rv = 0, clean = 0;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+    SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 
+                  (clean) ? "clean" : "peek",
+                  d->domain_id,
+                  d->arch.shadow.fault_count, 
+                  d->arch.shadow.dirty_count);
+
+    sc->stats.fault_count = d->arch.shadow.fault_count;
+    sc->stats.dirty_count = d->arch.shadow.dirty_count;    
+        
+    if ( clean ) 
+    {
+        struct list_head *l, *t;
+        struct page_info *pg;
+
+        /* Need to revoke write access to the domain's pages again. 
+         * In future, we'll have a less heavy-handed approach to this, 
+         * but for now, we just unshadow everything except Xen. */
+        list_for_each_safe(l, t, &d->arch.shadow.toplevel_shadows)
+        {
+            pg = list_entry(l, struct page_info, list);
+            shadow_unhook_mappings(d->vcpu[0], page_to_mfn(pg));
+        }
+
+        d->arch.shadow.fault_count = 0;
+        d->arch.shadow.dirty_count = 0;
+    }
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) ||
+         (d->arch.shadow.dirty_bitmap == NULL) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+ 
+    if ( sc->pages > d->arch.shadow.dirty_bitmap_size )
+        sc->pages = d->arch.shadow.dirty_bitmap_size; 
+
+#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += CHUNK )
+    {
+        int bytes = ((((sc->pages - i) > CHUNK) 
+                      ? CHUNK 
+                      : (sc->pages - i)) + 7) / 8;
+     
+        if ( copy_to_guest_offset(
+                 sc->dirty_bitmap, 
+                 i/(8*sizeof(unsigned long)),
+                 d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                 (bytes + sizeof(unsigned long) - 1) / sizeof(unsigned long)) )
+        {
+            rv = -EINVAL;
+            goto out;
+        }
+
+        if ( clean )
+            memset(d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                   0, bytes);
+    }
+#undef CHUNK
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return rv;
+}
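
The CHUNK loop above transfers and clears the bitmap 8192 bits (1kB) at a
time so each pass stays within the L1 cache.  Below is a rough user-space
analogue of the same chunking arithmetic, with memcpy standing in for
copy_to_guest_offset; the function name and parameters are illustrative
assumptions, not part of this interface:

#include <string.h>

#define CHUNK_BITS (8 * 1024)   /* 1kB of bitmap per iteration */

/* Copy 'pages' bits of dirty bitmap from 'src' to 'dst', optionally
 * clearing 'src' as we go; same offset and rounding maths as the loop above. */
void copy_dirty_bitmap(unsigned long *dst, unsigned long *src,
                       unsigned long pages, int clean)
{
    unsigned long i;

    for ( i = 0; i < pages; i += CHUNK_BITS )
    {
        unsigned long bits  = (pages - i) > CHUNK_BITS ? CHUNK_BITS : (pages - i);
        unsigned long bytes = (bits + 7) / 8;                   /* round up  */
        unsigned long off   = i / (8 * sizeof(unsigned long));  /* in longs  */
        unsigned long longs = (bytes + sizeof(unsigned long) - 1)
                              / sizeof(unsigned long);

        memcpy(dst + off, src + off, longs * sizeof(unsigned long));
        if ( clean )
            memset(src + off, 0, bytes);
    }
}
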
+
+
+/* Mark a page as dirty */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    unsigned long pfn;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(shadow_mode_log_dirty(d));
+
+    if ( !valid_mfn(gmfn) )
+        return;
+
+    ASSERT(d->arch.shadow.dirty_bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the 
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here...
+     */
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return;
+
+    /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
+    if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) 
+    { 
+        if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) )
+        {
+            SHADOW_DEBUG(LOGDIRTY, 
+                          "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n",
+                          mfn_x(gmfn), pfn, d->domain_id);
+            d->arch.shadow.dirty_count++;
+        }
+    }
+    else
+    {
+        SHADOW_PRINTK("mark_dirty OOR! "
+                       "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                       "owner=%d c=%08x t=%" PRtype_info "\n",
+                       mfn_x(gmfn), 
+                       pfn, 
+                       d->arch.shadow.dirty_bitmap_size,
+                       d->domain_id,
+                       (page_get_owner(mfn_to_page(gmfn))
+                        ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                        : -1),
+                       mfn_to_page(gmfn)->count_info, 
+                       mfn_to_page(gmfn)->u.inuse.type_info);
+    }
+}
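
Because every caller of sh_do_mark_dirty holds the shadow lock, the bitmap
update can use a plain, non-atomic test-and-set.  A minimal generic-C sketch
of such a primitive over an unsigned long array (not the Xen helper itself,
just an illustration of the idea):

#include <limits.h>

#define BITS_PER_LONG ((unsigned long)sizeof(unsigned long) * CHAR_BIT)

/* Set bit 'nr' in 'bitmap' and return its previous value.  Only safe when
 * the caller already serialises access, as the shadow lock does above. */
int test_and_set_bit_nonatomic(unsigned long nr, unsigned long *bitmap)
{
    unsigned long mask  = 1UL << (nr % BITS_PER_LONG);
    unsigned long *word = &bitmap[nr / BITS_PER_LONG];
    int old = (*word & mask) != 0;

    *word |= mask;
    return old;
}
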
+
+
+/**************************************************************************/
+/* Shadow-control XEN_DOMCTL dispatcher */
+
+int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+{
+    int rc, preempted = 0;
+
+    if ( unlikely(d == current->domain) )
+    {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+        if ( shadow_mode_log_dirty(d) )
+            if ( (rc = shadow_log_dirty_disable(d)) != 0 ) 
+                return rc;
+        if ( d->arch.shadow.mode & SHM2_enable )
+            if ( (rc = shadow_test_disable(d)) != 0 ) 
+                return rc;
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+        return shadow_test_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        return shadow_log_dirty_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+        return shadow_enable(d, SHM2_refcounts|SHM2_translate);
+
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+        return shadow_log_dirty_op(d, sc);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
+            return shadow_log_dirty_enable(d);
+        return shadow_enable(d, sc->mode << SHM2_shift);
+
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = shadow_get_allocation(d);
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+        rc = shadow_set_allocation(d, sc->mb, &preempted);
+        if ( preempted )
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_domctl, "h", u_domctl);
+        else 
+            /* Finished.  Return the new allocation */
+            sc->mb = shadow_get_allocation(d);
+        return rc;
+
+    default:
+        SHADOW_ERROR("Bad shadow op %u\n", sc->op);
+        return -EINVAL;
+    }
+}
+
+
+/**************************************************************************/
+/* Auditing shadow tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
+
+void shadow_audit_tables(struct vcpu *v) 
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2),  /* l2_32   */
+#else 
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2),  /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3),  /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4),  /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4),  /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4),  /* l4_64   */
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS > 2 */
+        NULL  /* All the rest */
+    };
+    unsigned int mask; 
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+    
+    if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
+        mask = ~1; /* Audit every table in the system */
+    else 
+    {
+        /* Audit only the current mode's tables */
+        switch ( v->arch.shadow.mode->guest_levels )
+        {
+        case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
+        case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
+                        |SHF_L2H_PAE|SHF_L3_PAE); break;
+        case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
+                        |SHF_L3_64|SHF_L4_64); break;
+        default: BUG();
+        }
+    }
+
+    hash_foreach(v, mask, callbacks, _mfn(INVALID_MFN));
+}
+
+#endif /* Shadow audit */
+
+
+/**************************************************************************/
+/* Auditing p2m tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_P2M
+
+void shadow_audit_p2m(struct domain *d)
+{
+    struct list_head *entry;
+    struct page_info *page;
+    struct domain *od;
+    unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
+    mfn_t p2mfn;
+    unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
+    int test_linear;
+    
+    if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) )
+        return;
+
+    //SHADOW_PRINTK("p2m audit starts\n");
+
+    test_linear = ( (d == current->domain) && current->arch.monitor_vtable );
+    if ( test_linear )
+        local_flush_tlb(); 
+
+    /* Audit part one: walk the domain's page allocation list, checking 
+     * the m2p entries. */
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        page = list_entry(entry, struct page_info, list);
+        mfn = mfn_x(page_to_mfn(page));
+
+        // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 
+
+        od = page_get_owner(page);
+
+        if ( od != d ) 
+        {
+            SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
+                           mfn, od, (od?od->domain_id:-1), d, d->domain_id);
+            continue;
+        }
+
+        gfn = get_gpfn_from_mfn(mfn);
+        if ( gfn == INVALID_M2P_ENTRY ) 
+        {
+            orphans_i++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
+            //               mfn); 
+            continue;
+        }
+
+        if ( gfn == 0x55555555 ) 
+        {
+            orphans_d++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 
+            //               mfn); 
+            continue;
+        }
+
+        p2mfn = sh_gfn_to_mfn_foreign(d, gfn);
+        if ( mfn_x(p2mfn) != mfn )
+        {
+            mpbad++;
+            SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
+                           " (-> gfn %#lx)\n",
+                           mfn, gfn, mfn_x(p2mfn),
+                           (mfn_valid(p2mfn)
+                            ? get_gpfn_from_mfn(mfn_x(p2mfn))
+                            : -1u));
+            /* This m2p entry is stale: the domain has another frame in
+             * this physical slot.  No great disaster, but for neatness,
+             * blow away the m2p entry. */ 
+            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+        }
+
+        if ( test_linear )
+        {
+            lp2mfn = get_mfn_from_gpfn(gfn);
+            if ( lp2mfn != mfn_x(p2mfn) )
+            {
+                SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
+                               "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
+            }
+        }
+
+        // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 
+        //                mfn, gfn, p2mfn, lp2mfn); 
+    }   
+
+    /* Audit part two: walk the domain's p2m table, checking the entries. */
+    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
+    {
+        l2_pgentry_t *l2e;
+        l1_pgentry_t *l1e;
+        int i1, i2;
+        
+#if CONFIG_PAGING_LEVELS == 4
+        l4_pgentry_t *l4e;
+        l3_pgentry_t *l3e;
+        int i3, i4;
+        l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#elif CONFIG_PAGING_LEVELS == 3
+        l3_pgentry_t *l3e;
+        int i3;
+        l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#endif
+
+        gfn = 0;
+#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS >= 4
+        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
+        {
+            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
+            {
+                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
+                continue;
+            }
+            l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4])));
+#endif /* now at levels 3 or 4... */
+            for ( i3 = 0; 
+                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); 
+                  i3++ )
+            {
+                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
+                {
+                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                    continue;
+                }
+                l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3])));
+#endif /* all levels... */
+                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
+                {
+                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
+                    {
+                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                    l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2])));
+                    
+                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
+                    {
+                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                            continue;
+                        mfn = l1e_get_pfn(l1e[i1]);
+                        ASSERT(valid_mfn(_mfn(mfn)));
+                        m2pfn = get_gpfn_from_mfn(mfn);
+                        if ( m2pfn != gfn )
+                        {
+                            pmbad++;
+                            SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                           " -> gfn %#lx\n", gfn, mfn, m2pfn);
+                            BUG();
+                        }
+                    }
+                    sh_unmap_domain_page(l1e);
+                }
+#if CONFIG_PAGING_LEVELS >= 3
+                sh_unmap_domain_page(l2e);
+            }
+#if CONFIG_PAGING_LEVELS >= 4
+            sh_unmap_domain_page(l3e);
+        }
+#endif
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+        sh_unmap_domain_page(l4e);
+#elif CONFIG_PAGING_LEVELS == 3
+        sh_unmap_domain_page(l3e);
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        sh_unmap_domain_page(l2e);
+#endif
+
+    }
+
+    //SHADOW_PRINTK("p2m audit complete\n");
+    //if ( orphans_i | orphans_d | mpbad | pmbad ) 
+    //    SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
+    //                   orphans_i + orphans_d, orphans_i, orphans_d,
+    if ( mpbad | pmbad ) 
+        SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
+                       pmbad, mpbad);
+}
+
+#endif /* p2m audit */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/shadow/multi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,4492 @@
+/******************************************************************************
+ * arch/x86/mm/shadow/multi.c
+ *
+ * Simple, mostly-synchronous shadow page tables. 
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+// DESIGN QUESTIONS:
+// Why use subshadows for PAE guests?
+// - reduces pressure in the hash table
+// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
+// - would need to find space in the page_info to store 7 more bits of
+//   backpointer
+// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
+//   figure out when to demote the guest page from l3 status
+//
+// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
+// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
+//   space for both PV and HVM guests.
+//
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/shadow.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include "private.h"
+#include "types.h"
+
+/* The first cut: an absolutely synchronous, trap-and-emulate version,
+ * supporting only HVM guests (and so only "external" shadow mode). 
+ *
+ * THINGS TO DO LATER:
+ * 
+ * FIX GVA_TO_GPA
+ * The current interface returns an unsigned long, which is not big enough
+ * to hold a physical address in PAE.  Should return a gfn instead.
+ * 
+ * TEARDOWN HEURISTICS
+ * Also: have a heuristic for when to destroy a previous paging-mode's 
+ * shadows.  When a guest is done with its start-of-day 32-bit tables
+ * and reuses the memory, we want to drop those shadows.  Start by using the 
+ * presence of shadows of a page in two modes as a hint, but beware of clever tricks 
+ * like reusing a pagetable for both PAE and 64-bit during boot...
+ *
+ * PAE LINEAR MAPS
+ * Rework shadow_get_l*e() to have the option of using map_domain_page()
+ * instead of linear maps.  Add appropriate unmap_l*e calls in the users. 
+ * Then we can test the speed difference made by linear maps.  If the 
+ * map_domain_page() version is OK on PAE, we could maybe allow a lightweight 
+ * l3-and-l2h-only shadow mode for PAE PV guests that would allow them 
+ * to share l2h pages again. 
+ *
+ * PAE L3 COPYING
+ * In this code, we copy all 32 bytes of a PAE L3 every time we change an 
+ * entry in it, and every time we change CR3.  We copy it for the linear 
+ * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
+ * buffer so it fits in CR3.  Maybe we can avoid some of this recopying 
+ * by using the shadow directly in some places. 
+ * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending.
+ *
+ * GUEST_WALK_TABLES TLB FLUSH COALESCE
+ * guest_walk_tables can do up to three remote TLB flushes as it walks to
+ * the first l1 of a new pagetable.  Should coalesce the flushes to the end, 
+ * and if we do flush, re-do the walk.  If anything has changed, then 
+ * pause all the other vcpus and do the walk *again*.
+ *
+ * WP DISABLED
+ * Consider how to implement having the WP bit of CR0 set to 0.  
+ * Since we need to be able to cause write faults to pagetables, this might
+ * end up looking like not having the (guest) pagetables present at all in 
+ * HVM guests...
+ *
+ * PSE disabled / PSE36
+ * We don't support any modes other than PSE enabled, PSE36 disabled.
+ * Neither of those would be hard to change, but we'd need to be able to 
+ * deal with shadows made in one mode and used in another.
+ */
+
+#define FETCH_TYPE_PREFETCH 1
+#define FETCH_TYPE_DEMAND   2
+#define FETCH_TYPE_WRITE    4
+typedef enum {
+    ft_prefetch     = FETCH_TYPE_PREFETCH,
+    ft_demand_read  = FETCH_TYPE_DEMAND,
+    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
+} fetch_type_t;
+
+#ifdef DEBUG_TRACE_DUMP
+static char *fetch_type_names[] = {
+    [ft_prefetch]     "prefetch",
+    [ft_demand_read]  "demand read",
+    [ft_demand_write] "demand write",
+};
+#endif
+
+/* XXX forward declarations */
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
+#endif
+static inline void sh_update_linear_entries(struct vcpu *v);
+
+/**************************************************************************/
+/* Hash table mapping from guest pagetables to shadows
+ *
+ * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
+ * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
+ *              shadow L1 which maps its "splinters".
+ * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
+ *              PAE L3 info page for that CR3 value.
+ */
+
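
As a toy illustration of the keying convention described above -- ordinary
shadows are looked up by the guest page's mfn, FL1 shadows by the gfn of the
superpage they splinter -- the simplified structures and linear lookup below
stand in for the real hash table; all names here are assumptions made for
illustration only:

/* Simplified stand-in for the shadow hash: 'key' holds an mfn for ordinary
 * shadows and a gfn for FL1 shadows, distinguished by 'type'. */
enum toy_sh_type { TOY_SH_NONE, TOY_SH_L1, TOY_SH_FL1, TOY_SH_L2 };

struct toy_sh_entry {
    unsigned long    key;   /* gmfn (L1/L2) or gfn (FL1) */
    enum toy_sh_type type;
    unsigned long    smfn;  /* mfn of the shadow page itself */
};

/* Linear scan in place of the real hash lookup; returns 0 for "not found". */
unsigned long toy_sh_lookup(const struct toy_sh_entry *tbl, int n,
                            unsigned long key, enum toy_sh_type type)
{
    int i;

    for ( i = 0; i < n; i++ )
        if ( tbl[i].type == type && tbl[i].key == key )
            return tbl[i].smfn;
    return 0;
}
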
+static inline mfn_t 
+get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
+/* Look for FL1 shadows in the hash table */
+{
+    mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn),
+                                     PGC_SH_fl1_shadow >> PGC_SH_type_shift);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *page = mfn_to_page(smfn);
+        if ( !(page->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+static inline mfn_t 
+get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+/* Look for shadows in the hash table */
+{
+    mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn),
+                                     shadow_type >> PGC_SH_type_shift);
+    perfc_incrc(shadow_get_shadow_status);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *page = mfn_to_page(smfn);
+        if ( !(page->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+static inline void 
+set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+/* Put an FL1 shadow into the hash table */
+{
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                   gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain)) )
+        // mark this shadow as a log dirty shadow...
+        set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+    else
+        clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+
+    shadow_hash_insert(v, gfn_x(gfn),
+                        PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
+}
+
+static inline void 
+set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+/* Put a shadow into the hash table */
+{
+    struct domain *d = v->domain;
+    int res;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                   d->domain_id, v->vcpu_id, mfn_x(gmfn),
+                   shadow_type, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(d)) )
+        // mark this shadow as a log dirty shadow...
+        set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+    else
+        clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+
+    res = get_page(mfn_to_page(gmfn), d);
+    ASSERT(res == 1);
+
+    shadow_hash_insert(v, mfn_x(gmfn), shadow_type >> PGC_SH_type_shift,
+                        smfn);
+}
+
+static inline void 
+delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+/* Remove a shadow from the hash table */
+{
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                   gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    shadow_hash_delete(v, gfn_x(gfn),
+                        PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
+}
+
+static inline void 
+delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+/* Remove a shadow from the hash table */
+{
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                   v->domain->domain_id, v->vcpu_id,
+                   mfn_x(gmfn), shadow_type, mfn_x(smfn));
+    shadow_hash_delete(v, mfn_x(gmfn),
+                        shadow_type >> PGC_SH_type_shift, smfn);
+    put_page(mfn_to_page(gmfn));
+}
+
+/**************************************************************************/
+/* CPU feature support querying */
+
+static inline int
+guest_supports_superpages(struct vcpu *v)
+{
+    /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
+     * CR4.PSE is set or the guest is in PAE or long mode */
+    return (hvm_guest(v) && (GUEST_PAGING_LEVELS != 2 
+                             || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
+}
+
+static inline int
+guest_supports_nx(struct vcpu *v)
+{
+    if ( !hvm_guest(v) )
+        return cpu_has_nx;
+
+    // XXX - fix this!
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Functions for walking the guest page tables */
+
+
+/* Walk the guest pagetables, filling the walk_t with what we see. 
+ * Takes an uninitialised walk_t.  The caller must call unmap_walk() 
+ * on the walk_t before discarding it or calling guest_walk_tables again. 
+ * If "guest_op" is non-zero, we are serving a genuine guest memory access, 
+ * and must (a) be under the shadow lock, and (b) remove write access
+ * from any guest PT pages we see, as we will be using their contents to 
+ * perform shadow updates.
+ * Returns 0 for success or non-zero if the guest pagetables are malformed.
+ * N.B. Finding a not-present entry does not cause a non-zero return code. */
+static inline int 
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+{
+    ASSERT(!guest_op || shadow_lock_is_acquired(v->domain));
+
+    perfc_incrc(shadow_guest_walk);
+    memset(gw, 0, sizeof(*gw));
+    gw->va = va;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    /* Get l4e from the top level table */
+    gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va);
+    /* Walk down to the l3e */
+    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
+    gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
+    if ( !valid_mfn(gw->l3mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
+        + guest_l3_table_offset(va);
+#else /* PAE only... */
+    /* Get l3e from the top level table */
+    gw->l3mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va);
+#endif /* PAE or 64... */
+    /* Walk down to the l2e */
+    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
+    gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
+    if ( !valid_mfn(gw->l2mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
+        + guest_l2_table_offset(va);
+#else /* 32-bit only... */
+    /* Get l2e from the top level table */
+    gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va);
+#endif /* All levels... */
+    
+    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+    if ( guest_supports_superpages(v) &&
+         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) ) 
+    {
+        /* Special case: this guest VA is in a PSE superpage, so there's
+         * no guest l1e.  We make one up so that the propagation code
+         * can generate a shadow l1 table.  Start with the gfn of the 
+         * first 4k-page of the superpage. */
+        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+        /* Grant full access in the l1e, since all the guest entry's 
+         * access controls are enforced in the shadow l2e.  This lets 
+         * us reflect l2 changes later without touching the l1s. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
+         * of the level 1 */
+        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
+            flags |= _PAGE_PAT; 
+        /* Increment the pfn by the right number of 4k pages.  
+         * The ~0x1 is to mask out the PAT bit mentioned above. */
+        start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
+        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
+        gw->l1e = NULL;
+        gw->l1mfn = _mfn(INVALID_MFN);
+    } 
+    else 
+    {
+        /* Not a superpage: carry on and find the l1e. */
+        gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
+        if ( !valid_mfn(gw->l1mfn) ) return 1;
+        /* This mfn is a pagetable: make sure the guest can't write to it. */
+        if ( guest_op 
+             && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
+            + guest_l1_table_offset(va);
+        gw->eff_l1e = *gw->l1e;
+    }
+
+    return 0;
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding frame number. */
+static inline gfn_t
+guest_walk_to_gfn(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return _gfn(INVALID_GFN);
+    return guest_l1e_get_gfn(gw->eff_l1e);
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding physical address. */
+static inline paddr_t
+guest_walk_to_gpa(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return 0;
+    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
+}
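
The gpa calculation above is simply the frame number shifted into place plus
the page offset of the original virtual address.  A worked example with an
assumed 4kB page size and made-up numbers:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
    unsigned long va  = 0x8048123UL;  /* assumed guest virtual address */
    unsigned long gfn = 0x1234UL;     /* assumed gfn from the effective l1e */
    unsigned long gpa = (gfn << PAGE_SHIFT) + (va & ~PAGE_MASK);

    printf("va %#lx via gfn %#lx -> gpa %#lx\n", va, gfn, gpa);  /* 0x1234123 */
    return 0;
}
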
+
+
+/* Unmap (and reinitialise) a guest walk.  
+ * Call this to dispose of any walk filled in by guest_walk_tables() */
+static void unmap_walk(struct vcpu *v, walk_t *gw)
+{
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
+#endif
+    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
+#endif
+    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
+#ifdef DEBUG
+    memset(gw, 0, sizeof(*gw));
+#endif
+}
+
+
+/* Pretty-print the contents of a guest-walk */
+static inline void print_gw(walk_t *gw)
+{
+    SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    SHADOW_PRINTK("   l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn));
+    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
+    if ( gw->l4e )
+        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+#endif /* PAE or 64... */
+    SHADOW_PRINTK("   l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn));
+    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
+    if ( gw->l3e )
+        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+#endif /* All levels... */
+    SHADOW_PRINTK("   l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn));
+    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
+    if ( gw->l2e )
+        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+    SHADOW_PRINTK("   l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn));
+    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
+    if ( gw->l1e )
+        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
+    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
+}
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+/* Lightweight audit: pass all the shadows associated with this guest walk
+ * through the audit mechanisms */
+static void sh_audit_gw(struct vcpu *v, walk_t *gw) 
+{
+    mfn_t smfn;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    if ( valid_mfn(gw->l4mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
+                                                PGC_SH_l4_shadow))) )
+        (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* PAE or 64... */
+    if ( valid_mfn(gw->l3mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
+                                                PGC_SH_l3_shadow))) )
+        (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* All levels... */
+    if ( valid_mfn(gw->l2mfn) )
+    {
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
+                                                 PGC_SH_l2_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#if GUEST_PAGING_LEVELS == 3
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
+                                                 PGC_SH_l2h_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#endif
+    }
+    if ( valid_mfn(gw->l1mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l1mfn, 
+                                                PGC_SH_l1_shadow))) )
+        (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
+    else if ( gw->l2e
+              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+              && valid_mfn( 
+              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+        (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
+}
+
+#else
+#define sh_audit_gw(_v, _gw) do {} while(0)
+#endif /* audit code */
+
+
+
+/**************************************************************************/
+/* Function to write to the guest tables, for propagating accessed and 
+ * dirty bits from the shadow to the guest.
+ * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
+ * and an operation type.  The guest entry is always passed as an l1e: 
+ * since we only ever write flags, that's OK.
+ * Returns the new flag bits of the guest entry. */
+
+static u32 guest_set_ad_bits(struct vcpu *v,
+                             mfn_t gmfn, 
+                             guest_l1e_t *ep,
+                             unsigned int level, 
+                             fetch_type_t ft)
+{
+    u32 flags, shflags, bit;
+    struct page_info *pg;
+    int res = 0;
+
+    ASSERT(valid_mfn(gmfn)
+           && (sh_mfn_is_a_page_table(gmfn)
+               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 
+                   == 0)));
+    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
+    ASSERT(level <= GUEST_PAGING_LEVELS);
+    ASSERT(ft == ft_demand_read || ft == ft_demand_write);
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    flags = guest_l1e_get_flags(*ep);
+
+    /* PAE l3s do not have A and D bits */
+    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
+        return flags;
+
+    /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
+    if ( ft == ft_demand_write  
+         && (level == 1 || 
+             (level == 2 && GUEST_PAGING_LEVELS < 4 
+              && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
+    {
+        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
+             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
+            return flags;  /* Guest already has A and D bits set */
+        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
+        perfc_incrc(shadow_ad_update);
+    }
+    else 
+    {
+        if ( flags & _PAGE_ACCESSED )
+            return flags;  /* Guest already has A bit set */
+        flags |= _PAGE_ACCESSED;
+        perfc_incrc(shadow_a_update);
+    }
+
+    /* Set the bit(s) */
+    sh_mark_dirty(v->domain, gmfn);
+    SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", "
+                  "old flags = %#x, new flags = %#x\n", 
+                  guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
+    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
+    
+    /* May need to propagate this change forward to other kinds of shadow */
+    pg = mfn_to_page(gmfn);
+    if ( !sh_mfn_is_a_page_table(gmfn) ) 
+    {
+        /* This guest pagetable is not yet shadowed at all. */
+        // MAF: I think this assert is busted...  If this gmfn has not yet
+        // been promoted, then it seems perfectly reasonable for there to be
+        // outstanding type refs to it...
+        /* TJD: No. If the gmfn has not been promoted, we must at least 
+         * have recognised that it is a pagetable, and pulled write access.
+         * The type count should only be non-zero if it is actually a page 
+         * table.  The test above was incorrect, though, so I've fixed it. */
+        ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
+        return flags;  
+    }
+
+    shflags = pg->shadow_flags & SHF_page_type_mask;
+    while ( shflags )
+    {
+        bit = find_first_set_bit(shflags);
+        ASSERT(shflags & (1u << bit));
+        shflags &= ~(1u << bit);
+        if ( !(pg->shadow_flags & (1u << bit)) )
+            continue;
+        switch ( bit )
+        {
+        case PGC_SH_type_to_index(PGC_SH_l1_shadow):
+            if (level != 1) 
+                res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
+            break;
+        case PGC_SH_type_to_index(PGC_SH_l2_shadow):
+            if (level != 2) 
+                res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS == 3 /* PAE only */
+        case PGC_SH_type_to_index(PGC_SH_l2h_shadow):
+            if (level != 2) 
+                res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+        case PGC_SH_type_to_index(PGC_SH_l3_shadow):
+            if (level != 3) 
+                res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+        case PGC_SH_type_to_index(PGC_SH_l4_shadow):
+            if (level != 4) 
+                res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif 
+#endif
+        default:
+            SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple "
+                          "modes: A&D bits may be out of sync (flags=%#x).\n", 
+                          mfn_x(gmfn), pg->shadow_flags); 
+            /* XXX Shadows in other modes will not be updated, so will
+             * have their A and D bits out of sync. */
+        }
+    }
+    
+    /* We should never need to flush the TLB or recopy PAE entries */
+    ASSERT( res == 0 || res == SHADOW_SET_CHANGED );
+    return flags;
+}
+
+/**************************************************************************/
+/* Functions to compute the correct index into a shadow page, given an
+ * index into the guest page (as returned by guest_get_index()).
+ * This is trivial when the shadow and guest use the same sized PTEs, but
+ * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
+ * PAE- or 64-bit shadows).
+ *
+ * These functions also increment the shadow mfn, when necessary.  When PTE
+ * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
+ * page.  In this case, we allocate 2 contiguous pages for the shadow L1, and
+ * use simple pointer arithmetic on a pointer to the guest L1e to figure out
+ * which shadow page we really want.  Similarly, when PTE sizes are
+ * mismatched, we shadow a guest L2 page with 4 shadow L2 pages.  (The easiest
+ * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
+ * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
+ * space.)
+ *
+ * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
+ * of shadow (to store both the shadow, and the info that would normally be
+ * stored in page_info fields).  This arrangement allows the shadow and the
+ * "page_info" fields to always be stored in the same page (in fact, in
+ * the same cache line), avoiding an extra call to map_domain_page().
+ */
+
+static inline u32
+guest_index(void *ptr)
+{
+    return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
+}
+
+static inline u32
+shadow_l1_index(mfn_t *smfn, u32 guest_index)
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
+    return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
+#else
+    return guest_index;
+#endif
+}
+
+static inline u32
+shadow_l2_index(mfn_t *smfn, u32 guest_index)
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    // Because we use 2 shadow l2 entries for each guest entry, the number of
+    // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We multiply by two to get the index of the first of the two entries
+    // used to shadow the specified guest entry.
+    return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
+#else
+    return guest_index;
+#endif
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+
+static inline u32
+shadow_l3_index(mfn_t *smfn, u32 guest_index)
+{
+#if GUEST_PAGING_LEVELS == 3
+    u32 group_id;
+
+    // Because we use twice the space in L3 shadows as was consumed in guest
+    // L3s, the number of guest entries per shadow page is
+    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
+    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We store PAE L3 shadows in groups of 4, alternating shadows and
+    // pae_l3_bookkeeping structs.  So the effective shadow index is
+    // the group_id * 8 + the offset within the group.
+    //
+    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
+    group_id = guest_index / 4;
+    return (group_id * 8) + (guest_index % 4);
+#else
+    return guest_index;
+#endif
+}
+
+#endif // GUEST_PAGING_LEVELS >= 3
+
+#if GUEST_PAGING_LEVELS >= 4
+
+static inline u32
+shadow_l4_index(mfn_t *smfn, u32 guest_index)
+{
+    return guest_index;
+}
+
+#endif // GUEST_PAGING_LEVELS >= 4
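
To make the index-splitting above concrete, here is a small stand-alone
calculation for a 32-bit guest on PAE/64-bit shadows (1024 guest entries per
page, 512 shadow entries per page) and for the PAE-guest L3 grouping; the
entry counts are the usual values for those modes, stated here as assumptions
rather than pulled from the headers:

#include <stdio.h>

#define SHADOW_L1_ENTRIES 512   /* PAE/64-bit shadow: 512 8-byte entries/page */
#define SHADOW_L2_ENTRIES 512

int main(void)
{
    unsigned int gi = 700;      /* assumed index into a 1024-entry guest table */

    /* L1: one guest L1 page needs two shadow L1 pages; the in-page
     * index carries straight across. */
    printf("l1: shadow page +%u, index %u\n",
           gi / SHADOW_L1_ENTRIES, gi % SHADOW_L1_ENTRIES);        /* +1, 188 */

    /* L2: each guest entry becomes two shadow entries, so 256 guest
     * entries fit per shadow page and the in-page index is doubled. */
    printf("l2: shadow page +%u, index %u\n",
           gi / (SHADOW_L2_ENTRIES / 2),
           (gi % (SHADOW_L2_ENTRIES / 2)) * 2);                    /* +2, 376 */

    /* PAE guest L3: shadows live in groups of four, interleaved with
     * bookkeeping, so guest index 13 -> group 3, slot 1 -> index 25. */
    unsigned int gi3 = 13 % (SHADOW_L2_ENTRIES / 2);
    printf("l3: index %u\n", (gi3 / 4) * 8 + (gi3 % 4));           /* 25 */

    return 0;
}
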
+
+
+/**************************************************************************/
+/* Functions which compute shadow entries from their corresponding guest
+ * entries.
+ *
+ * These are the "heart" of the shadow code.
+ *
+ * There are two sets of these: those that are called on demand faults (read
+ * faults and write faults), and those that are essentially called to
+ * "prefetch" (or propagate) entries from the guest into the shadow.  The read
+ * fault and write fault are handled as two separate cases for L1 entries (due
+ * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
+ * into the respective demand_fault functions.
+ */
+
+#define CHECK(_cond)                                    \
+do {                                                    \
+    if (unlikely(!(_cond)))                             \
+    {                                                   \
+        printk("%s %s %d ASSERTION (%s) FAILED\n",      \
+               __func__, __FILE__, __LINE__, #_cond);   \
+        return -1;                                      \
+    }                                                   \
+} while (0);
+
+// The function below tries to capture all of the flag manipulation for the
+// demand and propagate functions into one place.
+//
+static always_inline u32
+sh_propagate_flags(struct vcpu *v, mfn_t target_mfn, 
+                    u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn, 
+                    int mmio, int level, fetch_type_t ft)
+{
+    struct domain *d = v->domain;
+    u32 pass_thru_flags;
+    u32 sflags;
+
+    // XXX -- might want to think about PAT support for HVM guests...
+
+#ifndef NDEBUG
+    // MMIO can only occur from L1e's
+    //
+    if ( mmio )
+        CHECK(level == 1);
+
+    // We should always have a pointer to the guest entry if it's a non-PSE
+    // non-MMIO demand access.
+    if ( ft & FETCH_TYPE_DEMAND )
+        CHECK(guest_entry_ptr || level == 1);
+#endif
+
+    // A not-present guest entry has a special signature in the shadow table,
+    // so that we do not have to consult the guest tables multiple times...
+    //
+    if ( unlikely(!(gflags & _PAGE_PRESENT)) )
+        return _PAGE_SHADOW_GUEST_NOT_PRESENT;
+
+    // Must have a valid target_mfn, unless this is mmio, or unless this is a
+    // prefetch.  In the case of a prefetch, an invalid mfn means that we can
+    // not usefully shadow anything, and so we return early.
+    //
+    if ( !valid_mfn(target_mfn) )
+    {
+        CHECK((ft == ft_prefetch) || mmio);
+        if ( !mmio )
+            return 0;
+    }
+
+    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
+    //
+    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
+        pass_thru_flags = _PAGE_PRESENT;
+    else
+    {
+        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+                           _PAGE_RW | _PAGE_PRESENT);
+        if ( guest_supports_nx(v) )
+            pass_thru_flags |= _PAGE_NX_BIT;
+    }
+
+    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
+    // L3e's; they are all implied.  So we emulate them here.
+    //
+    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
+        gflags = pass_thru_flags;
+
+    // Propagate bits from the guest to the shadow.
+    // Some of these may be overwritten, below.
+    // Since we know the guest's PRESENT bit is set, we also set the shadow's
+    // SHADOW_PRESENT bit.
+    //
+    sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
+
+    // Copy the guest's RW bit into the SHADOW_RW bit.
+    //
+    if ( gflags & _PAGE_RW )
+        sflags |= _PAGE_SHADOW_RW;
+
+    // Set the A&D bits for higher level shadows.
+    // Higher level entries do not, strictly speaking, have dirty bits, but
+    // since we use shadow linear tables, each of these entries may, at some
+    // point in time, also serve as a shadow L1 entry.
+    // By setting both the A&D bits in each of these, we eliminate the burden
+    // on the hardware to update these bits on initial accesses.
+    //
+    if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
+        sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
+
+
+    // Set the A and D bits in the guest entry, if we need to.
+    if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
+        gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
+    
+    // If the A or D bit has not yet been set in the guest, then we must
+    // prevent the corresponding kind of access.
+    //
+    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
+                  !(gflags & _PAGE_ACCESSED)) )
+        sflags &= ~_PAGE_PRESENT;
+
+    /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
+    if ( unlikely( ((level == 1) 
+                    || ((level == 2) && (GUEST_PAGING_LEVELS < 4) 
+                        && guest_supports_superpages(v) &&
+                        (gflags & _PAGE_PSE)))
+                   && !(gflags & _PAGE_DIRTY)) )
+        sflags &= ~_PAGE_RW;
+
+    // MMIO caching
+    //
+    // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
+    // to cache the fact that this entry is in MMIO space.
+    //
+    if ( (level == 1) && mmio )
+    {
+        sflags &= ~(_PAGE_PRESENT);
+        sflags |= _PAGE_SHADOW_MMIO;
+    }
+    else 
+    {
+        // shadow_mode_log_dirty support
+        //
+        // Only allow the guest write access to a page a) on a demand fault,
+        // or b) if the page is already marked as dirty.
+        //
+        if ( unlikely((level == 1) &&
+                      !(ft & FETCH_TYPE_WRITE) &&
+                      shadow_mode_log_dirty(d) &&
+                      !sh_mfn_is_dirty(d, target_mfn)) )
+        {
+            sflags &= ~_PAGE_RW;
+        }
+        
+        // protect guest page tables
+        //
+        if ( unlikely((level == 1) &&
+                      sh_mfn_is_a_page_table(target_mfn)) )
+        {
+            if ( shadow_mode_trap_reads(d) )
+            {
+                // if we are trapping both reads & writes, then mark this page
+                // as not present...
+                //
+                sflags &= ~_PAGE_PRESENT;
+            }
+            else
+            {
+                // otherwise, just prevent any writes...
+                //
+                sflags &= ~_PAGE_RW;
+            }
+        }
+    }
+
+    return sflags;
+}
+
+#undef CHECK
+
+#if GUEST_PAGING_LEVELS >= 4
+static void
+l4e_propagate_from_guest(struct vcpu *v, 
+                         guest_l4e_t *gl4e,
+                         mfn_t gl4mfn,
+                         mfn_t sl3mfn,
+                         shadow_l4e_t *sl4p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l4e_get_flags(*gl4e);
+    u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
+                                     gl4mfn, 0, 4, ft);
+
+    *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl4e->l4, sl4p->l4);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+static void
+l3e_propagate_from_guest(struct vcpu *v,
+                         guest_l3e_t *gl3e,
+                         mfn_t gl3mfn, 
+                         mfn_t sl2mfn, 
+                         shadow_l3e_t *sl3p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l3e_get_flags(*gl3e);
+    u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
+                                     gl3mfn, 0, 3, ft);
+
+    *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl3e->l3, sl3p->l3);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+static void
+l2e_propagate_from_guest(struct vcpu *v, 
+                         guest_l2e_t *gl2e,
+                         mfn_t gl2mfn,
+                         mfn_t sl1mfn, 
+                         shadow_l2e_t *sl2p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l2e_get_flags(*gl2e);
+    u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e, 
+                                     gl2mfn, 0, 2, ft);
+
+    *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl2e->l2, sl2p->l2);
+    ASSERT(sflags != -1);
+}
+
+static inline int
+l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+               int mmio)
+/* returns 1 if emulation is required, and 0 otherwise */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                     mmio, 1, ft_demand_read);
+
+    if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline int
+l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+                int mmio)
+/* returns 1 if emulation is required, and 0 otherwise */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                     mmio, 1, ft_demand_write);
+
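+    /* Mark the target frame dirty for log-dirty tracking on this write. */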
+    sh_mark_dirty(d, gmfn);
+
+    if ( !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline void
+l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
+                         int mmio)
+{
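+    /* Prefetch propagation: no guest entry pointer or guest mfn is passed to
+     * sh_propagate_flags() because A/D bits are only updated on demand
+     * faults (FETCH_TYPE_DEMAND), not on prefetches. */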
+    gfn_t gfn = guest_l1e_get_gfn(gl1e);
+    mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
+    u32 gflags = guest_l1e_get_flags(gl1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN), 
+                                     mmio, 1, ft_prefetch);
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  gl1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+}
+
+
+/**************************************************************************/
+/* These functions update shadow entries (and do bookkeeping on the shadow
+ * tables they are in).  It is intended that they are the only
+ * functions which ever write (non-zero) data onto a shadow page.
+ *
+ * They return a set of flags: 
+ * SHADOW_SET_CHANGED -- we actually wrote a new value to the shadow.
+ * SHADOW_SET_FLUSH   -- the caller must cause a TLB flush.
+ * SHADOW_SET_ERROR   -- the input is not a valid entry (for example, if
+ *                        shadow_get_page_from_l1e() fails).
+ * SHADOW_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local
+ *                             copies of their PAE L3 entries re-copied.
+ */
+
+static inline void safe_write_entry(void *dst, void *src) 
+/* Copy one PTE safely when processors might be running on the
+ * destination pagetable.   This does *not* give safety against
+ * concurrent writes (that's what the shadow lock is for), just 
+ * stops the hardware picking up partially written entries. */
+{
+    volatile unsigned long *d = dst;
+    unsigned long *s = src;
+    ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
+#if CONFIG_PAGING_LEVELS == 3
+    /* In PAE mode, pagetable entries are larger
+     * than machine words, so won't get written atomically.  We need to make
+     * sure any other cpu running on these shadows doesn't see a
+     * half-written entry.  Do this by marking the entry not-present first,
+     * then writing the high word before the low word. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
+    d[0] = 0;
+    d[1] = s[1];
+    d[0] = s[0];
+#else
+    /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
+     * which will be an atomic write, since the entry is aligned. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
+    *d = *s;
+#endif
+}
+
+
+static inline void 
+shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
+/* This function does the actual writes to shadow pages.
+ * It must not be called directly, since it doesn't do the bookkeeping
+ * that shadow_set_l*e() functions do. */
+{
+    shadow_l1e_t *dst = d;
+    shadow_l1e_t *src = s;
+    void *map = NULL;
+    int i;
+
+    /* Because we mirror access rights at all levels in the shadow, an
+     * l2 (or higher) entry with the RW bit cleared will leave us with
+     * no write access through the linear map.  
+     * We detect that by writing to the shadow with copy_to_user() and 
+     * using map_domain_page() to get a writeable mapping if we need to. */
+    if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 ) 
+    {
+        perfc_incrc(shadow_linear_map_failed);
+        map = sh_map_domain_page(mfn);
+        ASSERT(map != NULL);
+        dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
+    }
+
+
+    for ( i = 0; i < entries; i++ )
+        safe_write_entry(dst++, src++);
+
+    if ( map != NULL ) sh_unmap_domain_page(map);
+
+    /* XXX TODO:
+     * Update min/max field in page_info struct of this mfn */
+}
+
+static inline int
+perms_strictly_increased(u32 old_flags, u32 new_flags) 
+/* Given the flags of two entries, are the new flags a strict
+ * increase in rights over the old ones? */
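+/* (Used by shadow_set_l*e() below when deciding whether replacing an entry
+ * requires a TLB flush: stale TLB entries with fewer rights are harmless.) */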
+{
+    u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    /* Flip the NX bit, since it's the only one that decreases rights;
+     * we calculate as if it were an "X" bit. */
+    of ^= _PAGE_NX_BIT;
+    nf ^= _PAGE_NX_BIT;
+    /* If the changed bits are all set in the new flags, then rights strictly 
+     * increased between old and new. */
+    return ((of | (of ^ nf)) == nf);
+}
+
+static int inline
+shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+{
+    int res;
+    mfn_t mfn;
+    struct domain *owner;
+    shadow_l1e_t sanitized_sl1e =
+        shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
+
+    //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
+    //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
+
+    if ( !shadow_mode_refcounts(d) )
+        return 1;
+
+    res = get_page_from_l1e(sanitized_sl1e, d);
+
+    // If a privileged domain is attempting to install a map of a page it does
+    // not own, we let it succeed anyway.
+    //
+    if ( unlikely(!res) &&
+         IS_PRIV(d) &&
+         !shadow_mode_translate(d) &&
+         valid_mfn(mfn = shadow_l1e_get_mfn(sl1e)) &&
+         (owner = page_get_owner(mfn_to_page(mfn))) &&
+         (d != owner) )
+    {
+        res = get_page_from_l1e(sanitized_sl1e, owner);
+        SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
+                       "which is owned by domain %d: %s\n",
+                       d->domain_id, mfn_x(mfn), owner->domain_id,
+                       res ? "success" : "failed");
+    }
+
+    if ( unlikely(!res) )
+    {
+        perfc_incrc(shadow_get_page_fail);
+        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
+    }
+
+    return res;
+}
+
+static void inline
+shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+{ 
+    if ( !shadow_mode_refcounts(d) )
+        return;
+
+    put_page_from_l1e(sl1e, d);
+}
+
+#if GUEST_PAGING_LEVELS >= 4
+static int shadow_set_l4e(struct vcpu *v, 
+                          shadow_l4e_t *sl4e, 
+                          shadow_l4e_t new_sl4e, 
+                          mfn_t sl4mfn)
+{
+    int flags = 0;
+    shadow_l4e_t old_sl4e;
+    paddr_t paddr;
+    ASSERT(sl4e != NULL);
+    old_sl4e = *sl4e;
+
+    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
+    
+    paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
+             | (((unsigned long)sl4e) & ~PAGE_MASK));
+
+    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr);
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
+        if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
+             || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e), 
+                                          shadow_l4e_get_flags(new_sl4e)) )
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl3mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+#if GUEST_PAGING_LEVELS >= 3
+static int shadow_set_l3e(struct vcpu *v, 
+                          shadow_l3e_t *sl3e, 
+                          shadow_l3e_t new_sl3e, 
+                          mfn_t sl3mfn)
+{
+    int flags = 0;
+    shadow_l3e_t old_sl3e;
+    paddr_t paddr;
+    ASSERT(sl3e != NULL);
+    old_sl3e = *sl3e;
+
+    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
+
+    paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) 
+             | (((unsigned long)sl3e) & ~PAGE_MASK));
+    
+    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr);
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+#if GUEST_PAGING_LEVELS == 3 
+    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
+     * the linear pagetable entries of its l2s, and may also be copied
+     * to a low memory location to make it fit in CR3.  Report that we
+     * need to resync those copies (we can't wait for the guest to flush
+     * the TLB because it might be an increase in rights). */
+    {
+        struct vcpu *vcpu;
+
+        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
+        for_each_vcpu(v->domain, vcpu)
+        {
+            if (info->vcpus & (1 << vcpu->vcpu_id))
+            {
+                // Remember that this flip/update needs to occur.
+                vcpu->arch.shadow.pae_flip_pending = 1;
+                flags |= SHADOW_SET_L3PAE_RECOPY;
+            }
+        }
+    }
+#endif
+
+    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
+        if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
+             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e), 
+                                       shadow_l3e_get_flags(new_sl3e)) ) 
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl2mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */ 
+
+static int shadow_set_l2e(struct vcpu *v, 
+                          shadow_l2e_t *sl2e, 
+                          shadow_l2e_t new_sl2e, 
+                          mfn_t sl2mfn)
+{
+    int flags = 0;
+    shadow_l2e_t old_sl2e;
+    paddr_t paddr;
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    /* In 2-on-3 we work with pairs of l2es pointing at two-page
+     * shadows.  Reference counting and up-pointers track from the first
+     * page of the shadow to the first l2e, so make sure that we're 
+     * working with those:     
+     * Align the pointer down so it's pointing at the first of the pair */
+    sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
+    /* Align the mfn of the shadow entry too */
+    new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
+#endif
+
+    ASSERT(sl2e != NULL);
+    old_sl2e = *sl2e;
+    
+    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
+    
+    paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
+             | (((unsigned long)sl2e) & ~PAGE_MASK));
+
+    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */
+        sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr);
+    } 
+
+    /* Write the new entry */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    {
+        shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
+        /* The l1 shadow is two pages long and needs to be pointed to by
+         * two adjacent l2es.  The pair have the same flags, but point
+         * at odd and even MFNs */
+        ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
+        pair[1].l2 |= (1<<PAGE_SHIFT);
+        shadow_write_entries(sl2e, &pair, 2, sl2mfn);
+    }
+#else /* normal case */
+    shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
+#endif
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
+        if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
+             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e), 
+                                       shadow_l2e_get_flags(new_sl2e)) ) 
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl1mfn, paddr);
+    }
+    return flags;
+}
+
+static int shadow_set_l1e(struct vcpu *v, 
+                          shadow_l1e_t *sl1e, 
+                          shadow_l1e_t new_sl1e,
+                          mfn_t sl1mfn)
+{
+    int flags = 0;
+    struct domain *d = v->domain;
+    shadow_l1e_t old_sl1e;
+    ASSERT(sl1e != NULL);
+    
+    old_sl1e = *sl1e;
+
+    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
+    
+    if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        if ( shadow_mode_refcounts(d) ) {
+            if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
+            {
+                /* Doesn't look like a pagetable. */
+                flags |= SHADOW_SET_ERROR;
+                new_sl1e = shadow_l1e_empty();
+            }
+        }
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        /* N.B. Unlike higher-level sets, never need an extra flush 
+         * when writing an l1e.  Because it points to the same guest frame 
+         * as the guest l1e did, it's the guest's responsibility to
+         * trigger a flush later. */
+        if ( shadow_mode_refcounts(d) ) 
+        {
+            shadow_put_page_from_l1e(old_sl1e, d);
+        } 
+    }
+    return flags;
+}
+
+
+/**************************************************************************/
+/* These functions take a vcpu and a virtual address, and return a pointer
+ * to the appropriate level N entry from the shadow tables.  
+ * If the necessary tables are not present in the shadow, they return NULL. */
+
+/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
+ * more levels than the guest, the upper levels are always fixed and do not 
+ * reflect any information from the guest, so we do not use these functions 
+ * to access them. */
+
+#if GUEST_PAGING_LEVELS >= 4
+static shadow_l4e_t *
+shadow_get_l4e(struct vcpu *v, unsigned long va)
+{
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+static shadow_l3e_t *
+shadow_get_l3e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    /* Get the l4 */
+    shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
+    ASSERT(sl4e != NULL);
+    if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
+    /* l4 was present; OK to get the l3 */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
+#else /* PAE... */
+    /* Top level is always mapped */
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
+#endif 
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+static shadow_l2e_t *
+shadow_get_l2e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 3  /* 64bit/PAE... */
+    /* Get the l3 */
+    shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
+    if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
+    /* l3 was present; OK to get the l2 */
+#endif
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
+}
+
+
+#if 0 // avoid the compiler warning for now...
+
+static shadow_l1e_t *
+shadow_get_l1e(struct vcpu *v, unsigned long va)
+{
+    /* Get the l2 */
+    shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
+    if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
+    /* l2 was present; OK to get the l1 */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
+}
+
+#endif
+
+
+/**************************************************************************/
+/* Macros to walk pagetables.  These take the shadow of a pagetable and 
+ * walk every "interesting" entry.  That is, they don't touch Xen mappings, 
+ * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every 
+ * second entry (since pairs of entries are managed together). For multi-page
+ * shadows they walk all pages.
+ * 
+ * Arguments are an MFN, the variable to point to each entry, a variable 
+ * to indicate that we are done (we will shortcut to the end of the scan 
+ * when _done != 0), a variable to indicate that we should avoid Xen mappings,
+ * and the code. 
+ *
+ * WARNING: These macros have side-effects.  They change the values of both 
+ * the pointer and the MFN. */ 
+
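+/* Helper for the FOREACH macros below: steps an optional guest-entry pointer
+ * in lock-step with the shadow entries.  A NULL pointer is silently ignored. */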
+static inline void increment_ptr_to_guest_entry(void *ptr)
+{
+    if ( ptr )
+    {
+        guest_l1e_t **entry = ptr;
+        (*entry)++;
+    }
+}
+
+/* All kinds of l1: touch all entries */
+#define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)       \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l1e_t *_sp = map_shadow_page((_sl1mfn));                     \
+    ASSERT((mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l1_shadow                                         \
+           || (mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)    \
+           == PGC_SH_fl1_shadow);                                      \
+    for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl1e) = _sp + _i;                                             \
+        if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl1p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done,  _code)       \
+do {                                                                    \
+    int __done = 0;                                                     \
+    _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                         \
+                         ({ (__done = _done); }), _code);               \
+    _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1);                                 \
+    if ( !__done )                                                      \
+        _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                     \
+                             ({ (__done = _done); }), _code);           \
+} while (0)
+#else /* Everything else; l1 shadows are only one page */
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)        \
+       _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
+#endif
+    
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+
+/* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)    \
+do {                                                                      \
+    int _i, _j, __done = 0;                                               \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)         \
+           == PGC_SH_l2_32_shadow);                                      \
+    for ( _j = 0; _j < 4 && !__done; _j++ )                               \
+    {                                                                     \
+        shadow_l2e_t *_sp = map_shadow_page(_sl2mfn);                     \
+        for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 )         \
+            if ( (!(_xen))                                                \
+                 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i)             \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
+            {                                                             \
+                (_sl2e) = _sp + _i;                                       \
+                if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )     \
+                    {_code}                                               \
+                if ( (__done = (_done)) ) break;                          \
+                increment_ptr_to_guest_entry(_gl2p);                      \
+            }                                                             \
+        unmap_shadow_page(_sp);                                           \
+        _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1);                               \
+    }                                                                     \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 2
+
+/* 32-bit on 32-bit: avoid Xen entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)     \
+do {                                                                       \
+    int _i;                                                                \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                        \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)          \
+           == PGC_SH_l2_32_shadow);                                       \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
+        if ( (!(_xen))                                                     \
+             ||                                                            \
+             (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                                  \
+            (_sl2e) = _sp + _i;                                            \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )          \
+                {_code}                                                    \
+            if ( _done ) break;                                            \
+            increment_ptr_to_guest_entry(_gl2p);                           \
+        }                                                                  \
+    unmap_shadow_page(_sp);                                                \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 3
+
+/* PAE: if it's an l2h, don't touch Xen mappings */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)     \
+do {                                                                       \
+    int _i;                                                                \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                        \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)          \
+           == PGC_SH_l2_pae_shadow                                        \
+           || (mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l2h_pae_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
+        if ( (!(_xen))                                                     \
+             || ((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)    \
+                 != PGC_SH_l2h_pae_shadow)                                \
+             || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))                  \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                                  \
+            (_sl2e) = _sp + _i;                                            \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )          \
+                {_code}                                                    \
+            if ( _done ) break;                                            \
+            increment_ptr_to_guest_entry(_gl2p);                           \
+        }                                                                  \
+    unmap_shadow_page(_sp);                                                \
+} while (0)
+
+#else 
+
+/* 64-bit l2: touch all entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)  \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                     \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l2_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl2e) = _sp + _i;                                             \
+        if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl2p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif /* different kinds of l2 */
+
+#if GUEST_PAGING_LEVELS == 3
+
+/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
+#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
+do {                                                                    \
+    int _i;                                                             \
+    for ( _i = 0; _i < 4; _i++ )                                        \
+    {                                                                   \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        _sl3e++;                                                        \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+} while (0)
+
+/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
+do {                                                                    \
+    int _i, _j, _k, __done = 0;                                         \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l3_pae_shadow);                                   \
+    /* The subshadows are split, 64 on each page of the shadow */       \
+    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
+    {                                                                   \
+        void *_sp = sh_map_domain_page(_sl3mfn);                       \
+        for ( _i = 0; _i < 64; _i++ )                                   \
+        {                                                               \
+            /* Every second 32-byte region is a bookkeeping entry */    \
+            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
+            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
+                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
+                                        ({ __done = (_done); __done; }), \
+                                        _code);                         \
+            else                                                        \
+                for ( _k = 0 ; _k < 4 ; _k++ )                          \
+                    increment_ptr_to_guest_entry(_gl3p);                \
+            if ( __done ) break;                                        \
+        }                                                               \
+        sh_unmap_domain_page(_sp);                                     \
+        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
+    }                                                                   \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 4
+
+/* 64-bit l3: touch all entries */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l3e_t *_sp = map_shadow_page((_sl3mfn));                     \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l3_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl3e) = _sp + _i;                                             \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 64-bit l4: avoid Xen mappings */
+#define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _xen, _code)  \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l4e_t *_sp = map_shadow_page((_sl4mfn));                     \
+    ASSERT((mfn_to_page(_sl4mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l4_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        if ( (!(_xen)) || is_guest_l4_slot(_i) )                        \
+        {                                                               \
+            (_sl4e) = _sp + _i;                                         \
+            if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT )       \
+                {_code}                                                 \
+            if ( _done ) break;                                         \
+        }                                                               \
+        increment_ptr_to_guest_entry(_gl4p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif
+
+
+
+/**************************************************************************/
+/* Functions to install Xen mappings and linear mappings in shadow pages */
+
+static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
+
+// XXX -- this function should probably be moved to shadow-common.c, but that
+//        probably wants to wait until the shadow types have been moved from
+//        shadow-types.h to shadow-private.h
+//
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l4e_t *sl4e;
+
+    sl4e = sh_map_domain_page(sl4mfn);
+    ASSERT(sl4e != NULL);
+    ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
+        shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
+                            __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+    sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(v->domain) )
+    {
+        /* install domain-specific P2M table */
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl4e);    
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+// For 3-on-3 PV guests, we need to make sure the xen mappings are in
+// place, which means that we need to populate the l2h entry in the l3
+// table.
+
+void sh_install_xen_entries_in_l2h(struct vcpu *v, 
+                                    mfn_t sl2hmfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2hmfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+    
+    /* We don't set up a linear mapping here because we can't until this
+     * l2h is installed in an l3e.  sh_update_linear_entries() handles
+     * the linear mappings when the l3 is loaded. */
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* Install the domain-specific p2m table */
+        l3_pgentry_t *p2m;
+        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+        p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+        {
+            sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
+                shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
+                                    __PAGE_HYPERVISOR);
+        }
+        sh_unmap_domain_page(p2m);
+    }
+    
+    sh_unmap_domain_page(sl2e);
+}
+
+void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
+{
+    shadow_l3e_t *sl3e;
+    guest_l3e_t *gl3e = v->arch.guest_vtable;
+    shadow_l3e_t new_sl3e;
+    gfn_t l2gfn;
+    mfn_t l2gmfn, l2smfn;
+    int r;
+
+    ASSERT(!shadow_mode_external(v->domain));
+    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
+    l2gfn = guest_l3e_get_gfn(gl3e[3]);
+    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
+    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
+    if ( !valid_mfn(l2smfn) )
+    {
+        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
+    }
+    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
+                             ft_prefetch);
+    sl3e = sh_map_domain_page(sl3mfn);
+    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
+    sh_unmap_domain_page(sl3e);
+}
+#endif
+
+
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2mfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
+    sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* install domain-specific P2M table */
+        sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl2e);
+}
+#endif
+
+
+
+
+
+/**************************************************************************/
+/* Create a shadow of a given guest page.
+ */
+static mfn_t
+sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+{
+    mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
+    SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
+                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
+
+    if ( shadow_type != PGC_SH_guest_root_type )
+        /* Lower-level shadow, not yet linked from a higher level */
+        mfn_to_page(smfn)->up = 0;
+
+    // Create the Xen mappings...
+    if ( !shadow_mode_external(v->domain) )
+    {
+        switch (shadow_type) 
+        {
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+        case PGC_SH_l4_shadow:
+            sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+        case PGC_SH_l3_shadow:
+            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
+        case PGC_SH_l2h_shadow:
+            sh_install_xen_entries_in_l2h(v, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+        case PGC_SH_l2_shadow:
+            sh_install_xen_entries_in_l2(v, gmfn, smfn); break;
+#endif
+        default: /* Do nothing */ break;
+        }
+    }
+    
+    shadow_promote(v, gmfn, shadow_type);
+    set_shadow_status(v, gmfn, shadow_type, smfn);
+
+    return smfn;
+}
+
+/* Make a splintered superpage shadow */
+static mfn_t
+make_fl1_shadow(struct vcpu *v, gfn_t gfn)
+{
+    mfn_t smfn = shadow_alloc(v->domain, PGC_SH_fl1_shadow,
+                               (unsigned long) gfn_x(gfn));
+
+    SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n",
+                  gfn_x(gfn), mfn_x(smfn));
+
+    set_fl1_shadow_status(v, gfn, smfn);
+    return smfn;
+}
+
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+mfn_t
+sh_make_monitor_table(struct vcpu *v)
+{
+
+    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+    
+#if CONFIG_PAGING_LEVELS == 4    
+    {
+        struct domain *d = v->domain;
+        mfn_t m4mfn;
+        m4mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m4mfn)->shadow_flags = 4;
+#if SHADOW_PAGING_LEVELS < 4
+        // Install a monitor l3 table in slot 0 of the l4 table.
+        // This is used for shadow linear maps.
+        {
+            mfn_t m3mfn; 
+            l4_pgentry_t *l4e;
+            m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+            mfn_to_page(m3mfn)->shadow_flags = 3;
+            l4e = sh_map_domain_page(m4mfn);
+            l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(l4e);
+        }
+#endif /* SHADOW_PAGING_LEVELS < 4 */
+        return m4mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m3mfn, m2mfn; 
+        l3_pgentry_t *l3e;
+        l2_pgentry_t *l2e;
+        int i;
+
+        m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        /* Remember the level of this table */
+        mfn_to_page(m3mfn)->shadow_flags = 3;
+
+        // Install a monitor l2 table in slot 3 of the l3 table.
+        // This is used for all Xen entries, including linear maps
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        l3e = sh_map_domain_page(m3mfn);
+        l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+        sh_install_xen_entries_in_l2h(v, m2mfn);
+        /* Install the monitor's own linear map */
+        l2e = sh_map_domain_page(m2mfn);
+        for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 
+                ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 
+                : l2e_empty();
+        sh_unmap_domain_page(l2e);
+        sh_unmap_domain_page(l3e);
+
+        SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+        return m3mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m2mfn;
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        return m2mfn;
+    }
+
+#else
+#error this should not happen
+#endif /* CONFIG_PAGING_LEVELS */
+}
+#endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
+
+/**************************************************************************/
+/* These functions also take a virtual address and return the level-N
+ * shadow table mfn and entry, but they create the shadow pagetables if
+ * they are needed.  The "demand" argument is non-zero when handling
+ * a demand fault (so we know what to do about accessed bits &c).
+ * If the necessary tables are not present in the guest, they return NULL. */
+#if GUEST_PAGING_LEVELS >= 4
+static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl4mfn)
+{
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl3mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    mfn_t sl4mfn;
+    shadow_l4e_t *sl4e;
+    if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
+    /* Get the l4e */
+    sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
+    ASSERT(sl4e != NULL);
+    if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) 
+    {
+        *sl3mfn = shadow_l4e_get_mfn(*sl4e);
+        ASSERT(valid_mfn(*sl3mfn));
+    } 
+    else 
+    {
+        int r;
+        shadow_l4e_t new_sl4e;
+        /* No l3 shadow installed: find and install it. */
+        *sl3mfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow);
+        if ( !valid_mfn(*sl3mfn) ) 
+        {
+            /* No l3 shadow of this page exists at all: make one. */
+            *sl3mfn = sh_make_shadow(v, gw->l3mfn, PGC_SH_l3_shadow);
+        }
+        /* Install the new sl3 table in the sl4e */
+        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn, 
+                                 *sl3mfn, &new_sl4e, ft);
+        r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
+#else /* PAE... */
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the shadow l3 table is in an 8k
+     * shadow and we need to return the right mfn of the pair. This call
+     * will set it for us as a side-effect. */
+    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
+        + shadow_l3_table_offset(gw->va);
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl2mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+    mfn_t sl3mfn = _mfn(INVALID_MFN);
+    shadow_l3e_t *sl3e;
+    if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+    /* Get the l3e */
+    sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
+    ASSERT(sl3e != NULL);  /* Since we know guest PT is valid this far */
+    if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) 
+    {
+        *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+        ASSERT(valid_mfn(*sl2mfn));
+    } 
+    else 
+    {
+        int r;
+        shadow_l3e_t new_sl3e;
+        /* No l2 shadow installed: find and install it. */
+        *sl2mfn = get_shadow_status(v, gw->l2mfn, PGC_SH_l2_shadow);
+        if ( !valid_mfn(*sl2mfn) ) 
+        {
+            /* No l2 shadow of this page exists at all: make one. */
+            *sl2mfn = sh_make_shadow(v, gw->l2mfn, PGC_SH_l2_shadow);
+        }
+        /* Install the new sl2 table in the sl3e */
+        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn, 
+                                 *sl2mfn, &new_sl3e, ft);
+        r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+#if GUEST_PAGING_LEVELS == 3 
+        /* Need to sync up the linear maps, as we are about to use them */
+        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
+        sh_pae_recopy(v->domain);
+#endif
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#else /* 32bit... */
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the guest l2 has a 16k
+     * shadow, and we need to return the right mfn of the four. This
+     * call will set it for us as a side-effect. */
+    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+    /* Reading the top level table is always valid. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#endif 
+}
+
+
+static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl1mfn,
+                                                fetch_type_t ft)
+{
+    mfn_t sl2mfn;
+    shadow_l2e_t *sl2e;
+
+    /* Get the l2e */
+    sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
+    if ( sl2e == NULL ) return NULL;
+    if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT ) 
+    {
+        *sl1mfn = shadow_l2e_get_mfn(*sl2e);
+        ASSERT(valid_mfn(*sl1mfn));
+    } 
+    else 
+    {
+        shadow_l2e_t new_sl2e;
+        int r, flags = guest_l2e_get_flags(*gw->l2e);
+        /* No l1 shadow installed: find and install it. */
+        if ( !(flags & _PAGE_PRESENT) )
+            return NULL; /* No guest page. */
+        if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) ) 
+        {
+            /* Splintering a superpage */
+            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+            *sl1mfn = get_fl1_shadow_status(v, l2gfn);
+            if ( !valid_mfn(*sl1mfn) ) 
+            {
+                /* No fl1 shadow of this superpage exists at all: make one. */
+                *sl1mfn = make_fl1_shadow(v, l2gfn);
+            }
+        } 
+        else 
+        {
+            /* Shadowing an actual guest l1 table */
+            if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+            *sl1mfn = get_shadow_status(v, gw->l1mfn, PGC_SH_l1_shadow);
+            if ( !valid_mfn(*sl1mfn) ) 
+            {
+                /* No l1 shadow of this page exists at all: make one. */
+                *sl1mfn = sh_make_shadow(v, gw->l1mfn, PGC_SH_l1_shadow);
+            }
+        }
+        /* Install the new sl1 table in the sl2e */
+        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn, 
+                                 *sl1mfn, &new_sl2e, ft);
+        r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);        
+        /* This next line is important: in 32-on-PAE and 32-on-64 modes,
+         * the guest l1 table has an 8k shadow, and we need to return
+         * the right mfn of the pair. This call will set it for us as a
+         * side-effect.  (In all other cases, it's a no-op and will be
+         * compiled out.) */
+        (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
+}
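+/* Note how the shadow_get_and_create_lNe() functions above chain together:
+ * the l1 getter calls the l2 getter, which calls the l3 getter (and so on
+ * up to the top level for 64bit guests).  At each level a missing shadow
+ * is looked up with get_shadow_status() and, failing that, built with
+ * sh_make_shadow(), then installed in the level above via shadow_set_lNe(). */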
+
+
+
+/**************************************************************************/
+/* Destructors for shadow tables: 
+ * Unregister the shadow, decrement refcounts of any entries present in it,
+ * and release the memory.
+ *
+ * N.B. These destructors do not clear the contents of the shadows.
+ *      This allows us to delay TLB shootdowns until the page is being reused.
+ *      See shadow_alloc() and shadow_free() for how this is handled.
+ */
+
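+/* Broadly, each destructor below follows the same pattern:
+ *   1. read the shadow type from the page's count_info;
+ *   2. delete_shadow_status() / shadow_demote() so the guest page is no
+ *      longer recorded as shadowed at this type;
+ *   3. walk the shadow with SHADOW_FOREACH_LnE(), dropping the reference
+ *      held by each present entry;
+ *   4. shadow_free() the memory back into the shadow pool. */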
+#if GUEST_PAGING_LEVELS >= 4
+void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l4e_t *sl4e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl4mfn;
+    int xen_mappings;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l4_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+
+    /* Decrement refcounts of all the old entries */
+    xen_mappings = (!shadow_mode_external(v->domain));
+    sl4mfn = smfn; 
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+        if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) 
+        {
+            sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
+                        (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl4e & ~PAGE_MASK));
+        }
+    });
+    
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+#endif    
+
+#if GUEST_PAGING_LEVELS >= 3
+void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l3e_t *sl3e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl3mfn;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l3_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 3
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+    /* Decrement refcounts of all the old entries */
+    sl3mfn = smfn; 
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+        if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
+                        (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl3e & ~PAGE_MASK));
+    });
+
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+#endif    
+
+
+#if GUEST_PAGING_LEVELS == 3
+static void sh_destroy_l3_subshadow(struct vcpu *v, 
+                                     shadow_l3e_t *sl3e)
+/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
+{
+    int i;
+    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
+    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
+        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l3e_get_mfn(sl3e[i]),
+                        maddr_from_mapped_domain_page(sl3e));
+}
+#endif
+
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
+/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
+{
+    int i, j;
+    struct pae_l3_bookkeeping *bk;
+    
+    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
+           == PGC_SH_l3_pae_shadow);
+    /* The subshadows are split, 64 on each page of the shadow */
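+    /* Each 64-byte slot holds a 4-entry l3 subshadow in its first 32 bytes
+     * and its struct pae_l3_bookkeeping in the second 32 bytes, hence the
+     * (64 * j) and (64 * j) + 32 offsets below. */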
+    for ( i = 0; i < 2; i++ ) 
+    {
+        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
+        for ( j = 0; j < 64; j++ )
+        {
+            /* Every second 32-byte region is a bookkeeping entry */
+            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
+            if ( bk->pinned )
+                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
+            /* Check whether we've just freed the whole shadow */
+            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
+            {
+                sh_unmap_domain_page(p);
+                return;
+            }
+        }
+        sh_unmap_domain_page(p);
+    }
+}
+#endif
+
+void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l2e_t *sl2e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl2mfn;
+    int xen_mappings;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l2_shadow 
+           || t == PGC_SH_l2h_pae_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 2
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+    /* Decrement refcounts of all the old entries */
+    sl2mfn = smfn;
+    xen_mappings = (!shadow_mode_external(v->domain) &&
+                    ((GUEST_PAGING_LEVELS == 2) ||
+                     ((GUEST_PAGING_LEVELS == 3) &&
+                      (t == PGC_SH_l2h_pae_shadow))));
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
+                        (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl2e & ~PAGE_MASK));
+    });
+
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+
+void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l1_shadow || t == PGC_SH_fl1_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    if ( t == PGC_SH_fl1_shadow )
+    {
+        gfn_t gfn = _gfn(mfn_to_page(smfn)->u.inuse.type_info);
+        delete_fl1_shadow_status(v, gfn, smfn);
+    }
+    else 
+    {
+        mfn_t gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+        delete_shadow_status(v, gmfn, t, smfn);
+        shadow_demote(v, gmfn, t);
+    }
+    
+    if ( shadow_mode_refcounts(d) )
+    {
+        /* Decrement refcounts of all the old entries */
+        mfn_t sl1mfn = smfn; 
+        SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
+            if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT ) 
+                shadow_put_page_from_l1e(*sl1e, d);
+        });
+    }
+    
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+    struct domain *d = v->domain;
+    ASSERT((mfn_to_page(mmfn)->count_info & PGC_SH_type_mask)
+           == PGC_SH_monitor_table);
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
+    /* Need to destroy the l3 monitor page in slot 0 too */
+    {
+        l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
+        ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+        shadow_free(d, _mfn(l4e_get_pfn(l4e[0])));
+        sh_unmap_domain_page(l4e);
+    }
+#elif CONFIG_PAGING_LEVELS == 3
+    /* Need to destroy the l2 monitor page in slot 4 too */
+    {
+        l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
+        ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+        shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
+        sh_unmap_domain_page(l3e);
+    }
+#endif
+
+    /* Put the memory back in the pool */
+    shadow_free(d, mmfn);
+}
+#endif
+
+/**************************************************************************/
+/* Functions to destroy non-Xen mappings in a pagetable hierarchy.
+ * These are called from common code when we are running out of shadow
+ * memory, and unpinning all the top-level shadows hasn't worked. 
+ *
+ * This implementation is pretty crude and slow, but we hope that it won't 
+ * be called very often. */
+
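+/* All three unhook functions below do the same job for their paging mode:
+ * sweep the top-level shadow with SHADOW_FOREACH_LnE() and write an empty
+ * entry over each one via shadow_set_lNe(), which drops the reference to
+ * the lower-level shadow and so allows it to be freed. */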
+#if GUEST_PAGING_LEVELS == 2
+
+void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
+{    
+    shadow_l2e_t *sl2e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+    });
+}
+
+#elif GUEST_PAGING_LEVELS == 3
+
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
+/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
+{
+    shadow_l3e_t *sl3e;
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
+            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
+            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
+                 == PGC_SH_l2h_pae_shadow ) 
+            {
+                /* High l2: need to pick particular l2es to unhook */
+                shadow_l2e_t *sl2e;
+                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
+                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+                });
+            }
+            else
+            {
+                /* Normal l2: can safely unhook the whole l3e */
+                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
+            }
+        }
+    });
+    /* We've changed PAE L3 entries: must sync up various copies of them */
+    sh_pae_recopy(v->domain);
+}
+
+#elif GUEST_PAGING_LEVELS == 4
+
+void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
+{
+    shadow_l4e_t *sl4e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+        (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+    });
+}
+
+#endif
+
+/**************************************************************************/
+/* Internal translation functions.
+ * These functions require a pointer to the shadow entry that will be updated.
+ */
+
+/* These functions take a new guest entry, translate it to shadow and write 
+ * the shadow entry.
+ *
+ * They return the same bitmaps as the shadow_set_lXe() functions.
+ */
+
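+/* For example, a caller that has just validated a guest write might check
+ * the returned bits like this (illustrative only):
+ *
+ *     rc = validate_gl2e(v, &new_gl2e, sl2mfn, sl2p);
+ *     if ( rc & SHADOW_SET_ERROR )
+ *         ... the guest entry referenced a gfn with no valid mfn ...
+ *     if ( rc & SHADOW_SET_FLUSH )
+ *         flush_tlb_mask(v->domain->domain_dirty_cpumask);
+ */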
+#if GUEST_PAGING_LEVELS >= 4
+static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
+{
+    shadow_l4e_t new_sl4e;
+    guest_l4e_t *new_gl4e = new_ge;
+    shadow_l4e_t *sl4p = se;
+    mfn_t sl3mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl4e_calls);
+
+    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
+    {
+        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+        mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
+        if ( valid_mfn(gl3mfn) )
+            sl3mfn = get_shadow_status(v, gl3mfn, PGC_SH_l3_shadow);
+        else
+            result |= SHADOW_SET_ERROR;
+    }
+    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
+                             sl3mfn, &new_sl4e, ft_prefetch);
+    result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
+    return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
+{
+    shadow_l3e_t new_sl3e;
+    guest_l3e_t *new_gl3e = new_ge;
+    shadow_l3e_t *sl3p = se;
+    mfn_t sl2mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl3e_calls);
+
+    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
+    {
+        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+        mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+        if ( valid_mfn(gl2mfn) )
+            sl2mfn = get_shadow_status(v, gl2mfn, PGC_SH_l2_shadow);
+        else
+            result |= SHADOW_SET_ERROR;
+    }
+    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN), 
+                             sl2mfn, &new_sl3e, ft_prefetch);
+    result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
+
+#if GUEST_PAGING_LEVELS == 3
+    /* We have changed a PAE l3 entry: need to sync up the possible copies 
+     * of it */
+    if ( result & SHADOW_SET_L3PAE_RECOPY )
+        sh_pae_recopy(v->domain);
+#endif
+
+    return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
+{
+    shadow_l2e_t new_sl2e;
+    guest_l2e_t *new_gl2e = new_ge;
+    shadow_l2e_t *sl2p = se;
+    mfn_t sl1mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl2e_calls);
+
+    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
+    {
+        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+        if ( guest_supports_superpages(v) &&
+             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+        {
+            // superpage -- need to look up the shadow L1 which holds the
+            // splitters...
+            sl1mfn = get_fl1_shadow_status(v, gl1gfn);
+#if 0
+            // XXX - it's possible that we want to do some kind of prefetch
+            // for superpage fl1's here, but this is *not* on the demand path,
+            // so we'll hold off trying that for now...
+            //
+            if ( !valid_mfn(sl1mfn) )
+                sl1mfn = make_fl1_shadow(v, gl1gfn);
+#endif
+        }
+        else
+        {
+            mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
+            if ( valid_mfn(gl1mfn) )
+                sl1mfn = get_shadow_status(v, gl1mfn, PGC_SH_l1_shadow);
+            else
+                result |= SHADOW_SET_ERROR;
+        }
+    }
+    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
+                             sl1mfn, &new_sl2e, ft_prefetch);
+    result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
+
+    return result;
+}
+
+static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
+{
+    shadow_l1e_t new_sl1e;
+    guest_l1e_t *new_gl1e = new_ge;
+    shadow_l1e_t *sl1p = se;
+    gfn_t gfn;
+    mfn_t mfn;
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl1e_calls);
+
+    gfn = guest_l1e_get_gfn(*new_gl1e);
+    mfn = vcpu_gfn_to_mfn(v, gfn);
+
+    l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e, 
+                             /* mmio? */ !valid_mfn(mfn));
+    
+    result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
+    return result;
+}
+
+
+/**************************************************************************/
+/* Functions which translate and install the shadows of arbitrary guest 
+ * entries that we have just seen the guest write. */
+
+
+static inline int 
+sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
+                     void *new_gp, u32 size, u32 sh_type, 
+                     u32 (*shadow_index)(mfn_t *smfn, u32 idx),
+                     int (*validate_ge)(struct vcpu *v, void *ge, 
+                                        mfn_t smfn, void *se))
+/* Generic function for mapping and validating. */
+{
+    mfn_t smfn, smfn2, map_mfn;
+    shadow_l1e_t *sl1p;
+    u32 shadow_idx, guest_idx;
+    int result = 0;
+
+    /* Align address and size to guest entry boundaries */
+    size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
+    new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
+    size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
+    ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
+
+    /* Map the shadow page */
+    smfn = get_shadow_status(v, gmfn, sh_type);
+    ASSERT(valid_mfn(smfn)); /* Otherwise we would not have been called */
+    guest_idx = guest_index(new_gp);
+    map_mfn = smfn;
+    shadow_idx = shadow_index(&map_mfn, guest_idx);
+    sl1p = map_shadow_page(map_mfn);
+
+    /* Validate one entry at a time */
+    while ( size )
+    {
+        smfn2 = smfn;
+        guest_idx = guest_index(new_gp);
+        shadow_idx = shadow_index(&smfn2, guest_idx);
+        if ( mfn_x(smfn2) != mfn_x(map_mfn) )
+        {
+            /* We have moved to another page of the shadow */
+            map_mfn = smfn2;
+            unmap_shadow_page(sl1p);
+            sl1p = map_shadow_page(map_mfn);
+        }
+        result |= validate_ge(v,
+                              new_gp,
+                              map_mfn,
+                              &sl1p[shadow_idx]);
+        size -= sizeof(guest_l1e_t);
+        new_gp += sizeof(guest_l1e_t);
+    }
+    unmap_shadow_page(sl1p);
+    return result;
+}
+
+
+int
+sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
+                          void *new_gl4p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 4
+    return sh_map_and_validate(v, gl4mfn, new_gl4p, size, 
+                                PGC_SH_l4_shadow, 
+                                shadow_l4_index, 
+                                validate_gl4e);
+#else // ! GUEST_PAGING_LEVELS >= 4
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif 
+}
+    
+int
+sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
+                          void *new_gl3p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 3
+    return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
+                                PGC_SH_l3_shadow, 
+                                shadow_l3_index, 
+                                validate_gl3e);
+#else // ! GUEST_PAGING_LEVELS >= 3
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif
+}
+
+int
+sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
+                          void *new_gl2p, u32 size)
+{
+    return sh_map_and_validate(v, gl2mfn, new_gl2p, size, 
+                                PGC_SH_l2_shadow, 
+                                shadow_l2_index, 
+                                validate_gl2e);
+}
+
+int
+sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
+                           void *new_gl2p, u32 size)
+{
+#if GUEST_PAGING_LEVELS == 3
+    return sh_map_and_validate(v, gl2mfn, new_gl2p, size, 
+                                PGC_SH_l2h_shadow, 
+                                shadow_l2_index, 
+                                validate_gl2e);
+#else /* Non-PAE guests don't have different kinds of l2 table */
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif
+}
+
+int
+sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
+                          void *new_gl1p, u32 size)
+{
+    return sh_map_and_validate(v, gl1mfn, new_gl1p, size, 
+                                PGC_SH_l1_shadow, 
+                                shadow_l1_index, 
+                                validate_gl1e);
+}
+
+
+/**************************************************************************/
+/* Optimization: If we see two emulated writes of zeros to the same
+ * page-table without another kind of page fault in between, we guess
+ * that this is a batch of changes (for process destruction) and
+ * unshadow the page so we don't take a pagefault on every entry.  This
+ * should also make finding writeable mappings of pagetables much
+ * easier. */
+
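+/* Note the two cases handled below: a pagetable that is not a top-level
+ * one is simply unshadowed (sh_remove_shadows), while a top-level table
+ * only has its mappings unhooked, and only once per batch of updates. */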
+/* Look to see if this is the second emulated write in a row to this
+ * page, and unshadow/unhook if it is */
+static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) &&
+         sh_mfn_is_a_page_table(gmfn) )
+    {
+        u32 flags = mfn_to_page(gmfn)->shadow_flags;
+        mfn_t smfn;
+        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+        {
+            perfc_incrc(shadow_early_unshadow);
+            sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
+            return;
+        }
+        /* SHF_unhooked_mappings is set to make sure we only unhook
+         * once in a single batch of updates. It is reset when this
+         * top-level page is loaded into CR3 again */
+        if ( !(flags & SHF_unhooked_mappings) ) 
+        {
+            perfc_incrc(shadow_early_unshadow_top);
+            mfn_to_page(gmfn)->shadow_flags |= SHF_unhooked_mappings;
+            if ( flags & SHF_L2_32 )
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L3_PAE ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L4_64 ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l4_64_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+        }
+    }
+    v->arch.shadow.last_emulated_mfn = mfn_x(gmfn);
+#endif
+}
+
+/* Stop counting towards early unshadows, as we've seen a real page fault */
+static inline void reset_early_unshadow(struct vcpu *v)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    v->arch.shadow.last_emulated_mfn = INVALID_MFN;
+#endif
+}
+
+
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults.  Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+
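+/* Rough sketch of the expected caller contract (illustrative only, not the
+ * actual trap-handler code):
+ *
+ *     if ( sh_page_fault(v, va, regs) )
+ *         return;    // fault was fixed in the shadows; let the guest retry
+ *     // otherwise pass the fault to the guest or handle it elsewhere
+ */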
+static int sh_page_fault(struct vcpu *v, 
+                          unsigned long va, 
+                          struct cpu_user_regs *regs)
+{
+    struct domain *d = v->domain;
+    walk_t gw;
+    u32 accumulated_gflags;
+    gfn_t gfn;
+    mfn_t gmfn, sl1mfn=_mfn(0);
+    shadow_l1e_t sl1e, *ptr_sl1e;
+    paddr_t gpa;
+    struct cpu_user_regs emul_regs;
+    struct x86_emulate_ctxt emul_ctxt;
+    int r, mmio;
+    fetch_type_t ft = 0;
+
+    //
+    // XXX: Need to think about eventually mapping superpages directly in the
+    //      shadow (when possible), as opposed to splintering them into a
+    //      bunch of 4K maps.
+    //
+
+    SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
+                   v->domain->domain_id, v->vcpu_id, va, regs->error_code);
+    
+    shadow_lock(d);
+
+    shadow_audit_tables(v);
+                   
+    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
+    {
+        SHADOW_PRINTK("malformed guest pagetable!\n");
+        print_gw(&gw);
+    }
+
+    sh_audit_gw(v, &gw);
+
+    // We do not look at the gw->l1e, as that will not exist for superpages.
+    // Instead, we use the gw->eff_l1e...
+    //
+    // We need not check all the levels of the guest page table entries for
+    // present vs not-present, as the eff_l1e will always be not present if
+    // one of the higher level entries is not present.
+    //
+    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
+    {
+        if ( hvm_guest(v) && !shadow_vcpu_mode_translate(v) )
+        {
+            /* Not present in p2m map, means this is mmio */
+            gpa = va;
+            goto mmio;
+        }
+
+        perfc_incrc(shadow_fault_bail_not_present);
+        goto not_a_shadow_fault;
+    }
+
+    // All levels of the guest page table are now known to be present.
+    accumulated_gflags = accumulate_guest_flags(&gw);
+
+    // Check for attempts to access supervisor-only pages from user mode,
+    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
+    // code.
+    //
+    if ( (regs->error_code & PFEC_user_mode) &&
+         !(accumulated_gflags & _PAGE_USER) )
+    {
+        /* illegal user-mode access to supervisor-only page */
+        perfc_incrc(shadow_fault_bail_user_supervisor);
+        goto not_a_shadow_fault;
+    }
+
+    // Was it a write fault?
+    //
+    if ( regs->error_code & PFEC_write_access )
+    {
+        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
+        {
+            perfc_incrc(shadow_fault_bail_ro_mapping);
+            goto not_a_shadow_fault;
+        }
+    }
+    else // must have been either an insn fetch or read fault
+    {
+        // Check for NX bit violations: attempts to execute code that is
+        // marked "do not execute".  Such errors are not caused or dealt with
+        // by the shadow code.
+        //
+        if ( regs->error_code & PFEC_insn_fetch )
+        {
+            if ( accumulated_gflags & _PAGE_NX_BIT )
+            {
+                /* NX prevented this code fetch */
+                perfc_incrc(shadow_fault_bail_nx);
+                goto not_a_shadow_fault;
+            }
+        }
+    }
+
+    /* Is this an MMIO access? */
+    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    mmio = ( hvm_guest(v) 
+             && shadow_vcpu_mode_translate(v) 
+             && mmio_space(gfn_to_paddr(gfn)) );
+
+    /* For MMIO, the shadow holds the *gfn*; for normal accesses, it holds 
+     * the equivalent mfn. */
+    if ( mmio ) 
+        gmfn = _mfn(gfn_x(gfn));
+    else
+    {
+        gmfn = vcpu_gfn_to_mfn(v, gfn);
+        if ( !valid_mfn(gmfn) )
+        {
+            perfc_incrc(shadow_fault_bail_bad_gfn);
+            SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 
+                           gfn_x(gfn), mfn_x(gmfn));
+            goto not_a_shadow_fault;
+        }
+    }
+
+    /* Make sure there is enough free shadow memory to build a chain of
+     * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
+     * to allocate all we need.  (We never allocate a top-level shadow
+     * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
+    shadow_prealloc(d, SHADOW_MAX_ORDER);
+
+    /* Acquire the shadow.  This must happen before we figure out the rights 
+     * for the shadow entry, since we might promote a page here. */
+    // XXX -- this code will need to change somewhat if/when the shadow code
+    // can directly map superpages...
+    ft = ((regs->error_code & PFEC_write_access) ?
+          ft_demand_write : ft_demand_read);
+    ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
+    ASSERT(ptr_sl1e);
+
+    /* Calculate the shadow entry */
+    if ( ft == ft_demand_write )
+    {
+        if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
+        {
+            perfc_incrc(shadow_fault_emulate_write);
+            goto emulate;
+        }
+    }
+    else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
+    {
+        perfc_incrc(shadow_fault_emulate_read);
+        goto emulate;
+    }
+
+    /* Quick sanity check: we never make an MMIO entry that's got the 
+     * _PAGE_PRESENT flag set in it. */
+    ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
+
+    r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+
+    if ( mmio ) 
+    {
+        gpa = guest_walk_to_gpa(&gw);
+        goto mmio;
+    }
+
+#if 0
+    if ( !(r & SHADOW_SET_CHANGED) )
+        debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte
+                          ") did not change anything\n",
+                          __func__, gw.va, l1e_get_intpte(sl1e));
+#endif
+
+    perfc_incrc(shadow_fault_fixed);
+    d->arch.shadow.fault_count++;
+    reset_early_unshadow(v);
+
+ done:
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("fixed\n");
+    shadow_audit_tables(v);
+    shadow_unlock(d);
+    return EXCRET_fault_fixed;
+
+ emulate:
+
+    /* Take the register set we were called with */
+    emul_regs = *regs;
+    if ( hvm_guest(v) )
+    {
+        /* Add the guest's segment selectors, rip, rsp, rflags */ 
+        hvm_store_cpu_guest_regs(v, &emul_regs, NULL);
+    }
+    emul_ctxt.regs = &emul_regs;
+    emul_ctxt.cr2 = va;
+    emul_ctxt.mode = hvm_guest(v) ? hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST;
+
+    SHADOW_PRINTK("emulate: eip=%#lx\n", emul_regs.eip);
+
+    v->arch.shadow.propagate_fault = 0;
+    if ( x86_emulate_memop(&emul_ctxt, &shadow_emulator_ops) )
+    {
+        SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", 
+                       mfn_x(gmfn));
+        perfc_incrc(shadow_fault_emulate_failed);
+        /* If this is actually a page table, then we have a bug, and need 
+         * to support more operations in the emulator.  More likely, 
+         * though, this is a hint that this page should not be shadowed. */
+        shadow_remove_all_shadows(v, gmfn);
+        /* This means that actual missing operations will cause the 
+         * guest to loop on the same page fault. */
+        goto done;
+    }
+    if ( v->arch.shadow.propagate_fault )
+    {
+        /* Emulation triggered another page fault */
+        goto not_a_shadow_fault;
+    }
+
+    /* Emulator has changed the user registers: write back */
+    if ( hvm_guest(v) )
+    {
+        /* Write back the guest's segment selectors, rip, rsp, rflags */ 
+        hvm_load_cpu_guest_regs(v, &emul_regs);
+        /* And don't overwrite those in the caller's regs. */
+        emul_regs.eip = regs->eip;
+        emul_regs.cs = regs->cs;
+        emul_regs.eflags = regs->eflags;
+        emul_regs.esp = regs->esp;
+        emul_regs.ss = regs->ss;
+        emul_regs.es = regs->es;
+        emul_regs.ds = regs->ds;
+        emul_regs.fs = regs->fs;
+        emul_regs.gs = regs->gs;
+    }
+    *regs = emul_regs;
+
+    goto done;
+
+ mmio:
+    perfc_incrc(shadow_fault_mmio);
+    if ( !hvm_apic_support(d) && (gpa >= 0xFEC00000) )
+    {
+        /* Need to deal with these disabled-APIC accesses, as
+         * handle_mmio() apparently does not currently do that. */
+        /* TJD: What about it, then?   For now, I'm turning this BUG() 
+         * into a domain_crash() since we don't want to kill Xen. */
+        SHADOW_ERROR("disabled-APIC access: not supported.\n");
+        domain_crash(d); 
+    }
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("mmio\n");
+    shadow_audit_tables(v);
+    reset_early_unshadow(v);
+    shadow_unlock(d);
+    sh_log_mmio(v, gpa);
+    handle_mmio(va, gpa);
+    return EXCRET_fault_fixed;
+
+ not_a_shadow_fault:
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("not a shadow fault\n");
+    shadow_audit_tables(v);
+    reset_early_unshadow(v);
+    shadow_unlock(d);
+    return 0;
+}
+
+
+static int
+sh_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+    shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
+
+    // XXX -- might be a good thing to prefetch the va into the shadow
+
+    // no need to flush anything if there's no SL2...
+    //
+    if ( !ptr_sl2e )
+        return 0;
+
+    // If there's nothing shadowed for this particular sl2e, then
+    // there is no need to do an invlpg, either...
+    //
+    if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
+        return 0;
+
+    // Check to see if the SL2 is a splintered superpage...
+    // If so, then we'll need to flush the entire TLB (because that's
+    // easier than invalidating all of the individual 4K pages).
+    //
+    if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
+          PGC_SH_type_mask) == PGC_SH_fl1_shadow )
+    {
+        local_flush_tlb();
+        return 0;
+    }
+
+    return 1;
+}
+
+static unsigned long
+sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+    walk_t gw;
+    gfn_t gfn;
+
+    guest_walk_tables(v, va, &gw, 0);
+    gfn = guest_walk_to_gfn(&gw);
+    unmap_walk(v, &gw);
+
+    return gfn_x(gfn);
+}
+
+
+static unsigned long
+sh_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the guest physical
+ * address that the *guest* pagetables would map it to. */
+{
+    unsigned long gfn = sh_gva_to_gfn(v, va);
+    if ( gfn == INVALID_GFN )
+        return 0;
+    else
+        return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
+}
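+/* For example (illustrative numbers only): with 4k pages (PAGE_SHIFT == 12),
+ * a va whose guest pagetables map it to gfn 0x1234 and whose in-page offset
+ * is 0xabc translates to gpa (0x1234 << 12) | 0xabc == 0x1234abc. */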
+
+
+// XXX -- should this be in this file?
+//        Or should it be moved to shadow-common.c?
+//
+/* returns a lowmem machine address of the copied HVM L3 root table
+ * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
+ * otherwise blank out any entries with reserved bits in them.  */
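+/* (The "reserved bits" are the flags in the res mask below --
+ * RW, NX, USER, ACCESSED and DIRTY -- which hardware PAE l3 entries
+ * loaded through CR3 may not carry.) */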
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long
+hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
+{
+    int i, f;
+    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
+    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
+    for ( i = 0; i < 4; i++ )
+    {
+        f = l3e_get_flags(l3tab[i]);
+        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
+            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
+        else
+            new_l3e = l3e_empty();
+        safe_write_entry(&copy[i], &new_l3e);
+    }
+    return __pa(copy);
+}
+#endif
+
+
+static inline void
+sh_update_linear_entries(struct vcpu *v)
+/* Sync up all the linear mappings for this vcpu's pagetables */
+{
+    struct domain *d = v->domain;
+
+    /* Linear pagetables in PV guests
+     * ------------------------------
+     *
+     * Guest linear pagetables, which map the guest pages, are at
+     * LINEAR_PT_VIRT_START.  Shadow linear pagetables, which map the
+     * shadows, are at SH_LINEAR_PT_VIRT_START.  Most of the time these
+     * are set up at shadow creation time, but (of course!) the PAE case
+     * is subtler.  Normal linear mappings are made by having an entry
+     * in the top-level table that points to itself (shadow linear) or
+     * to the guest top-level table (guest linear).  For PAE, to set up
+     * a linear map requires us to copy the four top-level entries into 
+     * level-2 entries.  That means that every time we change a PAE l3e,
+     * we need to reflect the change into the copy.
+     *
+     * Linear pagetables in HVM guests
+     * -------------------------------
+     *
+     * For HVM guests, the linear pagetables are installed in the monitor
+     * tables (since we can't put them in the shadow).  Shadow linear
+     * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
+     * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for 
+     * a linear pagetable of the monitor tables themselves.  We have 
+     * the same issue of having to re-copy PAE l3 entries whenever we use 
+     * PAE shadows. 
+     *
+     * Because HVM guests run on the same monitor tables regardless of the 
+     * shadow tables in use, the linear mapping of the shadow tables has to 
+     * be updated every time v->arch.shadow_table changes. 
+     */
+
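+    /* Four compile-time cases are handled below: 4-level shadows on 64bit
+     * Xen, PAE shadows on 64bit Xen, PAE Xen (3 levels), and 2-level Xen.
+     * Only the PAE-related cases need the l3-entry copying described above;
+     * the others need at most a single linear-map entry updated. */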
+    /* Don't try to update the monitor table if it doesn't exist */
+    if ( shadow_mode_external(d) 
+         && pagetable_get_pfn(v->arch.monitor_table) == 0 ) 
+        return;
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
+    
+    /* For PV, one l4e points at the guest l4, one points at the shadow
+     * l4.  No maintenance required. 
+     * For HVM, just need to update the l4e that points to the shadow l4. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Use the linear map if we can; otherwise make a new mapping */
+        if ( v == current ) 
+        {
+            __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+        } 
+        else
+        { 
+            l4_pgentry_t *ml4e;
+            ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(ml4e);
+        }
+    }
+
+#elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
+
+    /* This case only exists in HVM.  To give ourselves a linear map of the 
+     * shadows, we need to extend a PAE shadow to 4 levels.  We do this by 
+     * having a monitor l3 in slot 0 of the monitor l4 table, and 
+     * copying the PAE l3 entries into it.  Then, by having the monitor l4e
+     * for shadow pagetables also point to the monitor l4, we can use it
+     * to access the shadows. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Install copies of the shadow l3es into the monitor l3 table.
+         * The monitor l3 table is hooked into slot 0 of the monitor
+         * l4 table, so we use l3 linear indices 0 to 3 */
+        shadow_l3e_t *sl3e;
+        l3_pgentry_t *ml3e;
+        mfn_t l3mfn;
+        int i;
+
+        /* Use linear mappings if we can; otherwise make new mappings */
+        if ( v == current ) 
+        {
+            ml3e = __linear_l3_table;
+            l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
+#if GUEST_PAGING_LEVELS == 2
+            /* Shadow l3 tables are made up by update_cr3 */
+            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+            sl3e = v->arch.shadow_vtable;
+#endif
+        }
+        else 
+        {   
+            l4_pgentry_t *ml4e;
+            ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
+            l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
+            ml3e = sh_map_domain_page(l3mfn);
+            sh_unmap_domain_page(ml4e);
+#if GUEST_PAGING_LEVELS == 2
+            /* Shadow l3 tables are made up by update_cr3 */
+            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+            sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
+#endif
+        }
+
+        for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        {
+            ml3e[i] = 
+                (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT) 
+                ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])), 
+                               __PAGE_HYPERVISOR) 
+                : l3e_empty();
+        }
+
+        if ( v != current ) 
+        {
+            sh_unmap_domain_page(ml3e);
+#if GUEST_PAGING_LEVELS != 2
+            sh_unmap_domain_page(sl3e);
+#endif
+        }
+    }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+    /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
+     * entries in the shadow, and the shadow's l3 entries into the 
+     * shadow-linear-map l2 entries in the shadow.  This is safe to do 
+     * because Xen does not let guests share high-slot l2 tables between l3s,
+     * so we know we're not treading on anyone's toes. 
+     *
+     * HVM: need to copy the shadow's l3 entries into the
+     * shadow-linear-map l2 entries in the monitor table.  This is safe
+     * because we have one monitor table for each vcpu.  The monitor's
+     * own l3es don't need to be copied because they never change.  
+     * XXX That might change if we start stuffing things into the rest
+     * of the monitor's virtual address space. 
+     */ 
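+    /* The copying below goes through safe_write_entry() so that a 64-bit
+     * PAE entry is never seen half-written by a concurrent walker. */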
+    {
+        l2_pgentry_t *l2e, new_l2e;
+        shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
+        int i;
+
+#if GUEST_PAGING_LEVELS == 2
+        /* Shadow l3 tables were built by update_cr3 */
+        if ( shadow_mode_external(d) )
+            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+        else
+            BUG(); /* PV 2-on-3 is not supported yet */
+        
+#else /* GUEST_PAGING_LEVELS == 3 */
+        
+        /* Use local vcpu's mappings if we can; otherwise make new mappings */
+        if ( v == current ) 
+        {
+            shadow_l3e = v->arch.shadow_vtable;
+            if ( !shadow_mode_external(d) )
+                guest_l3e = v->arch.guest_vtable;
+        }
+        else 
+        {
+            mfn_t smfn;
+            int idx;
+            
+            /* Map the shadow l3 */
+            smfn = pagetable_get_mfn(v->arch.shadow_table);
+            idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
+            shadow_l3e = sh_map_domain_page(smfn);
+            shadow_l3e += idx;
+            if ( !shadow_mode_external(d) )
+            {
+                /* Also the guest l3 */
+                mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); 
+                guest_l3e = sh_map_domain_page(gmfn);
+                guest_l3e += guest_index(v->arch.guest_vtable);
+            }
+        }
+#endif /* GUEST_PAGING_LEVELS */
+        
+        /* Choose where to write the entries, using linear maps if possible */
+        if ( v == current && shadow_mode_external(d) ) 
+        {
+            /* From the monitor tables, it's safe to use linear maps to update
+             * monitor l2s */
+            l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+        }
+        else if ( shadow_mode_external(d) ) 
+        {
+            /* Map the monitor table's high l2 */
+            l3_pgentry_t *l3e;
+            l3e = sh_map_domain_page(
+                pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+            l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
+            sh_unmap_domain_page(l3e);
+        } 
+        else 
+        {
+            /* Map the shadow table's high l2 */
+            ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
+            l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
+        }
+        
+        
+        if ( !shadow_mode_external(d) )
+        {
+            /* Write linear mapping of guest. */
+            for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+            { 
+                new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT) 
+                    ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
+                                   __PAGE_HYPERVISOR) 
+                    : l2e_empty();
+                safe_write_entry(
+                    &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
+                    &new_l2e);
+            }
+        }
+        
+        /* Write linear mapping of shadow. */
+        for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        {
+            new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT) 
+                ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
+                               __PAGE_HYPERVISOR) 
+                : l2e_empty();
+            safe_write_entry(
+                &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
+                &new_l2e);
+        }
+        
+        if ( v != current || !shadow_mode_external(d) )
+            sh_unmap_domain_page(l2e);
+        
+#if GUEST_PAGING_LEVELS == 3
+        if ( v != current) 
+        {
+            sh_unmap_domain_page(shadow_l3e);
+            if ( !shadow_mode_external(d) )
+                sh_unmap_domain_page(guest_l3e);
+        }
+#endif
+    }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+    /* For PV, one l2e points at the guest l2, one points at the shadow
+     * l2. No maintenance required. 
+     * For HVM, just need to update the l2e that points to the shadow l2. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Use the linear map if we can; otherwise make a new mapping */
+        if ( v == current ) 
+        {
+            __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+        } 
+        else
+        { 
+            l2_pgentry_t *ml2e;
+            ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(ml2e);
+        }
+    }
+
+#else
+#error this should not happen
+#endif
+}
+
+
+// XXX -- should this be in this file?
+//        Or should it be moved to shadow-common.c?
+//
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_pae_recopy(struct domain *d)
+/* Called whenever we write to the l3 entries of a PAE pagetable which 
+ * is currently in use.  Each vcpu that is using the table needs to 
+ * resync its copies of the l3s in linear maps and any low-memory
+ * copies it might have made for fitting into 32bit CR3.
+ * Since linear maps are also resynced when we change CR3, we don't
+ * need to worry about changes to PAE l3es that are not currently in use.*/
+{
+    struct vcpu *v;
+    cpumask_t flush_mask = CPU_MASK_NONE;
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    for_each_vcpu(d, v)
+    {
+        if ( !v->arch.shadow.pae_flip_pending ) 
+            continue;
+
+        cpu_set(v->processor, flush_mask);
+        
+        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
+
+        /* This vcpu has a copy in its linear maps */
+        sh_update_linear_entries(v);
+        if ( hvm_guest(v) )
+        {
+            /* This vcpu has a copy in its HVM PAE l3 */
+            v->arch.hvm_vcpu.hw_cr3 = 
+                hvm_pae_copy_root(v, v->arch.shadow_vtable,
+                                  !shadow_vcpu_mode_translate(v));
+        }
+#if CONFIG_PAGING_LEVELS == 3
+        else 
+        {
+            /* This vcpu might have copied the l3 to below 4GB */
+            if ( v->arch.cr3 >> PAGE_SHIFT 
+                 != pagetable_get_pfn(v->arch.shadow_table) )
+            {
+                /* Recopy to where that copy is. */
+                int i;
+                l3_pgentry_t *dst, *src;
+                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
+                src = v->arch.shadow_vtable;
+                for ( i = 0 ; i < 4 ; i++ ) 
+                    safe_write_entry(dst + i, src + i);
+            }
+        }
+#endif
+        v->arch.shadow.pae_flip_pending = 0;        
+    }
+
+    flush_tlb_mask(flush_mask);
+}
+#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
+
+
+/* removes:
+ *     vcpu->arch.guest_vtable
+ *     vcpu->arch.shadow_table
+ *     vcpu->arch.shadow_vtable
+ * Does all appropriate management/bookkeeping/refcounting/etc...
+ */
+static void
+sh_detach_old_tables(struct vcpu *v)
+{
+    mfn_t smfn;
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.guest_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        sh_unmap_domain_page_global(v->arch.guest_vtable);
+        v->arch.guest_vtable = NULL;
+    }
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    smfn = pagetable_get_mfn(v->arch.shadow_table);
+    if ( mfn_x(smfn) )
+    {
+        ASSERT(v->arch.shadow_vtable);
+
+#if GUEST_PAGING_LEVELS == 3
+        // PAE guests do not (necessarily) use an entire page for their
+        // 4-entry L3s, so we have to deal with them specially.
+        //
+        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
+#else
+        sh_put_ref(v, smfn, 0);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        {
+            struct pae_l3_bookkeeping *info =
+                sl3p_to_info(v->arch.shadow_vtable);
+            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
+            clear_bit(v->vcpu_id, &info->vcpus);
+        }
+#endif
+        v->arch.shadow_table = pagetable_null();
+    }
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.shadow_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        //
+        sh_unmap_domain_page_global(v->arch.shadow_vtable);
+        v->arch.shadow_vtable = NULL;
+    }
+}
+
+static void
+sh_update_cr3(struct vcpu *v)
+/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+ * HVM guests should also make sure hvm_get_guest_ctrl_reg(v, 3) reflects the new guest CR3.
+ */
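+/* In outline: detach the old guest/shadow mappings (sh_detach_old_tables),
+ * map the new guest top level as guest_vtable, find or build the root
+ * shadow and install it as v->arch.shadow_table, map it as shadow_vtable,
+ * take and pin a reference to it, load v->arch.cr3 (and hw_cr3 for HVM
+ * guests), and finally resync the linear maps with
+ * sh_update_linear_entries(). */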
+{
+    struct domain *d = v->domain;
+    mfn_t gmfn, smfn;
+#if GUEST_PAGING_LEVELS == 3
+    u32 guest_idx=0;
+#endif
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(v->arch.shadow.mode);
+
+    ////
+    //// vcpu->arch.guest_table is already set
+    ////
+    
+#ifndef NDEBUG 
+    /* Double-check that the HVM code has sent us a sane guest_table */
+    if ( hvm_guest(v) )
+    {
+        gfn_t gfn;
+
+        ASSERT(shadow_mode_external(d));
+
+        // Is paging enabled on this vcpu?
+        if ( shadow_vcpu_mode_translate(v) )
+        {
+            gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
+            gmfn = vcpu_gfn_to_mfn(v, gfn);
+            ASSERT(valid_mfn(gmfn));
+            ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
+        } 
+        else 
+        {
+            /* Paging disabled: guest_table points at (part of) p2m */
+#if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
+            /* For everything else, they should be the same */
+            ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
+#endif
+        }
+    }
+#endif
+
+    SHADOW_PRINTK("d=%u v=%u guest_table=%05lx\n",
+                   d->domain_id, v->vcpu_id, 
+                   (unsigned long)pagetable_get_pfn(v->arch.guest_table));
+
+#if GUEST_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        gmfn = pagetable_get_mfn(v->arch.guest_table_user);
+    else
+#endif
+        gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+    sh_detach_old_tables(v);
+
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        ASSERT(v->arch.cr3 == 0);
+        return;
+    }
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if GUEST_PAGING_LEVELS == 3
+        if ( shadow_vcpu_mode_translate(v) ) 
+            /* Paging enabled: find where in the page the l3 table is */
+            guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
+        else
+            /* Paging disabled: l3 is at the start of a page (in the p2m) */ 
+            guest_idx = 0; 
+
+        // Ignore the low 2 bits of guest_idx -- they are really just
+        // cache control.
+        guest_idx &= ~3;
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable =
+            (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
+#else
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#endif
+    }
+    else
+    {
+#ifdef __x86_64__
+        v->arch.guest_vtable = __linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#else
+        v->arch.guest_vtable = __linear_l2_table;
+#endif
+    }
+
+#if 0
+    printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
+           __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable);
+#endif
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
+    if ( valid_mfn(smfn) )
+    {
+        /* Pull this root shadow to the front of the list of roots. */
+        list_del(&mfn_to_page(smfn)->list);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    else
+    {
+        /* This guest MFN is a pagetable.  Must revoke write access. */
+        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
+             != 0 )
+            flush_tlb_mask(d->domain_dirty_cpumask); 
+        /* Make sure there's enough free shadow memory. */
+        shadow_prealloc(d, SHADOW_MAX_ORDER); 
+        /* Shadow the page. */
+        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    ASSERT(valid_mfn(smfn));
+    v->arch.shadow_table = pagetable_from_mfn(smfn);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    /* Once again OK to unhook entries from this table if we see fork/exit */
+    ASSERT(sh_mfn_is_a_page_table(gmfn));
+    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        mfn_t adjusted_smfn = smfn;
+        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable =
+            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
+            shadow_idx;
+#else
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#endif
+    }
+    else
+    {
+#if SHADOW_PAGING_LEVELS == 4
+        v->arch.shadow_vtable = __sh_linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#else
+        v->arch.shadow_vtable = __sh_linear_l2_table;
+#endif
+    }
+
+    ////
+    //// Take a ref to the new shadow table, and pin it.
+    ////
+    //
+    // This ref is logically "held" by the v->arch.shadow_table entry itself.
+    // Release the old ref.
+    //
+#if GUEST_PAGING_LEVELS == 3
+    // PAE guests do not (necessarily) use an entire page for their
+    // 4-entry L3s, so we have to deal with them specially.
+    //
+    // XXX - might want to revisit this if/when we do multiple compilation for
+    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
+    //       subshadows.
+    //
+    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
+    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
+#else
+    sh_get_ref(smfn, 0);
+    sh_pin(smfn);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+    // PAE 3-on-3 shadows have to keep track of which vcpus are using
+    // which l3 subshadow, in order to handle the SHADOW_SET_L3PAE_RECOPY
+    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
+    // in the code for more info.
+    //
+    {
+        struct pae_l3_bookkeeping *info =
+            sl3p_to_info(v->arch.shadow_vtable);
+        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
+        set_bit(v->vcpu_id, &info->vcpus);
+    }
+#endif
+
+    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
+                      __func__, gmfn, smfn);
+
+    ///
+    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+    ///
+    if ( shadow_mode_external(d) )
+    {
+        ASSERT(hvm_guest(v));
+        make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+#if SHADOW_PAGING_LEVELS != 3
+#error unexpected combination of GUEST and SHADOW paging levels
+#endif
+        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
+        {
+            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
+            int i;
+
+            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
+                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
+            for (i = 0; i < 4; i++)
+            {
+                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
+                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
+            }
+        }
+#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
+         * If paging is disabled, clear l3e reserved bits; otherwise 
+         * remove entries that have reserved bits set. */
+        v->arch.hvm_vcpu.hw_cr3 =
+            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
+                              !shadow_vcpu_mode_translate(v));
+#else
+        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
+        v->arch.hvm_vcpu.hw_cr3 =
+            pagetable_get_paddr(v->arch.shadow_table);
+#endif
+    }
+    else // not shadow_mode_external...
+    {
+        /* We don't support PV except guest == shadow == config levels */
+        BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
+        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+    }
+
+    /* Fix up the linear pagetable mappings */
+    sh_update_linear_entries(v);
+}
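
The 2-on-3 branch of sh_update_cr3() above fills the PAE hvm_lowmem_l3tab with four entries pointing at the four consecutive pages of the shadow l2. Below is a standalone sketch of that entry construction only, with plain integers standing in for mfn_t and an invented l3e_from_mfn() helper (not the hypervisor's shadow_l3e_from_mfn); bit 0 is used as the present flag as in x86 PAE.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PAGE_PRESENT  0x1ULL

    /* Build one 64-bit PAE l3 entry from a frame number (illustration only). */
    static uint64_t l3e_from_mfn(uint64_t mfn, uint64_t flags)
    {
        return (mfn << PAGE_SHIFT) | flags;
    }

    int main(void)
    {
        uint64_t l2_base_mfn = 0x1234;   /* first page of a 4-page l2 shadow */
        uint64_t l3tab[4];
        int i;

        /* Point each l3 slot at the next page of the l2, as the 2-on-3 code does. */
        for ( i = 0; i < 4; i++ )
            l3tab[i] = l3e_from_mfn(l2_base_mfn + i, PAGE_PRESENT);

        for ( i = 0; i < 4; i++ )
            printf("l3tab[%d] = %#llx\n", i, (unsigned long long)l3tab[i]);
        return 0;
    }
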
+
+
+/**************************************************************************/
+/* Functions to revoke guest rights */
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
+/* Look up this vaddr in the current shadow and see if it's a writeable
+ * mapping of this gmfn.  If so, remove it.  Returns 1 if it worked. */
+{
+    shadow_l1e_t sl1e, *sl1p;
+    shadow_l2e_t *sl2p;
+#if GUEST_PAGING_LEVELS >= 3
+    shadow_l3e_t *sl3p;
+#if GUEST_PAGING_LEVELS >= 4
+    shadow_l4e_t *sl4p;
+#endif
+#endif
+    mfn_t sl1mfn;
+
+
+    /* Carefully look in the shadow linear map for the l1e we expect */
+    if ( v->arch.shadow_vtable == NULL ) return 0;
+#if GUEST_PAGING_LEVELS >= 4
+    sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
+    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+        return 0;
+    sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
+    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
+        return 0;
+#elif GUEST_PAGING_LEVELS == 3
+    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 
+        + shadow_l3_linear_offset(vaddr);
+    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
+        return 0;
+#endif
+    sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
+    if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
+        return 0;
+    sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
+    sl1e = *sl1p;
+    if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
+          != (_PAGE_PRESENT|_PAGE_RW))
+         || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
+        return 0;
+
+    /* Found it!  Need to remove its write permissions. */
+    sl1mfn = shadow_l2e_get_mfn(*sl2p);
+    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
+    shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
+    return 1;
+}
+#endif
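
sh_guess_wrmap() above only demotes an l1e when it is a present, writable mapping of exactly the frame being write-protected. A standalone sketch of that final test and the RW-clearing step, using simplified 64-bit entries and invented helpers rather than the shadow_l1e_* accessors:

    #include <assert.h>
    #include <stdint.h>

    #define _PAGE_PRESENT  0x001ULL
    #define _PAGE_RW       0x002ULL
    #define PAGE_SHIFT     12

    /* Frame number held in a (simplified) 64-bit pagetable entry. */
    static uint64_t pte_mfn(uint64_t pte) { return pte >> PAGE_SHIFT; }

    /* If *ptep is a present, writable mapping of target_mfn, clear RW; return 1. */
    static int demote_if_writable(uint64_t *ptep, uint64_t target_mfn)
    {
        uint64_t pte = *ptep;
        if ( (pte & (_PAGE_PRESENT | _PAGE_RW)) != (_PAGE_PRESENT | _PAGE_RW) )
            return 0;                    /* not present, or already read-only */
        if ( pte_mfn(pte) != target_mfn )
            return 0;                    /* maps a different frame */
        *ptep = pte & ~_PAGE_RW;         /* revoke write permission */
        return 1;
    }

    int main(void)
    {
        uint64_t pte = (0xabcdULL << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW;
        assert(demote_if_writable(&pte, 0xabcd) == 1);
        assert((pte & _PAGE_RW) == 0);   /* mapping is now read-only */
        return 0;
    }
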
+
+int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
+/* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
+{
+    shadow_l1e_t *sl1e;
+    int done = 0;
+    int flags;
+    
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        flags = shadow_l1e_get_flags(*sl1e);
+        if ( (flags & _PAGE_PRESENT) 
+             && (flags & _PAGE_RW) 
+             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
+        {
+            shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
+                  & PGT_count_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+
+int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
+/* Excises all mappings to guest frame from this shadow l1 table */
+{
+    shadow_l1e_t *sl1e;
+    int done = 0;
+    int flags;
+    
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, 
+    {
+        flags = shadow_l1e_get_flags(*sl1e);
+        if ( (flags & _PAGE_PRESENT) 
+             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
+        {
+            shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+/**************************************************************************/
+/* Functions to excise all pointers to shadows from higher-level shadows. */
+
+void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
+/* Blank out a single shadow entry */
+{
+    switch (mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
+    {
+    case PGC_SH_l1_shadow:
+        shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
+    case PGC_SH_l2_shadow:
+#if GUEST_PAGING_LEVELS == 3
+    case PGC_SH_l2h_shadow:
+#endif
+        shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
+#if GUEST_PAGING_LEVELS >= 3
+    case PGC_SH_l3_shadow:
+        shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
+#if GUEST_PAGING_LEVELS >= 4
+    case PGC_SH_l4_shadow:
+        shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
+#endif
+#endif
+    default: BUG(); /* Called with the wrong kind of shadow. */
+    }
+}
+
+int sh_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
+/* Remove all mappings of this l1 shadow from this l2 shadow */
+{
+    shadow_l2e_t *sl2e;
+    int done = 0;
+    int flags;
+#if GUEST_PAGING_LEVELS != 4
+    int xen_mappings = !shadow_mode_external(v->domain);
+#endif
+    
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, xen_mappings, 
+    {
+        flags = shadow_l2e_get_flags(*sl2e);
+        if ( (flags & _PAGE_PRESENT) 
+             && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
+        {
+            shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+            if ( (mfn_to_page(sl1mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
+/* Remove all mappings of this l2 shadow from this l3 shadow */
+{
+    shadow_l3e_t *sl3e;
+    int done = 0;
+    int flags;
+    
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done, 
+    {
+        flags = shadow_l3e_get_flags(*sl3e);
+        if ( (flags & _PAGE_PRESENT) 
+             && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
+        {
+            shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
+            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+
+#if GUEST_PAGING_LEVELS >= 4
+int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
+/* Remove all mappings of this l3 shadow from this l4 shadow */
+{
+    shadow_l4e_t *sl4e;
+    int done = 0;
+    int flags, xen_mappings = !shadow_mode_external(v->domain);
+    
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, xen_mappings,
+    {
+        flags = shadow_l4e_get_flags(*sl4e);
+        if ( (flags & _PAGE_PRESENT) 
+             && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
+        {
+            shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+            if ( (mfn_to_page(sl3mfn)->count_info & PGC_SH_type_mask) == 0 )
+                /* This breaks us cleanly out of the FOREACH macro */
+                done = 1;
+        }
+    });
+    return done;
+}
+#endif /* 64bit guest */ 
+#endif /* PAE guest */
+
+/**************************************************************************/
+/* Handling HVM guest writes to pagetables  */
+
+/* Check that the user is allowed to perform this write. 
+ * Returns a mapped pointer to write to, and the mfn it's on,
+ * or NULL for error. */
+static inline void * emulate_map_dest(struct vcpu *v,
+                                      unsigned long vaddr,
+                                      struct x86_emulate_ctxt *ctxt,
+                                      mfn_t *mfnp)
+{
+    walk_t gw;
+    u32 flags;
+    gfn_t gfn;
+    mfn_t mfn;
+
+    guest_walk_tables(v, vaddr, &gw, 1);
+    flags = accumulate_guest_flags(&gw);
+    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    mfn = vcpu_gfn_to_mfn(v, gfn);
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+
+    if ( !(flags & _PAGE_PRESENT) 
+         || !(flags & _PAGE_RW) 
+         || (!(flags & _PAGE_USER) && ring_3(ctxt->regs)) )
+    {
+        /* This write would have faulted even on bare metal */
+        v->arch.shadow.propagate_fault = 1;
+        return NULL;
+    }
+    
+    if ( !valid_mfn(mfn) )
+    {
+        /* Attempted a write to a bad gfn.  This should never happen:
+         * after all, we're here because this write is to a page table. */
+        BUG();
+    }
+
+    ASSERT(sh_mfn_is_a_page_table(mfn));
+    *mfnp = mfn;
+    return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+}
+
+int
+sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
+                      u32 bytes, struct x86_emulate_ctxt *ctxt)
+{
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    while ( bytes > 0 )
+    {
+        mfn_t mfn;
+        int bytes_on_page;
+        void *addr;
+
+        bytes_on_page = PAGE_SIZE - (vaddr & ~PAGE_MASK);
+        if ( bytes_on_page > bytes )
+            bytes_on_page = bytes;
+
+        if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+            return X86EMUL_PROPAGATE_FAULT;
+        memcpy(addr, src, bytes_on_page);
+        shadow_validate_guest_pt_write(v, mfn, addr, bytes_on_page);
+        bytes -= bytes_on_page;
+        /* If we are writing zeros to this page, might want to unshadow */
+        if ( *(u8 *)addr == 0 )
+            check_for_early_unshadow(v, mfn);
+        sh_unmap_domain_page(addr);
+    }
+    shadow_audit_tables(v);
+    return X86EMUL_CONTINUE;
+}
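
The write emulation above splits a guest write at page boundaries: each iteration maps one destination page and copies only the bytes that fall on it. A standalone sketch of the page-boundary arithmetic follows; the loop below also steps vaddr and the source pointer forward between chunks, and the flat buffer merely stands in for the mapped destination pages (nothing here is the hypervisor's own API).

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096u
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* Copy `bytes` into a flat buffer one page-sized chunk at a time,
     * the way an emulated write must be split across destination pages. */
    static void chunked_write(uint8_t *dest_base, uint32_t vaddr,
                              const void *src, uint32_t bytes)
    {
        while ( bytes > 0 )
        {
            uint32_t on_page = PAGE_SIZE - (vaddr & ~PAGE_MASK);
            if ( on_page > bytes )
                on_page = bytes;
            memcpy(dest_base + vaddr, src, on_page);  /* one page's worth */
            src    = (const uint8_t *)src + on_page;
            vaddr += on_page;
            bytes -= on_page;
        }
    }

    int main(void)
    {
        static uint8_t mem[3 * PAGE_SIZE];
        const char msg[] = "crosses a page boundary";
        chunked_write(mem, PAGE_SIZE - 8, msg, sizeof msg); /* straddles two pages */
        printf("%s\n", mem + PAGE_SIZE - 8);
        return 0;
    }
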
+
+int
+sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, 
+                        unsigned long old, unsigned long new,
+                        unsigned int bytes, struct x86_emulate_ctxt *ctxt)
+{
+    mfn_t mfn;
+    void *addr;
+    unsigned long prev;
+    int rv = X86EMUL_CONTINUE;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(bytes <= sizeof (unsigned long));
+
+    if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+        return X86EMUL_PROPAGATE_FAULT;
+
+    switch (bytes) 
+    {
+    case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
+    case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
+    case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
+    case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
+    default:
+        SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
+        prev = ~old;
+    }
+
+    if ( (prev == old)  )
+        shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+    else
+        rv = X86EMUL_CMPXCHG_FAILED;
+
+    SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
+                  " wanted %#lx now %#lx bytes %u\n",
+                  vaddr, prev, old, new, *(unsigned long *)addr, bytes);
+
+    /* If we are writing zeros to this page, might want to unshadow */
+    if ( *(u8 *)addr == 0 )
+        check_for_early_unshadow(v, mfn);
+
+    sh_unmap_domain_page(addr);
+    shadow_audit_tables(v);
+    check_for_early_unshadow(v, mfn);
+    return rv;
+}
+
+int
+sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, 
+                          unsigned long old_lo, unsigned long old_hi,
+                          unsigned long new_lo, unsigned long new_hi,
+                          struct x86_emulate_ctxt *ctxt)
+{
+    mfn_t mfn;
+    void *addr;
+    u64 old, new, prev;
+    int rv = X86EMUL_CONTINUE;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
+        return X86EMUL_PROPAGATE_FAULT;
+
+    old = (((u64) old_hi) << 32) | (u64) old_lo;
+    new = (((u64) new_hi) << 32) | (u64) new_lo;
+    prev = cmpxchg(((u64 *)addr), old, new);
+
+    if ( (prev == old)  )
+        shadow_validate_guest_pt_write(v, mfn, addr, 8);
+    else
+        rv = X86EMUL_CMPXCHG_FAILED;
+
+    /* If we are writing zeros to this page, might want to unshadow */
+    if ( *(u8 *)addr == 0 )
+        check_for_early_unshadow(v, mfn);
+
+    sh_unmap_domain_page(addr);
+    shadow_audit_tables(v);
+    check_for_early_unshadow(v, mfn);
+    return rv;
+}
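
Both cmpxchg emulations above follow the same pattern: perform the compare-and-exchange directly on the mapped destination, and only push the result through the validation path when the primitive actually observed the expected old value; otherwise the failure is reported back as X86EMUL_CMPXCHG_FAILED. A standalone sketch of that success test, using the GCC/Clang __atomic builtin in place of Xen's cmpxchg() (illustrative only, not the hypervisor API):

    #include <stdint.h>
    #include <stdio.h>

    /* Try to replace *dest with new_val if it still holds old; report success. */
    static int emulated_cmpxchg64(uint64_t *dest, uint64_t old, uint64_t new_val)
    {
        uint64_t expected = old;
        int ok = __atomic_compare_exchange_n(dest, &expected, new_val,
                                             0 /* strong */,
                                             __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
        /* Only a successful exchange would be propagated to the shadow. */
        return ok;
    }

    int main(void)
    {
        uint64_t slot = 0x1111;
        printf("first try:  %s (slot=%#llx)\n",
               emulated_cmpxchg64(&slot, 0x1111, 0x2222) ? "ok" : "failed",
               (unsigned long long)slot);
        printf("second try: %s (slot=%#llx)\n",
               emulated_cmpxchg64(&slot, 0x1111, 0x3333) ? "ok" : "failed",
               (unsigned long long)slot);
        return 0;
    }
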
+
+
+/**************************************************************************/
+/* Audit tools */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+
+#define AUDIT_FAIL(_level, _fmt, _a...) do {                               \
+    printk("Shadow %u-on-%u audit failed at level %i, index %i\n"         \
+           "gl" #_level "mfn = %" SH_PRI_mfn                              \
+           " sl" #_level "mfn = %" SH_PRI_mfn                             \
+           " &gl" #_level "e = %p &sl" #_level "e = %p"                    \
+           " gl" #_level "e = %" SH_PRI_gpte                              \
+           " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n",        \
+           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                      \
+           _level, guest_index(gl ## _level ## e),                         \
+           mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),         \
+           gl ## _level ## e, sl ## _level ## e,                           \
+           gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
+           ##_a);                                                          \
+    BUG();                                                                 \
+    done = 1;                                                              \
+} while (0)
+
+
+static char * sh_audit_flags(struct vcpu *v, int level,
+                              int gflags, int sflags) 
+/* Common code for auditing flag bits */
+{
+    if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
+        return "shadow is present but guest is not present";
+    if ( (sflags & _PAGE_GLOBAL) && !hvm_guest(v) ) 
+        return "global bit set in PV shadow";
+    if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
+         && ((sflags & _PAGE_DIRTY) && !(gflags & _PAGE_DIRTY)) ) 
+        return "dirty bit not propagated";
+    if ( level == 2 && (sflags & _PAGE_PSE) )
+        return "PS bit set in shadow";
+#if SHADOW_PAGING_LEVELS == 3
+    if ( level == 3 ) return NULL; /* All the other bits are blank in PAE l3 */
+#endif
+    if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) ) 
+        return "user/supervisor bit does not match";
+    if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) ) 
+        return "NX bit does not match";
+    if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) ) 
+        return "shadow grants write access but guest does not";
+    if ( (sflags & _PAGE_ACCESSED) && !(gflags & _PAGE_ACCESSED) ) 
+        return "accessed bit not propagated";
+    return NULL;
+}
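
sh_audit_flags() above encodes the invariants a shadow entry must respect relative to its guest entry: the shadow may drop permissions but must never grant ones the guest lacks, and accessed/dirty bits it reports must already be set in the guest. A compressed standalone check of two of the permission rules (bit values as in x86 ptes, but this is an illustration, not the audit code itself):

    #include <assert.h>
    #include <stdint.h>

    #define _PAGE_PRESENT 0x001
    #define _PAGE_RW      0x002

    /* Return NULL if sflags is a legal shadow of gflags, else a complaint. */
    static const char *audit_perms(uint32_t gflags, uint32_t sflags)
    {
        if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
            return "shadow is present but guest is not present";
        if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
            return "shadow grants write access but guest does not";
        return NULL;
    }

    int main(void)
    {
        /* Shadow that silently write-protects a guest-writable page: legal. */
        assert(audit_perms(_PAGE_PRESENT | _PAGE_RW, _PAGE_PRESENT) == NULL);
        /* Shadow that grants RW the guest never had: an audit failure. */
        assert(audit_perms(_PAGE_PRESENT, _PAGE_PRESENT | _PAGE_RW) != NULL);
        return 0;
    }
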
+
+static inline mfn_t
+audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
+/* Convert this gfn to an mfn in the manner appropriate for the
+ * guest pagetable it's used in (gmfn) */ 
+{
+    if ( !shadow_mode_translate(v->domain) )
+        return _mfn(gfn_x(gfn));
+    
+    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
+         != PGT_writable_page ) 
+        return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
+    else 
+        return sh_gfn_to_mfn(v->domain, gfn_x(gfn));
+} 
+
+
+int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
+{
+    guest_l1e_t *gl1e, *gp;
+    shadow_l1e_t *sl1e;
+    mfn_t mfn, gmfn, gl1mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+
+    /* Follow the backpointer */
+    gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info);
+    gl1e = gp = sh_map_domain_page(gl1mfn);
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
+
+        s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
+                            shadow_l1e_get_flags(*sl1e));
+        if ( s ) AUDIT_FAIL(1, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l1e_get_gfn(*gl1e);
+            mfn = shadow_l1e_get_mfn(*sl1e);
+            gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return done;
+}
+
+int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
+{
+    guest_l1e_t *gl1e, e;
+    shadow_l1e_t *sl1e;
+    mfn_t gl1mfn = _mfn(INVALID_MFN);
+    int f;
+    int done = 0;
+
+    /* fl1 has no useful backpointer: all we can check are flags */
+    e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
+    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
+        f = shadow_l1e_get_flags(*sl1e);
+        f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
+        if ( !(f == 0 
+               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                        _PAGE_ACCESSED|_PAGE_DIRTY) 
+               || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) )
+            AUDIT_FAIL(1, "fl1e has bad flags");
+    });
+    return 0;
+}
+
+int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
+{
+    guest_l2e_t *gl2e, *gp;
+    shadow_l2e_t *sl2e;
+    mfn_t mfn, gmfn, gl2mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+#if GUEST_PAGING_LEVELS != 4
+    int xen_mappings = !shadow_mode_external(v->domain);
+#endif
+
+    /* Follow the backpointer */
+    gl2mfn = _mfn(mfn_to_page(sl2mfn)->u.inuse.type_info);
+    gl2e = gp = sh_map_domain_page(gl2mfn);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, xen_mappings, {
+
+        s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
+                            shadow_l2e_get_flags(*sl2e));
+        if ( s ) AUDIT_FAIL(2, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l2e_get_gfn(*gl2e);
+            mfn = shadow_l2e_get_mfn(*sl2e);
+            gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)  
+                ? get_fl1_shadow_status(v, gfn)
+                : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn), 
+                                    PGC_SH_l1_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
+                           " (--> %" SH_PRI_mfn ")"
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), 
+                           (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
+                           : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
+                           mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+
+#if GUEST_PAGING_LEVELS >= 3
+int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
+{
+    guest_l3e_t *gl3e, *gp;
+    shadow_l3e_t *sl3e;
+    mfn_t mfn, gmfn, gl3mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+
+    /* Follow the backpointer */
+    gl3mfn = _mfn(mfn_to_page(sl3mfn)->u.inuse.type_info);
+    gl3e = gp = sh_map_domain_page(gl3mfn);
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
+
+        s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
+                            shadow_l3e_get_flags(*sl3e));
+        if ( s ) AUDIT_FAIL(3, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l3e_get_gfn(*gl3e);
+            mfn = shadow_l3e_get_mfn(*sl3e);
+            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn), 
+                                     (GUEST_PAGING_LEVELS == 3 
+                                      && !shadow_mode_external(v->domain)
+                                      && (guest_index(gl3e) % 4) == 3)
+                                     ? PGC_SH_l2h_pae_shadow
+                                     : PGC_SH_l2_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+#if GUEST_PAGING_LEVELS >= 4
+int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
+{
+    guest_l4e_t *gl4e, *gp;
+    shadow_l4e_t *sl4e;
+    mfn_t mfn, gmfn, gl4mfn;
+    gfn_t gfn;
+    char *s;
+    int done = 0;
+    int xen_mappings = !shadow_mode_external(v->domain);
+
+    /* Follow the backpointer */
+    gl4mfn = _mfn(mfn_to_page(sl4mfn)->u.inuse.type_info);
+    gl4e = gp = sh_map_domain_page(gl4mfn);
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, xen_mappings,
+    {
+        s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
+                            shadow_l4e_get_flags(*sl4e));
+        if ( s ) AUDIT_FAIL(4, "%s", s);
+
+        if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+        {
+            gfn = guest_l4e_get_gfn(*gl4e);
+            mfn = shadow_l4e_get_mfn(*sl4e);
+            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn), 
+                                     PGC_SH_l3_shadow);
+            if ( mfn_x(gmfn) != mfn_x(mfn) )
+                AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
+                           " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+                           gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+        }
+    });
+    sh_unmap_domain_page(gp);
+    return 0;
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#undef AUDIT_FAIL
+
+#endif /* Audit code */
+
+/**************************************************************************/
+/* Entry points into this mode of the shadow code.
+ * This will all be mangled by the preprocessor to uniquify everything. */
+struct shadow_paging_mode sh_paging_mode = {
+    .page_fault             = sh_page_fault, 
+    .invlpg                 = sh_invlpg,
+    .gva_to_gpa             = sh_gva_to_gpa,
+    .gva_to_gfn             = sh_gva_to_gfn,
+    .update_cr3             = sh_update_cr3,
+    .map_and_validate_gl1e  = sh_map_and_validate_gl1e,
+    .map_and_validate_gl2e  = sh_map_and_validate_gl2e,
+    .map_and_validate_gl2he = sh_map_and_validate_gl2he,
+    .map_and_validate_gl3e  = sh_map_and_validate_gl3e,
+    .map_and_validate_gl4e  = sh_map_and_validate_gl4e,
+    .detach_old_tables      = sh_detach_old_tables,
+    .x86_emulate_write      = sh_x86_emulate_write,
+    .x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
+    .x86_emulate_cmpxchg8b  = sh_x86_emulate_cmpxchg8b,
+    .make_monitor_table     = sh_make_monitor_table,
+    .destroy_monitor_table  = sh_destroy_monitor_table,
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    .guess_wrmap            = sh_guess_wrmap,
+#endif
+    .guest_levels           = GUEST_PAGING_LEVELS,
+    .shadow_levels          = SHADOW_PAGING_LEVELS,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/shadow/multi.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/multi.h    Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,116 @@
+/******************************************************************************
+ * arch/x86/mm/shadow/multi.h
+ *
+ * Shadow declarations which will be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+extern int 
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl1mfn, void *new_gl1p, u32 size);
+extern int 
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int 
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl2mfn, void *new_gl2p, u32 size);
+extern int 
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl3mfn, void *new_gl3p, u32 size);
+extern int 
+SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t gl4mfn, void *new_gl4p, u32 size);
+
+extern void 
+SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void 
+SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void 
+SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+extern void 
+SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, SHADOW_LEVELS, GUEST_LEVELS)(
+    struct vcpu *v, mfn_t smfn);
+
+extern void
+SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows, 3, 3)
+    (struct vcpu *v, mfn_t smfn);
+
+extern void 
+SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn);
+extern void 
+SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn);
+extern void 
+SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn);
+
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
+
+extern void
+SHADOW_INTERNAL_NAME(sh_clear_shadow_entry, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, void *ep, mfn_t smfn);
+
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l1_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l2_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn);
+extern int
+SHADOW_INTERNAL_NAME(sh_remove_l3_shadow, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn);
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+int 
+SHADOW_INTERNAL_NAME(sh_audit_l1_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int 
+SHADOW_INTERNAL_NAME(sh_audit_fl1_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl1mfn, mfn_t x);
+int 
+SHADOW_INTERNAL_NAME(sh_audit_l2_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl2mfn, mfn_t x);
+int 
+SHADOW_INTERNAL_NAME(sh_audit_l3_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl3mfn, mfn_t x);
+int 
+SHADOW_INTERNAL_NAME(sh_audit_l4_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t sl4mfn, mfn_t x);
+#endif
+
+#if SHADOW_LEVELS == GUEST_LEVELS
+extern mfn_t
+SHADOW_INTERNAL_NAME(sh_make_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v);
+extern void
+SHADOW_INTERNAL_NAME(sh_destroy_monitor_table, SHADOW_LEVELS, GUEST_LEVELS)
+    (struct vcpu *v, mfn_t mmfn);
+#endif
+
+extern struct shadow_paging_mode 
+SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS);
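
Every declaration in this header is wrapped in SHADOW_INTERNAL_NAME(), which pastes the shadow and guest paging levels into the symbol so the same source can be compiled once per level pairing without clashes. The real macro lives in the shadow-private headers; what follows is a guessed, simplified reconstruction of the token-pasting technique, not the actual Xen definition.

    #include <stdio.h>

    /* Two-step paste so that SHADOW_LEVELS/GUEST_LEVELS are expanded first.
     * (Simplified illustration; the real Xen macro may differ in detail.) */
    #define SHADOW_NAME_PASTE(name, s, g)    name ## __shadow_ ## s ## _guest_ ## g
    #define SHADOW_INTERNAL_NAME(name, s, g) SHADOW_NAME_PASTE(name, s, g)

    #define SHADOW_LEVELS 3
    #define GUEST_LEVELS  3

    /* Expands to sh_example__shadow_3_guest_3, unique to this build variant. */
    static int SHADOW_INTERNAL_NAME(sh_example, SHADOW_LEVELS, GUEST_LEVELS)(void)
    {
        return SHADOW_LEVELS * 10 + GUEST_LEVELS;
    }

    int main(void)
    {
        printf("%d\n", sh_example__shadow_3_guest_3());  /* prints 33 */
        return 0;
    }
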
diff -r 5b9ff5e8653a -r fab84f9c0ce6 xen/arch/x86/mm/shadow/page-guest32.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/page-guest32.h     Mon Aug 28 12:09:36 2006 +0100
@@ -0,0 +1,105 @@
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
+#define PAGETABLE_ORDER_32         10
+#define L1_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
+#define L2_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
+#define ROOT_PAGETABLE_ENTRIES_32  L2_PAGETABLE_ENTRIES_32
+
+
+#define L1_PAGETABLE_SHIFT_32 12
+#define L2_PAGETABLE_SHIFT_32 22
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+
+#ifndef __ASSEMBLY__
+
+typedef u32 intpte_32_t;
+
+typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
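
page-guest32.h carries the 2-level (non-PAE) guest layout: 10-bit l1 and l2 indices over 4KB pages. A small standalone use of those shift and entry-count constants to split a 32-bit guest-virtual address; only the constants come from the header, and the offset helpers below are named for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGETABLE_ORDER_32       10
    #define L1_PAGETABLE_ENTRIES_32  (1 << PAGETABLE_ORDER_32)
    #define L2_PAGETABLE_ENTRIES_32  (1 << PAGETABLE_ORDER_32)
    #define L1_PAGETABLE_SHIFT_32    12
    #define L2_PAGETABLE_SHIFT_32    22

    /* Index of the l2 (page directory) entry covering a 32-bit address. */
    static unsigned int l2_table_offset_32(uint32_t va)
    {
        return (va >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1);
    }

    /* Index of the l1 (page table) entry covering a 32-bit address. */
    static unsigned int l1_table_offset_32(uint32_t va)
    {
        return (va >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1);
    }

    int main(void)
    {
        uint32_t va = 0xc0123456;
        printf("va %#x -> l2 index %u, l1 index %u, offset %#x\n",
               va, l2_table_offset_32(va), l1_table_offset_32(va),
               va & ((1u << L1_PAGETABLE_SHIFT_32) - 1));
        return 0;
    }
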

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

