
[Xen-changelog] [xen-unstable] Merge.



# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1194021491 0
# Node ID 650cadd1b28303dae4927ca65e64eb2507e3a3f3
# Parent  838e77a41a3c53a54428e642cb0440a8a6f8912b
# Parent  db9f62d8f7f4d2d8f8ccf7c512623977132bcffa
Merge.
---
 xen/arch/x86/hvm/hvm.c            |   36 +
 xen/arch/x86/hvm/platform.c       |   25 -
 xen/arch/x86/hvm/svm/svm.c        |   13 
 xen/arch/x86/hvm/vmx/vmx.c        |   14 
 xen/arch/x86/mm/hap/guest_walk.c  |    4 
 xen/arch/x86/mm/hap/hap.c         |    2 
 xen/arch/x86/mm/hap/private.h     |    9 
 xen/arch/x86/mm/p2m.c             |    6 
 xen/arch/x86/mm/shadow/common.c   |   16 
 xen/arch/x86/mm/shadow/multi.c    |  689 ++++++++++++++++----------------------
 xen/arch/x86/mm/shadow/private.h  |   13 
 xen/arch/x86/mm/shadow/types.h    |   49 +-
 xen/include/asm-x86/hvm/support.h |    1 
 xen/include/asm-x86/paging.h      |   18 
 xen/include/asm-x86/perfc_defn.h  |    8 
 15 files changed, 420 insertions(+), 483 deletions(-)
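
The common thread through these files is that paging_gva_to_gfn() now takes a pointer to a page-fault error code (PFEC): the caller describes the access it is making, the guest pagetable walker checks the guest's permissions against it, and on failure the (possibly updated) code can be injected straight back into the guest.  A minimal sketch of that calling convention, modelled on the string-I/O handlers changed below (variable names are illustrative only):

    uint32_t pfec = PFEC_page_present;
    unsigned long gfn;

    if ( dir == IOREQ_READ )        /* a PIO read is a write to RAM */
        pfec |= PFEC_write_access;
    if ( ring_3(regs) )             /* access made from guest user mode */
        pfec |= PFEC_user_mode;

    gfn = paging_gva_to_gfn(v, addr, &pfec);
    if ( gfn == INVALID_GFN )
    {
        /* The walk failed: reflect a page fault back to the guest,
         * using the error code the walker filled in. */
        hvm_inject_exception(TRAP_page_fault, pfec, addr);
        return;
    }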

diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Fri Nov 02 16:38:11 2007 +0000
@@ -931,6 +931,7 @@ static void *hvm_map(unsigned long va, i
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
+    uint32_t pfec;
 
     if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
     {
@@ -939,11 +940,15 @@ static void *hvm_map(unsigned long va, i
         return NULL;
     }
 
-    gfn = paging_gva_to_gfn(current, va);
+    /* We're mapping on behalf of the segment-load logic, which might
+     * write the accessed flags in the descriptors (in 32-bit mode), but
+     * we still treat it as a kernel-mode read (i.e. no access checks). */
+    pfec = PFEC_page_present;
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
     {
-        hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
+        hvm_inject_exception(TRAP_page_fault, pfec, va);
         return NULL;
     }
 
@@ -1263,14 +1268,24 @@ void hvm_task_switch(
  *  @size = number of bytes to copy
  *  @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
  *  @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
+ *  @fetch = copy is an instruction fetch?
  * Returns number of bytes failed to copy (0 == complete success).
  */
-static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
+static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, 
+                      int virt, int fetch)
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
     char *p;
     int count, todo;
+    uint32_t pfec = PFEC_page_present;
+
+    if ( dir ) 
+        pfec |= PFEC_write_access;
+    if ( ring_3(guest_cpu_user_regs()) )
+        pfec |= PFEC_user_mode;
+    if ( fetch ) 
+        pfec |= PFEC_insn_fetch;
 
     todo = size;
     while ( todo > 0 )
@@ -1278,7 +1293,7 @@ static int __hvm_copy(void *buf, paddr_t
         count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
 
         if ( virt )
-            gfn = paging_gva_to_gfn(current, addr);
+            gfn = paging_gva_to_gfn(current, addr, &pfec);
         else
             gfn = addr >> PAGE_SHIFT;
         
@@ -1310,22 +1325,27 @@ static int __hvm_copy(void *buf, paddr_t
 
 int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
 {
-    return __hvm_copy(buf, paddr, size, 1, 0);
+    return __hvm_copy(buf, paddr, size, 1, 0, 0);
 }
 
 int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
 {
-    return __hvm_copy(buf, paddr, size, 0, 0);
+    return __hvm_copy(buf, paddr, size, 0, 0, 0);
 }
 
 int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
 {
-    return __hvm_copy(buf, vaddr, size, 1, 1);
+    return __hvm_copy(buf, vaddr, size, 1, 1, 0);
 }
 
 int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
 {
-    return __hvm_copy(buf, vaddr, size, 0, 1);
+    return __hvm_copy(buf, vaddr, size, 0, 1, 0);
+}
+
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size)
+{
+    return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
 }
 
 
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/platform.c       Fri Nov 02 16:38:11 2007 +0000
@@ -833,7 +833,7 @@ int inst_copy_from_guest(unsigned char *
 {
     if ( inst_len > MAX_INST_LEN || inst_len <= 0 )
         return 0;
-    if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) )
+    if ( hvm_fetch_from_guest_virt(buf, guest_eip, inst_len) )
         return 0;
     return inst_len;
 }
@@ -1075,6 +1075,7 @@ void handle_mmio(unsigned long gpa)
         unsigned long addr, gfn; 
         paddr_t paddr;
         int dir, size = op_size;
+        uint32_t pfec;
 
         ASSERT(count);
 
@@ -1082,8 +1083,11 @@ void handle_mmio(unsigned long gpa)
         addr = regs->edi;
         if ( ad_size == WORD )
             addr &= 0xFFFF;
-        addr += hvm_get_segment_base(v, x86_seg_es);
-        gfn = paging_gva_to_gfn(v, addr);
+        addr += hvm_get_segment_base(v, x86_seg_es);        
+        pfec = PFEC_page_present | PFEC_write_access;
+        if ( ring_3(regs) )
+            pfec |= PFEC_user_mode;
+        gfn = paging_gva_to_gfn(v, addr, &pfec);
         paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
         if ( paddr == gpa )
         {
@@ -1105,7 +1109,8 @@ void handle_mmio(unsigned long gpa)
             default: domain_crash_synchronous();
             }
             addr += hvm_get_segment_base(v, seg);
-            gfn = paging_gva_to_gfn(v, addr);
+            pfec &= ~PFEC_write_access;
+            gfn = paging_gva_to_gfn(v, addr, &pfec);
             paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
         }
         else
@@ -1115,12 +1120,9 @@ void handle_mmio(unsigned long gpa)
         {
             /* The guest does not have the non-mmio address mapped. 
              * Need to send in a page fault */
-            int errcode = 0;
-            /* IO read --> memory write */
-            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
             regs->eip -= inst_len; /* do not advance %eip */
             regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
-            hvm_inject_exception(TRAP_page_fault, errcode, addr);
+            hvm_inject_exception(TRAP_page_fault, pfec, addr);
             return;
         }
 
@@ -1308,10 +1310,9 @@ void handle_mmio(unsigned long gpa)
 
 DEFINE_PER_CPU(int, guest_handles_in_xen_space);
 
-/* Note that copy_{to,from}_user_hvm don't set the A and D bits on
-   PTEs, and require the PTE to be writable even when they're only
-   trying to read from it.  The guest is expected to deal with
-   this. */
+/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
+   when they're only trying to read from it.  The guest is expected to
+   deal with this. */
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
 {
     if ( this_cpu(guest_handles_in_xen_space) )
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Nov 02 16:38:11 2007 +0000
@@ -1441,6 +1441,7 @@ static void svm_io_instruction(struct vc
         unsigned long addr, count;
         paddr_t paddr;
         unsigned long gfn;
+        uint32_t pfec;
         int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
 
         if (!svm_get_io_address(v, regs, size, info, &count, &addr))
@@ -1459,15 +1460,17 @@ static void svm_io_instruction(struct vc
         }
 
         /* Translate the address to a physical address */
-        gfn = paging_gva_to_gfn(v, addr);
+        pfec = PFEC_page_present;
+        if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+            pfec |= PFEC_write_access;
+        if ( ring_3(regs) )
+            pfec |= PFEC_user_mode;
+        gfn = paging_gva_to_gfn(v, addr, &pfec);
         if ( gfn == INVALID_GFN ) 
         {
             /* The guest does not have the RAM address mapped. 
              * Need to send in a page fault */
-            int errcode = 0;
-            /* IO read --> memory write */
-            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
-            svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
+            svm_hvm_inject_exception(TRAP_page_fault, pfec, addr);
             return;
         }
         paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Nov 02 16:38:11 2007 +0000
@@ -1642,7 +1642,7 @@ static void vmx_do_str_pio(unsigned long
     unsigned long addr, count = 1, base;
     paddr_t paddr;
     unsigned long gfn;
-    u32 ar_bytes, limit;
+    u32 ar_bytes, limit, pfec;
     int sign;
     int long_mode = 0;
 
@@ -1714,15 +1714,17 @@ static void vmx_do_str_pio(unsigned long
 #endif
 
     /* Translate the address to a physical address */
-    gfn = paging_gva_to_gfn(current, addr);
+    pfec = PFEC_page_present;
+    if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+        pfec |= PFEC_write_access;
+    if ( ring_3(regs) )
+        pfec |= PFEC_user_mode;
+    gfn = paging_gva_to_gfn(current, addr, &pfec);
     if ( gfn == INVALID_GFN )
     {
         /* The guest does not have the RAM address mapped.
          * Need to send in a page fault */
-        int errcode = 0;
-        /* IO read --> memory write */
-        if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
-        vmx_inject_exception(TRAP_page_fault, errcode, addr);
+        vmx_inject_exception(TRAP_page_fault, pfec, addr);
         return;
     }
     paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Fri Nov 02 16:38:11 2007 +0000
@@ -40,7 +40,7 @@
 #if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
 
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     gdprintk(XENLOG_ERR,
              "Guest paging level is greater than host paging level!\n");
@@ -61,7 +61,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
 #endif
 
 unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
     int mode = GUEST_PAGING_LEVELS;
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:38:11 2007 +0000
@@ -695,7 +695,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
 }
 
 static unsigned long hap_gva_to_gfn_real_mode(
-    struct vcpu *v, unsigned long gva)
+    struct vcpu *v, unsigned long gva, uint32_t *pfec)
 {
     return ((paddr_t)gva >> PAGE_SHIFT);
 }
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h     Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/private.h     Fri Nov 02 16:38:11 2007 +0000
@@ -26,9 +26,12 @@
 /********************************************/
 /*          GUEST TRANSLATION FUNCS         */
 /********************************************/
-unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva, 
+                                    uint32_t *pfec);
+unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva,
+                                    uint32_t *pfec);
+unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
+                                    uint32_t *pfec);
 
 /********************************************/
 /*            MISC DEFINITIONS              */
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/p2m.c     Fri Nov 02 16:38:11 2007 +0000
@@ -31,7 +31,7 @@
 
 /* Debugging and auditing of the P2M code? */
 #define P2M_AUDIT     0
-#define P2M_DEBUGGING 1
+#define P2M_DEBUGGING 0
 
 /*
  * The P2M lock.  This protects all updates to the p2m table.
@@ -290,11 +290,11 @@ int p2m_alloc_table(struct domain *d,
                     void (*free_page)(struct domain *d, struct page_info *pg))
 
 {
-    mfn_t mfn;
+    mfn_t mfn = _mfn(INVALID_MFN);
     struct list_head *entry;
     struct page_info *page, *p2m_top;
     unsigned int page_count = 0;
-    unsigned long gfn;
+    unsigned long gfn = -1UL;
 
     p2m_lock(d);
 
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Nov 02 16:38:11 2007 +0000
@@ -150,11 +150,13 @@ hvm_read(enum x86_segment seg,
         return rc;
 
     *val = 0;
-    // XXX -- this is WRONG.
-    //        It entirely ignores the permissions in the page tables.
-    //        In this case, that is only a user vs supervisor access check.
-    //
-    if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
+
+    if ( access_type == hvm_access_insn_fetch )
+        rc = hvm_fetch_from_guest_virt(val, addr, bytes);
+    else
+        rc = hvm_copy_from_guest_virt(val, addr, bytes);
+
+    if ( rc == 0 ) 
         return X86EMUL_OKAY;
 
     /* If we got here, there was nothing mapped here, or a bad GFN 
@@ -395,7 +397,7 @@ struct x86_emulate_ops *shadow_init_emul
         (!hvm_translate_linear_addr(
             x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
             hvm_access_insn_fetch, sh_ctxt, &addr) &&
-         !hvm_copy_from_guest_virt(
+         !hvm_fetch_from_guest_virt(
              sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
         ? sizeof(sh_ctxt->insn_buf) : 0;
 
@@ -423,7 +425,7 @@ void shadow_continue_emulation(struct sh
                 (!hvm_translate_linear_addr(
                     x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
                     hvm_access_insn_fetch, sh_ctxt, &addr) &&
-                 !hvm_copy_from_guest_virt(
+                 !hvm_fetch_from_guest_virt(
                      sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
                 ? sizeof(sh_ctxt->insn_buf) : 0;
             sh_ctxt->insn_buf_eip = regs->eip;
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Nov 02 16:38:11 2007 +0000
@@ -189,7 +189,7 @@ guest_supports_nx(struct vcpu *v)
     if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
         return 0;
     if ( !is_hvm_vcpu(v) )
-        return 1;
+        return cpu_has_nx;
     return hvm_nx_enabled(v);
 }
 
@@ -197,22 +197,119 @@ guest_supports_nx(struct vcpu *v)
 /**************************************************************************/
 /* Functions for walking the guest page tables */
 
-
-/* Walk the guest pagetables, filling the walk_t with what we see. 
- * Takes an uninitialised walk_t.  The caller must call unmap_walk() 
- * on the walk_t before discarding it or calling guest_walk_tables again. 
- * If "guest_op" is non-zero, we are serving a genuine guest memory access, 
+/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
+static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec) 
+{
+    static uint32_t flags[] = {
+        /* I/F -  Usr Wr */
+        /* 0   0   0   0 */ _PAGE_PRESENT, 
+        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
+        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
+        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+        /* 0   1   0   0 */ _PAGE_PRESENT, 
+        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
+        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
+        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
+        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
+        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+    };
+    uint32_t f = flags[(pfec & 0x1f) >> 1];
+    /* Don't demand not-NX if the CPU wouldn't enforce it. */
+    if ( !guest_supports_nx(v) )
+        f &= ~_PAGE_NX_BIT;
+    return f;
+}
+
+/* Read, check and modify a guest pagetable entry.  Returns 0 if the
+ * flags are OK.  Although we use l1e types here, the logic and the bits
+ * are the same for all types except PAE l3es. */
+static int guest_walk_entry(struct vcpu *v, mfn_t gmfn, 
+                            void *gp, void *wp,
+                            uint32_t flags, int level)
+{
+    guest_l1e_t e, old_e;
+    uint32_t gflags;
+    int rc;
+
+    /* Read the guest entry */
+    e = *(guest_l1e_t *)gp;
+
+    /* Check that all the mandatory flag bits are there.  Invert NX, to
+     * calculate as if there were an "X" bit that allowed access. */
+    gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT;
+    rc = ((gflags & flags) != flags);
+    
+    /* Set the accessed/dirty bits */
+    if ( rc == 0 ) 
+    {
+        uint32_t bits = _PAGE_ACCESSED;
+        if ( (flags & _PAGE_RW) // Implies that the action is a write
+             && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) )
+            bits |= _PAGE_DIRTY;
+        old_e = e;
+        e.l1 |= bits;
+        SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n",
+                      (unsigned long) flags, 
+                      (unsigned long) bits, 
+                      (unsigned long long) old_e.l1, 
+                      (unsigned long long) e.l1);
+        /* Try to write the entry back.  If it's changed under our feet
+         * then leave it alone */
+        if ( e.l1 != old_e.l1 )
+        {
+            (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1);
+            paging_mark_dirty(v->domain, mfn_x(gmfn));
+        }
+    }
+
+    /* Record the entry in the walk */
+    *(guest_l1e_t *)wp = e;
+    return rc;
+}
+
+/* Walk the guest pagetables, after the manner of a hardware walker. 
+ *
+ * Inputs: a vcpu, a virtual address, a walk_t to fill, a 
+ *         pointer to a pagefault code, and a flag "shadow_op".
+ * 
+ * We walk the vcpu's guest pagetables, filling the walk_t with what we
+ * see and adding any Accessed and Dirty bits that are needed in the
+ * guest entries.  Using the pagefault code, we check the permissions as
+ * we go.  For the purposes of reading pagetables we treat all non-RAM
+ * memory as containing zeroes.
+ * 
+ * If "shadow_op" is non-zero, we are serving a genuine guest memory access, 
  * and must (a) be under the shadow lock, and (b) remove write access
- * from any gueat PT pages we see, as we will be using their contents to 
- * perform shadow updates.
- * Returns 0 for success or non-zero if the guest pagetables are malformed.
- * N.B. Finding a not-present entry does not cause a non-zero return code. */
-static inline int 
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+ * from any guest PT pages we see, as we will be shadowing them soon
+ * and will rely on the contents' not having changed.
+ * 
+ * Returns 0 for success or non-zero if the walk did not complete.
+ * N.B. This is different from the old return code but almost no callers
+ * checked the old return code anyway.
+ */
+static int 
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, 
+                  uint32_t pfec, int shadow_op)
 {
     struct domain *d = v->domain;
     p2m_type_t p2mt;
-    ASSERT(!guest_op || shadow_locked_by_me(d));
+    guest_l1e_t *l1p;
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+    guest_l1e_t *l2p;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    guest_l1e_t *l3p;
+#endif    
+#endif
+    uint32_t flags = mandatory_flags(v, pfec);
+    int rc;
+
+    ASSERT(!shadow_op || shadow_locked_by_me(d));
     
     perfc_incr(shadow_guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -220,84 +317,104 @@ guest_walk_tables(struct vcpu *v, unsign
 
 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
-    /* Get l4e from the top level table */
+    /* Get the l4e from the top level table and check its flags */
     gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
-    gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable 
-        + guest_l4_table_offset(va);
-    /* Walk down to the l3e */
-    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    rc = guest_walk_entry(v, gw->l4mfn,
+                          (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
+                          + guest_l4_table_offset(va),
+                          &gw->l4e, flags, 4);
+    if ( rc != 0 ) return rc;
+
+    /* Map the l3 table */
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
     if ( !p2m_is_ram(p2mt) ) return 1;
     ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
-    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
-        + guest_l3_table_offset(va);
+    /* Get the l3e and check its flags */
+    l3p = sh_map_domain_page(gw->l3mfn);
+    rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va), 
+                          &gw->l3e, flags, 3);
+    sh_unmap_domain_page(l3p);
+    if ( rc != 0 ) return rc;
+
 #else /* PAE only... */
-    /* Get l3e from the cache of the guest's top level table */
-    gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+
+    /* Get l3e from the cache of the top level table and check its flag */
+    gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1;
+
 #endif /* PAE or 64... */
-    /* Walk down to the l2e */
-    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+
+    /* Map the l2 table */
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
     if ( !p2m_is_ram(p2mt) ) return 1;
     ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+    if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
-    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
-        + guest_l2_table_offset(va);
+    /* Get the l2e */
+    l2p = sh_map_domain_page(gw->l2mfn);
+    rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va),
+                          &gw->l2e, flags, 2);
+    sh_unmap_domain_page(l2p);
+    if ( rc != 0 ) return rc;
+
 #else /* 32-bit only... */
-    /* Get l2e from the top level table */
+
+    /* Get l2e from the top level table and check its flags */
     gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
-    gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable 
-        + guest_l2_table_offset(va);
+    rc = guest_walk_entry(v, gw->l2mfn, 
+                          (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
+                          + guest_l2_table_offset(va),
+                          &gw->l2e, flags, 2);
+    if ( rc != 0 ) return rc;
+
 #endif /* All levels... */
-    
-    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+
     if ( guest_supports_superpages(v) &&
-         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) ) 
+         (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) ) 
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
          * can generate a shadow l1 table.  Start with the gfn of the 
          * first 4k-page of the superpage. */
-        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+        gfn_t start = guest_l2e_get_gfn(gw->l2e);
         /* Grant full access in the l1e, since all the guest entry's 
-         * access controls are enforced in the shadow l2e.  This lets 
-         * us reflect l2 changes later without touching the l1s. */
+         * access controls are enforced in the shadow l2e. */
         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
                      _PAGE_ACCESSED|_PAGE_DIRTY);
-        /* propagate PWT PCD to level 1 for PSE */
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
-            flags |= _PAGE_PWT;
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
-            flags |= _PAGE_PCD;
         /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
-         * of the level 1 */
-        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
-            flags |= _PAGE_PAT; 
+         * of the level 1. */
+        if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) ) 
+            flags |= _PAGE_PAT;
+        /* Copy the cache-control bits to the l1 as well, because we
+         * can't represent PAT in the (non-PSE) shadow l2e. :(
+         * This could cause problems if a guest ever maps an area of
+         * memory with superpages using more than one caching mode. */
+        flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
         /* Increment the pfn by the right number of 4k pages.  
          * The ~0x1 is to mask out the PAT bit mentioned above. */
         start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
-        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
-        gw->l1e = NULL;
+        gw->l1e = guest_l1e_from_gfn(start, flags);
         gw->l1mfn = _mfn(INVALID_MFN);
     } 
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
         if ( !p2m_is_ram(p2mt) ) return 1;
         ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
-        if ( guest_op 
+        if ( shadow_op 
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
             flush_tlb_mask(d->domain_dirty_cpumask); 
-        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
-            + guest_l1_table_offset(va);
-        gw->eff_l1e = *gw->l1e;
+        l1p = sh_map_domain_page(gw->l1mfn);
+        rc = guest_walk_entry(v, gw->l2mfn, l1p + guest_l1_table_offset(va),
+                              &gw->l1e, flags, 1);
+        sh_unmap_domain_page(l1p);
+        if ( rc != 0 ) return rc;
     }
 
     return 0;
@@ -308,9 +425,9 @@ static inline gfn_t
 static inline gfn_t
 guest_walk_to_gfn(walk_t *gw)
 {
-    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
         return _gfn(INVALID_GFN);
-    return guest_l1e_get_gfn(gw->eff_l1e);
+    return guest_l1e_get_gfn(gw->l1e);
 }
 
 /* Given a walk_t, translate the gw->va into the guest's notion of the
@@ -318,29 +435,12 @@ static inline paddr_t
 static inline paddr_t
 guest_walk_to_gpa(walk_t *gw)
 {
-    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
         return 0;
-    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
-}
-
-
-/* Unmap (and reinitialise) a guest walk.  
- * Call this to dispose of any walk filled in by guest_walk_tables() */
-static void unmap_walk(struct vcpu *v, walk_t *gw)
-{
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
-    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
-#endif
-    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
-#endif
-    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
-#ifdef DEBUG
-    memset(gw, 0, sizeof(*gw));
-#endif
-}
-
-
+    return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
+}
+
+#if 0 /* Keep for debugging */
 /* Pretty-print the contents of a guest-walk */
 static inline void print_gw(walk_t *gw)
 {
@@ -348,26 +448,17 @@ static inline void print_gw(walk_t *gw)
 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
     SHADOW_PRINTK("   l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
-    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
-    if ( gw->l4e )
-        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+    SHADOW_PRINTK("   l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
     SHADOW_PRINTK("   l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
 #endif /* PAE or 64... */
-    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
-    if ( gw->l3e )
-        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+    SHADOW_PRINTK("   l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
 #endif /* All levels... */
     SHADOW_PRINTK("   l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
-    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
-    if ( gw->l2e )
-        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+    SHADOW_PRINTK("   l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
     SHADOW_PRINTK("   l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
-    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
-    if ( gw->l1e )
-        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
-    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
-}
-
+    SHADOW_PRINTK("   l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
+}
+#endif /* 0 */
 
 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
 /* Lightweight audit: pass all the shadows associated with this guest walk
@@ -404,10 +495,10 @@ static void sh_audit_gw(struct vcpu *v, 
          && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn, 
                                                 SH_type_l1_shadow))) )
         (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
-    else if ( gw->l2e
-              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+    else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
+              && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
               && mfn_valid( 
-              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
         (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
 }
 
@@ -415,85 +506,6 @@ static void sh_audit_gw(struct vcpu *v, 
 #define sh_audit_gw(_v, _gw) do {} while(0)
 #endif /* audit code */
 
-
-
-/**************************************************************************/
-/* Function to write to the guest tables, for propagating accessed and 
- * dirty bits from the shadow to the guest.
- * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
- * and an operation type.  The guest entry is always passed as an l1e: 
- * since we only ever write flags, that's OK.
- * Returns the new flag bits of the guest entry. */
-
-static u32 guest_set_ad_bits(struct vcpu *v,
-                             mfn_t gmfn, 
-                             guest_l1e_t *ep,
-                             unsigned int level, 
-                             fetch_type_t ft)
-{
-    u32 flags;
-    int res = 0;
-
-    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
-    ASSERT(level <= GUEST_PAGING_LEVELS);
-    ASSERT(shadow_locked_by_me(v->domain));
-
-    flags = guest_l1e_get_flags(*ep);
-
-    /* Only set A and D bits for guest-initiated accesses */
-    if ( !(ft & FETCH_TYPE_DEMAND) )
-        return flags;
-
-    ASSERT(mfn_valid(gmfn)
-           && (sh_mfn_is_a_page_table(gmfn)
-               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 
-                   == 0)));
-
-    /* PAE l3s do not have A and D bits */
-    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
-
-    /* Need the D bit as well for writes, in L1es and PSE L2es. */
-    if ( ft == ft_demand_write  
-         && (level == 1 ||
-             (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
-    {
-        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
-             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
-            return flags;  /* Guest already has A and D bits set */
-        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
-        perfc_incr(shadow_ad_update);
-    }
-    else 
-    {
-        if ( flags & _PAGE_ACCESSED )
-            return flags;  /* Guest already has A bit set */
-        flags |= _PAGE_ACCESSED;
-        perfc_incr(shadow_a_update);
-    }
-
-    /* Set the bit(s) */
-    paging_mark_dirty(v->domain, mfn_x(gmfn));
-    SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
-                 "old flags = %#x, new flags = %#x\n", 
-                 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), 
-                 flags);
-    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
-    
-    /* Propagate this change to any other shadows of the page 
-     * (only necessary if there is more than one shadow) */
-    if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
-    {
-        u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
-        /* More than one type bit set in shadow-flags? */
-        if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
-            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
-    }
-
-    /* We should never need to flush the TLB or recopy PAE entries */
-    ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
-
-    return flags;
-}
 
 #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
 void *
@@ -509,11 +521,9 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
     // FIXME!
 
     shadow_lock(v->domain);
-    guest_walk_tables(v, addr, &gw, 1);
-
-    if ( gw.l2e &&
-         (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
-         !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
+    guest_walk_tables(v, addr, &gw, 0, 1);
+
+    if ( mfn_valid(gw.l1mfn) )
     {
         if ( gl1mfn )
             *gl1mfn = mfn_x(gw.l1mfn);
@@ -521,7 +531,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
             (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
     }
 
-    unmap_walk(v, &gw);
     shadow_unlock(v->domain);
 
     return pl1e;
@@ -538,9 +547,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
     // FIXME!
 
     shadow_lock(v->domain);
-    guest_walk_tables(v, addr, &gw, 1);
-    *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
-    unmap_walk(v, &gw);
+    guest_walk_tables(v, addr, &gw, 0, 1);
+    *(guest_l1e_t *)eff_l1e = gw.l1e;
     shadow_unlock(v->domain);
 }
 #endif /* CONFIG==SHADOW==GUEST */
@@ -636,17 +644,17 @@ unsigned char pat_type_2_pte_flags(unsig
 
 static always_inline void
 _sh_propagate(struct vcpu *v, 
-              void *guest_entry_ptr, 
-              mfn_t guest_table_mfn, 
+              guest_intpte_t guest_intpte,
               mfn_t target_mfn, 
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft, 
               p2m_type_t p2mt)
 {
-    guest_l1e_t *gp = guest_entry_ptr;
+    guest_l1e_t guest_entry = { guest_intpte };
     shadow_l1e_t *sp = shadow_entry_ptr;
     struct domain *d = v->domain;
+    gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
     u32 pass_thru_flags;
     u32 gflags, sflags;
 
@@ -660,15 +668,7 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( mfn_valid(guest_table_mfn) )
-        /* Handle A and D bit propagation into the guest */
-        gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
-    else 
-    {
-        /* Must be an fl1e or a prefetch */
-        ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
-        gflags = guest_l1e_get_flags(*gp);
-    }
+    gflags = guest_l1e_get_flags(guest_entry);
 
     if ( unlikely(!(gflags & _PAGE_PRESENT)) )
     {
@@ -684,7 +684,7 @@ _sh_propagate(struct vcpu *v,
     if ( level == 1 && p2mt == p2m_mmio_dm )
     {
         /* Guest l1e maps emulated MMIO space */
-        *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
+        *sp = sh_l1e_mmio(target_gfn, gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
@@ -694,9 +694,6 @@ _sh_propagate(struct vcpu *v,
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
-    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
-     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
-     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( shadow_mode_refcounts(d) && 
          !mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
     {
@@ -718,20 +715,22 @@ _sh_propagate(struct vcpu *v,
         pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
     sflags = gflags & pass_thru_flags;
 
-    /* Only change memory caching type for pass-through domain */
+    /*
+     * For HVM domains with direct access to MMIO areas, set the correct
+     * caching attributes in the shadows to match what was asked for
+     */
     if ( (level == 1) && is_hvm_domain(d) &&
          !list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
     {
         unsigned int type;
-        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
-                                          &type) )
+        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
             sflags |= pat_type_2_pte_flags(type);
-        else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
+        else if ( d->arch.hvm_domain.is_in_uc_mode )
             sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
         else
             sflags |= get_pat_flags(v,
                                     gflags,
-                                    guest_l1e_get_paddr(*gp),
+                                    gfn_to_paddr(target_gfn),
                                     mfn_x(target_mfn) << PAGE_SHIFT);
     }
 
@@ -813,59 +812,55 @@ _sh_propagate(struct vcpu *v,
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
-                 fetch_type_names[ft], level, gp->l1, sp->l1);
-}
-
-
-/* These four wrappers give us a little bit of type-safety back around the 
- * use of void-* pointers in _sh_propagate(), and allow the compiler to 
- * optimize out some level checks. */
+                 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
+}
+
+
+/* These four wrappers give us a little bit of type-safety back around
+ * the use of void-* pointers and intpte types in _sh_propagate(), and
+ * allow the compiler to optimize out some level checks. */
 
 #if GUEST_PAGING_LEVELS >= 4
 static void
 l4e_propagate_from_guest(struct vcpu *v, 
-                         guest_l4e_t *gl4e,
-                         mfn_t gl4mfn,
+                         guest_l4e_t gl4e,
                          mfn_t sl3mfn,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
+    _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
 l3e_propagate_from_guest(struct vcpu *v,
-                         guest_l3e_t *gl3e,
-                         mfn_t gl3mfn, 
+                         guest_l3e_t gl3e,
                          mfn_t sl2mfn, 
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
+    _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
 static void
 l2e_propagate_from_guest(struct vcpu *v, 
-                         guest_l2e_t *gl2e,
-                         mfn_t gl2mfn,
+                         guest_l2e_t gl2e,
                          mfn_t sl1mfn,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
+    _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
 l1e_propagate_from_guest(struct vcpu *v, 
-                         guest_l1e_t *gl1e,
-                         mfn_t gl1mfn,
+                         guest_l1e_t gl1e,
                          mfn_t gmfn, 
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft, 
                          p2m_type_t p2mt)
 {
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
+    _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -1859,8 +1854,7 @@ static shadow_l3e_t * shadow_get_and_cre
             *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
         }
         /* Install the new sl3 table in the sl4e */
-        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn, 
-                                 *sl3mfn, &new_sl4e, ft);
+        l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
         r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);
         if ( r & SHADOW_SET_ERROR )
@@ -1909,8 +1903,7 @@ static shadow_l2e_t * shadow_get_and_cre
             *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
         }
         /* Install the new sl2 table in the sl3e */
-        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn, 
-                                 *sl2mfn, &new_sl3e, ft);
+        l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
         r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);
         if ( r & SHADOW_SET_ERROR )
@@ -1934,7 +1927,7 @@ static shadow_l2e_t * shadow_get_and_cre
     /* This next line is important: the guest l2 has a 16k
      * shadow, we need to return the right mfn of the four. This
      * call will set it for us as a side-effect. */
-    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+    (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
     /* Reading the top level table is always valid. */
     return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
 #endif 
@@ -1956,8 +1949,8 @@ static shadow_l1e_t * shadow_get_and_cre
      * re-do it to fix a PSE dirty bit. */
     if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT 
          && likely(ft != ft_demand_write
-                   || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY) 
-                   || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
+                   || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW) 
+                   || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
     {
         *sl1mfn = shadow_l2e_get_mfn(*sl2e);
         ASSERT(mfn_valid(*sl1mfn));
@@ -1965,14 +1958,14 @@ static shadow_l1e_t * shadow_get_and_cre
     else 
     {
         shadow_l2e_t new_sl2e;
-        int r, flags = guest_l2e_get_flags(*gw->l2e);
+        int r, flags = guest_l2e_get_flags(gw->l2e);
         /* No l1 shadow installed: find and install it. */
         if ( !(flags & _PAGE_PRESENT) )
             return NULL; /* No guest page. */
         if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) ) 
         {
             /* Splintering a superpage */
-            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+            gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
             *sl1mfn = get_fl1_shadow_status(v, l2gfn);
             if ( !mfn_valid(*sl1mfn) ) 
             {
@@ -1992,8 +1985,7 @@ static shadow_l1e_t * shadow_get_and_cre
             }
         }
         /* Install the new sl1 table in the sl2e */
-        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn, 
-                                 *sl1mfn, &new_sl2e, ft);
+        l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
         r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
         ASSERT((r & SHADOW_SET_FLUSH) == 0);        
         if ( r & SHADOW_SET_ERROR )
@@ -2247,7 +2239,7 @@ static int validate_gl4e(struct vcpu *v,
 static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
 {
     shadow_l4e_t new_sl4e;
-    guest_l4e_t *new_gl4e = new_ge;
+    guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
@@ -2256,17 +2248,16 @@ static int validate_gl4e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl4e_calls);
 
-    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
-    {
-        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+    if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
+    {
+        gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
         mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
     }
-    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
-                             sl3mfn, &new_sl4e, ft_prefetch);
+    l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
 
     // check for updates to xen reserved slots
     if ( !shadow_mode_external(d) )
@@ -2301,7 +2292,7 @@ static int validate_gl3e(struct vcpu *v,
 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
 {
     shadow_l3e_t new_sl3e;
-    guest_l3e_t *new_gl3e = new_ge;
+    guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt;
@@ -2309,17 +2300,16 @@ static int validate_gl3e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl3e_calls);
 
-    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
-    {
-        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+    if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
+    {
+        gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
         mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
     }
-    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN), 
-                             sl2mfn, &new_sl3e, ft_prefetch);
+    l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
     result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
 
     return result;
@@ -2329,7 +2319,7 @@ static int validate_gl2e(struct vcpu *v,
 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
 {
     shadow_l2e_t new_sl2e;
-    guest_l2e_t *new_gl2e = new_ge;
+    guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt;
@@ -2337,11 +2327,11 @@ static int validate_gl2e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl2e_calls);
 
-    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
-    {
-        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+    if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
+    {
+        gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
         if ( guest_supports_superpages(v) &&
-             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+             (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
         {
             // superpage -- need to look up the shadow L1 which holds the
             // splitters...
@@ -2364,8 +2354,7 @@ static int validate_gl2e(struct vcpu *v,
                 result |= SHADOW_SET_ERROR;
         }
     }
-    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
-                             sl1mfn, &new_sl2e, ft_prefetch);
+    l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
 
     // check for updates to xen reserved slots in PV guests...
     // XXX -- need to revisit this for PV 3-on-4 guests.
@@ -2415,7 +2404,7 @@ static int validate_gl1e(struct vcpu *v,
 static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
 {
     shadow_l1e_t new_sl1e;
-    guest_l1e_t *new_gl1e = new_ge;
+    guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
@@ -2424,11 +2413,10 @@ static int validate_gl1e(struct vcpu *v,
 
     perfc_incr(shadow_validate_gl1e_calls);
 
-    gfn = guest_l1e_get_gfn(*new_gl1e);
+    gfn = guest_l1e_get_gfn(new_gl1e);
     gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
-    l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
-                             ft_prefetch, p2mt);
+    l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
     
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2615,7 +2603,7 @@ static void sh_prefetch(struct vcpu *v, 
     int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
-    guest_l1e_t gl1e;
+    guest_l1e_t *gl1p = NULL, gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
     p2m_type_t p2mt;
@@ -2626,16 +2614,23 @@ static void sh_prefetch(struct vcpu *v, 
     if ( dist > PREFETCH_DISTANCE )
         dist = PREFETCH_DISTANCE;
 
+    if ( mfn_valid(gw->l1mfn) )
+    {
+        /* Normal guest page; grab the next guest entry */
+        gl1p = sh_map_domain_page(gw->l1mfn);
+        gl1p += guest_l1_table_offset(gw->va);
+    }
+
     for ( i = 1; i < dist ; i++ ) 
     {
         /* No point in prefetching if there's already a shadow */
         if ( ptr_sl1e[i].l1 != 0 )
             break;
 
-        if ( gw->l1e )
+        if ( mfn_valid(gw->l1mfn) )
         {
             /* Normal guest page; grab the next guest entry */
-            gl1e = gw->l1e[i];
+            gl1e = gl1p[i];
             /* Not worth continuing if we hit an entry that will need another
              * fault for A/D-bit propagation anyway */
             gflags = guest_l1e_get_flags(gl1e);
@@ -2647,24 +2642,23 @@ static void sh_prefetch(struct vcpu *v, 
         else 
         {
             /* Fragmented superpage, unless we've been called wrongly */
-            ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
+            ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
             /* Increment the l1e's GFN by the right number of guest pages */
             gl1e = guest_l1e_from_gfn(
-                _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i), 
-                guest_l1e_get_flags(gw->eff_l1e));
+                _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i), 
+                guest_l1e_get_flags(gw->l1e));
         }
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
         gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
-        /* Propagate the entry.  Safe to use a pointer to our local 
-         * gl1e, since this is not a demand-fetch so there will be no 
-         * write-back to the guest. */
-        l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, p2mt);
+        /* Propagate the entry.  */
+        l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
+    if ( gl1p != NULL )
+        sh_unmap_domain_page(gl1p);
 }
 
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
@@ -2684,7 +2678,6 @@ static int sh_page_fault(struct vcpu *v,
 {
     struct domain *d = v->domain;
     walk_t gw;
-    u32 accumulated_gflags;
     gfn_t gfn;
     mfn_t gmfn, sl1mfn=_mfn(0);
     shadow_l1e_t sl1e, *ptr_sl1e;
@@ -2769,10 +2762,10 @@ static int sh_page_fault(struct vcpu *v,
     
     shadow_audit_tables(v);
                    
-    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
-    {
-        SHADOW_PRINTK("malformed guest pagetable\n");
-        print_gw(&gw);
+    if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
+    {
+        perfc_incr(shadow_fault_bail_real_fault);
+        goto not_a_shadow_fault;
     }
 
     /* It's possible that the guest has put pagetables in memory that it has 
@@ -2788,64 +2781,12 @@ static int sh_page_fault(struct vcpu *v,
 
     sh_audit_gw(v, &gw);
 
-    // We do not look at the gw->l1e, as that will not exist for superpages.
-    // Instead, we use the gw->eff_l1e...
-    //
-    // We need not check all the levels of the guest page table entries for
-    // present vs not-present, as the eff_l1e will always be not present if
-    // one of the higher level entries is not present.
-    //
-    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
-    {
-        perfc_incr(shadow_fault_bail_not_present);
-        goto not_a_shadow_fault;
-    }
-
-    // All levels of the guest page table are now known to be present.
-    accumulated_gflags = accumulate_guest_flags(v, &gw);
-
-    // Check for attempts to access supervisor-only pages from user mode,
-    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
-    // code.
-    //
-    if ( (regs->error_code & PFEC_user_mode) &&
-         !(accumulated_gflags & _PAGE_USER) )
-    {
-        /* illegal user-mode access to supervisor-only page */
-        perfc_incr(shadow_fault_bail_user_supervisor);
-        goto not_a_shadow_fault;
-    }
-
-    // Was it a write fault?
+    /* What kind of access are we dealing with? */
     ft = ((regs->error_code & PFEC_write_access)
           ? ft_demand_write : ft_demand_read);
-    if ( ft == ft_demand_write )
-    {
-        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
-        {
-            perfc_incr(shadow_fault_bail_ro_mapping);
-            goto not_a_shadow_fault;
-        }
-    }
-    else // must have been either an insn fetch or read fault
-    {
-        // Check for NX bit violations: attempts to execute code that is
-        // marked "do not execute".  Such errors are not caused or dealt with
-        // by the shadow code.
-        //
-        if ( regs->error_code & PFEC_insn_fetch )
-        {
-            if ( accumulated_gflags & _PAGE_NX_BIT )
-            {
-                /* NX prevented this code fetch */
-                perfc_incr(shadow_fault_bail_nx);
-                goto not_a_shadow_fault;
-            }
-        }
-    }
 
     /* What mfn is the guest trying to access? */
-    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    gfn = guest_l1e_get_gfn(gw.l1e);
     gmfn = gfn_to_mfn(d, gfn, &p2mt);
 
     if ( shadow_mode_refcounts(d) && 
@@ -2876,14 +2817,12 @@ static int sh_page_fault(struct vcpu *v,
          * shadow_set_l*e(), which will have crashed the guest.
          * Get out of the fault handler immediately. */
         ASSERT(d->is_shutting_down);
-        unmap_walk(v, &gw);
         shadow_unlock(d);
         return 0;
     }
 
     /* Calculate the shadow entry and write it */
-    l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
-                             gmfn, &sl1e, ft, p2mt);
+    l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2921,7 +2860,6 @@ static int sh_page_fault(struct vcpu *v,
 
  done:
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("fixed\n");
     shadow_audit_tables(v);
     shadow_unlock(d);
@@ -2972,7 +2910,6 @@ static int sh_page_fault(struct vcpu *v,
      * take it again when we write to the pagetables.
      */
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     shadow_audit_tables(v);
     shadow_unlock(d);
 
@@ -3033,7 +2970,6 @@ static int sh_page_fault(struct vcpu *v,
         goto not_a_shadow_fault;
     perfc_incr(shadow_fault_mmio);
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
     shadow_audit_tables(v);
     reset_early_unshadow(v);
@@ -3043,7 +2979,6 @@ static int sh_page_fault(struct vcpu *v,
 
  not_a_shadow_fault:
     sh_audit_gw(v, &gw);
-    unmap_walk(v, &gw);
     SHADOW_PRINTK("not a shadow fault\n");
     shadow_audit_tables(v);
     reset_early_unshadow(v);
@@ -3129,30 +3064,36 @@ sh_invlpg(struct vcpu *v, unsigned long 
 
 
 static unsigned long
-sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
 /* Called to translate a guest virtual address to what the *guest*
  * pagetables would map it to. */
 {
     walk_t gw;
     gfn_t gfn;
-    
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     struct shadow_vtlb t = {0};
-    if ( vtlb_lookup(v, va, &t) )
+    /* Check the vTLB cache first */
+    if ( vtlb_lookup(v, va, pfec[0], &t) ) 
         return t.frame_number;
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    guest_walk_tables(v, va, &gw, 0);
+    if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
+    {
+        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+            pfec[0] &= ~PFEC_page_present;
+        return INVALID_GFN;
+    }
     gfn = guest_walk_to_gfn(&gw);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     t.page_number = va >> PAGE_SHIFT;
     t.frame_number = gfn_x(gfn);
     t.flags = accumulate_guest_flags(v, &gw); 
+    t.pfec = pfec[0];
     vtlb_insert(v, t);
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    unmap_walk(v, &gw);
     return gfn_x(gfn);
 }
 
@@ -4006,9 +3947,8 @@ static inline void * emulate_map_dest(st
                                       struct sh_emulate_ctxt *sh_ctxt,
                                       mfn_t *mfnp)
 {
-    walk_t gw;
-    u32 flags, errcode;
-    gfn_t gfn;
+    uint32_t pfec;
+    unsigned long gfn;
     mfn_t mfn;
     p2m_type_t p2mt;
 
@@ -4016,50 +3956,20 @@ static inline void * emulate_map_dest(st
     if ( ring_3(sh_ctxt->ctxt.regs) ) 
         return NULL;
 
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
-    /* Try the virtual TLB first */
-    {
-        struct shadow_vtlb t = {0};
-        if ( vtlb_lookup(v, vaddr, &t) 
-             && ((t.flags & (_PAGE_PRESENT|_PAGE_RW)) 
-                 == (_PAGE_PRESENT|_PAGE_RW)) )
-        {
-            flags = t.flags;
-            gfn = _gfn(t.frame_number);
-        }
+    /* Translate the VA, and exit with a page-fault if we fail */
+    pfec = PFEC_page_present | PFEC_write_access;
+    gfn = sh_gva_to_gfn(v, vaddr, &pfec);
+    if ( gfn == INVALID_GFN ) 
+    {
+        if ( is_hvm_vcpu(v) )
+            hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
         else
-        {
-            /* Need to do the full lookup, just in case permissions
-             * have increased since we cached this entry */
-            
-#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
-
-            /* Walk the guest pagetables */
-            guest_walk_tables(v, vaddr, &gw, 1);
-            flags = accumulate_guest_flags(v, &gw);
-            gfn = guest_l1e_get_gfn(gw.eff_l1e);
-            sh_audit_gw(v, &gw);
-            unmap_walk(v, &gw);
-            
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
-            /* Remember this translation for next time */
-            t.page_number = vaddr >> PAGE_SHIFT;
-            t.frame_number = gfn_x(gfn);
-            t.flags = flags;
-            vtlb_insert(v, t);
-        }
-    }
-#endif
-
-    errcode = PFEC_write_access;
-    if ( !(flags & _PAGE_PRESENT) ) 
-        goto page_fault;
-
-    errcode |= PFEC_page_present;
-    if ( !(flags & _PAGE_RW) ) 
-        goto page_fault;
-
-    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+            propagate_page_fault(vaddr, pfec);
+        return NULL;
+    }
+
+    /* Translate the GFN */
+    mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
     if ( p2m_is_ram(p2mt) )
     {
         ASSERT(mfn_valid(mfn));
@@ -4069,13 +3979,6 @@ static inline void * emulate_map_dest(st
     }
     else 
         return NULL;
-
- page_fault:
-    if ( is_hvm_vcpu(v) )
-        hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
-    else
-        propagate_page_fault(vaddr, errcode);
-    return NULL;
 }
 
 static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src, 
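
The multi.c hunks above fold the old per-level permission checks (not-present,
user/supervisor, read-only mapping, NX) out of sh_page_fault() and into the
guest pagetable walker, which now takes the fault's PFEC bits and fails the
walk when the guest's own tables forbid the access; the four old bail-out
counters collapse into the single shadow_fault_bail_real_fault counter further
down.  A rough sketch of the logic that moved, using an invented helper name
and simplified arguments (not part of the patch):

    /* Sketch only: the guest-side permission tests that used to sit inline
     * in sh_page_fault() and are now performed during the guest walk.
     * "is_real_guest_fault" is a made-up name; the flag and PFEC constants
     * are the ones used in the diff. */
    static int is_real_guest_fault(uint32_t accumulated_gflags,
                                   uint32_t error_code)
    {
        if ( !(accumulated_gflags & _PAGE_PRESENT) )
            return 1;                                /* not-present         */
        if ( (error_code & PFEC_user_mode) &&
             !(accumulated_gflags & _PAGE_USER) )
            return 1;                                /* user -> kernel page */
        if ( (error_code & PFEC_write_access) &&
             !(accumulated_gflags & _PAGE_RW) )
            return 1;                                /* write -> read-only  */
        if ( (error_code & PFEC_insn_fetch) &&
             (accumulated_gflags & _PAGE_NX_BIT) )
            return 1;                                /* fetch -> NX page    */
        return 0;                                    /* shadow's problem    */
    }

When this kind of check fires, the fault is simply reflected back to the
guest (the not_a_shadow_fault path), which is roughly what the new
shadow_fault_bail_real_fault counter records.
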
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Nov 02 16:38:11 2007 +0000
@@ -665,9 +665,10 @@ void shadow_continue_emulation(
 #define VTLB_ENTRIES 13
 
 struct shadow_vtlb {
-    unsigned long page_number;    /* Guest virtual address >> PAGE_SHIFT  */
-    unsigned long frame_number;   /* Guest physical address >> PAGE_SHIFT */
-    u32 flags;    /* Accumulated guest pte flags, or 0 for an empty slot. */
+    unsigned long page_number;      /* Guest virtual address >> PAGE_SHIFT  */
+    unsigned long frame_number;     /* Guest physical address >> PAGE_SHIFT */
+    uint32_t pfec;  /* Pagefault code for the lookup that filled this entry */
+    uint32_t flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
 };
 
 /* Call whenever the guest flushes hit actual TLB */
@@ -692,7 +693,7 @@ static inline void vtlb_insert(struct vc
 }
 
 /* Look a translation up in the vTLB.  Returns 0 if not found. */
-static inline int vtlb_lookup(struct vcpu *v, unsigned long va,
+static inline int vtlb_lookup(struct vcpu *v, unsigned long va, uint32_t pfec,
                               struct shadow_vtlb *result) 
 {
     unsigned long page_number = va >> PAGE_SHIFT;
@@ -701,7 +702,9 @@ static inline int vtlb_lookup(struct vcp
 
     spin_lock(&v->arch.paging.vtlb_lock);
     if ( v->arch.paging.vtlb[i].flags != 0 
-         && v->arch.paging.vtlb[i].page_number == page_number )
+         && v->arch.paging.vtlb[i].page_number == page_number 
+         /* Any successful walk that had at least these pfec bits is OK */
+         && (v->arch.paging.vtlb[i].pfec & pfec) == pfec )
     {
         rv = 1; 
         result[0] = v->arch.paging.vtlb[i];
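
The private.h hunk gives each vTLB entry the PFEC bits of the walk that
filled it, and vtlb_lookup() now takes the PFEC bits of the current access;
an entry only satisfies a lookup that needs no more rights than the cached
walk already proved.  A minimal sketch of that predicate (helper name
invented for illustration, not part of the patch):

    /* Sketch only: the superset test that gates a vTLB hit.  An entry
     * cached by a read walk (pfec == PFEC_page_present) will not answer a
     * later write lookup (PFEC_page_present | PFEC_write_access), so the
     * caller falls back to a full guest walk. */
    static inline int vtlb_pfec_covers(uint32_t cached_pfec, uint32_t wanted_pfec)
    {
        return (cached_pfec & wanted_pfec) == wanted_pfec;
    }
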
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/types.h    Fri Nov 02 16:38:11 2007 +0000
@@ -251,6 +251,7 @@ TYPE_SAFE(u32,gfn)
 /* Types of the guest's page tables */
 typedef l1_pgentry_32_t guest_l1e_t;
 typedef l2_pgentry_32_t guest_l2e_t;
+typedef intpte_32_t guest_intpte_t;
 
 /* Access functions for them */
 static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -319,6 +320,7 @@ typedef l3_pgentry_t guest_l3e_t;
 #if GUEST_PAGING_LEVELS >= 4
 typedef l4_pgentry_t guest_l4e_t;
 #endif
+typedef intpte_t guest_intpte_t;
 
 /* Access functions for them */
 static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -419,32 +421,27 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Type used for recording a walk through guest pagetables.  It is
  * filled in by the pagetable walk function, and also used as a cache
- * for later walks.  
- * Any non-null pointer in this structure represents a mapping of guest
- * memory.  We must always call walk_init() before using a walk_t, and 
- * call walk_unmap() when we're done. 
- * The "Effective l1e" field is used when there isn't an l1e to point to, 
- * but we have fabricated an l1e for propagation to the shadow (e.g., 
- * for splintering guest superpages into many shadow l1 entries).  */
+ * for later walks.  When we encounter a superpage l2e, we fabricate an
+ * l1e for propagation to the shadow (for splintering guest superpages
+ * into many shadow l1 entries).  */
 typedef struct shadow_walk_t walk_t;
 struct shadow_walk_t 
 {
     unsigned long va;           /* Address we were looking for */
 #if GUEST_PAGING_LEVELS >= 3
 #if GUEST_PAGING_LEVELS >= 4
-    guest_l4e_t *l4e;           /* Pointer to guest's level 4 entry */
-#endif
-    guest_l3e_t *l3e;           /* Pointer to guest's level 3 entry */
-#endif
-    guest_l2e_t *l2e;           /* Pointer to guest's level 2 entry */
-    guest_l1e_t *l1e;           /* Pointer to guest's level 1 entry */
-    guest_l1e_t eff_l1e;        /* Effective level 1 entry */
-#if GUEST_PAGING_LEVELS >= 4
-    mfn_t l4mfn;                /* MFN that the level 4 entry is in */
-    mfn_t l3mfn;                /* MFN that the level 3 entry is in */
-#endif
-    mfn_t l2mfn;                /* MFN that the level 2 entry is in */
-    mfn_t l1mfn;                /* MFN that the level 1 entry is in */
+    guest_l4e_t l4e;            /* Guest's level 4 entry */
+#endif
+    guest_l3e_t l3e;            /* Guest's level 3 entry */
+#endif
+    guest_l2e_t l2e;            /* Guest's level 2 entry */
+    guest_l1e_t l1e;            /* Guest's level 1 entry (or fabrication) */
+#if GUEST_PAGING_LEVELS >= 4
+    mfn_t l4mfn;                /* MFN that the level 4 entry was in */
+    mfn_t l3mfn;                /* MFN that the level 3 entry was in */
+#endif
+    mfn_t l2mfn;                /* MFN that the level 2 entry was in */
+    mfn_t l1mfn;                /* MFN that the level 1 entry was in */
 };
 
 /* macros for dealing with the naming of the internal function names of the
@@ -542,7 +539,7 @@ accumulate_guest_flags(struct vcpu *v, w
 {
     u32 accumulated_flags;
 
-    if ( unlikely(!(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT)) )
+    if ( unlikely(!(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT)) )
         return 0;
         
     // We accumulate the permission flags with bitwise ANDing.
@@ -550,17 +547,17 @@ accumulate_guest_flags(struct vcpu *v, w
     // For the NX bit, however, the polarity is wrong, so we accumulate the
     // inverse of the NX bit.
     //
-    accumulated_flags =  guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
-    accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+    accumulated_flags =  guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
 
     // Note that PAE guests do not have USER or RW or NX bits in their L3s.
     //
 #if GUEST_PAGING_LEVELS == 3
     accumulated_flags &=
-        ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+        ~_PAGE_PRESENT | (guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT);
 #elif GUEST_PAGING_LEVELS >= 4
-    accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
-    accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+    accumulated_flags &= guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
 #endif
 
     // Revert the NX bit back to its original polarity
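
The accumulate_guest_flags() hunk keeps the existing XOR-around-AND trick,
now reading the copied entries rather than pointers.  The reason for the
XOR: RW and USER grant access when set, so ANDing across levels keeps the
most restrictive value, but NX restricts when set; flipping NX before the
AND gives every bit the same polarity, and flipping back afterwards restores
it.  A two-level sketch (function name invented, not part of the patch):

    /* Sketch only: NX accumulates as "set at any level", while RW/USER
     * accumulate as "set at every level", all with a single AND. */
    static uint32_t accumulate_two_levels(uint32_t l1_flags, uint32_t l2_flags)
    {
        uint32_t acc;
        acc  = l1_flags ^ _PAGE_NX_BIT;   /* flip NX so "set" means permitted  */
        acc &= l2_flags ^ _PAGE_NX_BIT;
        return acc ^ _PAGE_NX_BIT;        /* NX set iff set at either level    */
    }
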
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:38:11 2007 +0000
@@ -86,6 +86,7 @@ int hvm_copy_from_guest_phys(void *buf, 
 int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size);
 int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size);
 int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size);
 
 void hvm_print_line(struct vcpu *v, const char c);
 void hlt_timer_fn(void *data);
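
The new hvm_fetch_from_guest_virt() declaration is the fetch-flavoured
counterpart of hvm_copy_from_guest_virt(): the copy is the same, but the
underlying guest walk is presumably performed as an instruction fetch, so
NX-protected pages fail it the way a real fetch would.  A hedged usage
sketch (wrapper name and arguments invented for illustration):

    /* Sketch only: pull opcode bytes for emulation from a guest virtual
     * address.  A non-zero return means the fetch did not complete. */
    static int fetch_insn_bytes(unsigned long guest_rip, uint8_t *buf, int len)
    {
        return hvm_fetch_from_guest_virt(buf, guest_rip, len);
    }
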
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/paging.h      Fri Nov 02 16:38:11 2007 +0000
@@ -105,7 +105,8 @@ struct paging_mode {
     int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                             struct cpu_user_regs *regs);
     int           (*invlpg                )(struct vcpu *v, unsigned long va);
-    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va,
+                                            uint32_t *pfec);
     void          (*update_cr3            )(struct vcpu *v, int do_locking);
     void          (*update_paging_modes   )(struct vcpu *v);
     void          (*write_p2m_entry       )(struct vcpu *v, unsigned long gfn,
@@ -204,12 +205,17 @@ static inline int paging_invlpg(struct v
 }
 
 /* Translate a guest virtual address to the frame number that the
- * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest 
- * tables don't map this address. */
+ * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
+ * tables don't map this address for this kind of access.
+ * pfec[0] is used to determine which kind of access this is when
+ * walking the tables.  The caller should set the PFEC_page_present bit
+ * in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
 #define INVALID_GFN (-1UL)
-static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
-{
-    return v->arch.paging.mode->gva_to_gfn(v, va);
+static inline unsigned long paging_gva_to_gfn(struct vcpu *v, 
+                                              unsigned long va,
+                                              uint32_t *pfec)
+{
+    return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
 }
 
 /* Update all the things that are derived from the guest's CR3.
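
The paging.h hunk changes the gva_to_gfn interface: callers now pass a PFEC
word describing the access (with PFEC_page_present always set on entry), and
on failure the function returns INVALID_GFN with pfec[0] adjusted so it can
be injected directly as the guest-visible error code.  A small sketch of the
calling pattern for a kernel-mode write (function name invented; a user-mode
caller would additionally OR in PFEC_user_mode):

    /* Sketch only: translate va for a write, returning INVALID_GFN and a
     * ready-to-inject error code in *out_pfec on failure. */
    static unsigned long translate_for_write(struct vcpu *v, unsigned long va,
                                             uint32_t *out_pfec)
    {
        uint32_t pfec = PFEC_page_present | PFEC_write_access;
        unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);

        *out_pfec = pfec;   /* meaningful when gfn == INVALID_GFN */
        return gfn;
    }
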
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/perfc_defn.h  Fri Nov 02 16:38:11 2007 +0000
@@ -50,12 +50,8 @@ PERFCOUNTER(shadow_fault_fast_mmio, "sha
 PERFCOUNTER(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
 PERFCOUNTER(shadow_fault_fast_fail, "shadow_fault fast path error")
 PERFCOUNTER(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
-PERFCOUNTER(shadow_fault_bail_not_present, 
-                                        "shadow_fault guest not-present")
-PERFCOUNTER(shadow_fault_bail_nx,  "shadow_fault guest NX fault")
-PERFCOUNTER(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
-PERFCOUNTER(shadow_fault_bail_user_supervisor, 
-                                        "shadow_fault guest U/S fault")
+PERFCOUNTER(shadow_fault_bail_real_fault, 
+                                        "shadow_fault really guest fault")
 PERFCOUNTER(shadow_fault_emulate_read, "shadow_fault emulates a read")
 PERFCOUNTER(shadow_fault_emulate_write, "shadow_fault emulates a write")
 PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

