[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [XEN] Shadow: emulate a few extra instructions on PAE pagetable writes



# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1180704731 -3600
# Node ID 13eca4bf2c69aff4c9b689d0dff45929e44e4edb
# Parent  b182bd560e47ba3497211603a840ffb3afa13dc8
[XEN] Shadow: emulate a few extra instructions on PAE pagetable writes
in the hope of catching the "other half" of the write without another enter/exit.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/mm/shadow/common.c  |   32 ++++++++++++++
 xen/arch/x86/mm/shadow/multi.c   |   86 ++++++++++++++++++++++++++++-----------
 xen/arch/x86/mm/shadow/private.h |    7 ++-
 xen/include/asm-x86/domain.h     |    2 
 xen/include/asm-x86/perfc_defn.h |    3 +
 5 files changed, 103 insertions(+), 27 deletions(-)

diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Jun 01 14:32:11 2007 +0100
@@ -248,7 +248,7 @@ hvm_emulate_insn_fetch(enum x86_segment 
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    unsigned int insn_off = offset - ctxt->regs->eip;
+    unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;
 
     /* Fall back if requested bytes are not in the prefetch cache. */
     if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
@@ -450,6 +450,7 @@ struct x86_emulate_ops *shadow_init_emul
     }
 
     /* Attempt to prefetch whole instruction. */
+    sh_ctxt->insn_buf_eip = regs->eip;
     sh_ctxt->insn_buf_bytes =
         (!hvm_translate_linear_addr(
             x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
@@ -459,6 +460,35 @@ struct x86_emulate_ops *shadow_init_emul
         ? sizeof(sh_ctxt->insn_buf) : 0;
 
     return &hvm_shadow_emulator_ops;
+}
+
+/* Update an initialized emulation context to prepare for the next 
+ * instruction */
+void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt, 
+                               struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    unsigned long addr, diff;
+
+    /* We don't refetch the segment bases, because we don't emulate
+     * writes to segment registers */
+
+    if ( is_hvm_vcpu(v) )
+    {
+        diff = regs->eip - sh_ctxt->insn_buf_eip;
+        if ( diff > sh_ctxt->insn_buf_bytes )
+        {
+            /* Prefetch more bytes. */
+            sh_ctxt->insn_buf_bytes =
+                (!hvm_translate_linear_addr(
+                    x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
+                    hvm_access_insn_fetch, sh_ctxt, &addr) &&
+                 !hvm_copy_from_guest_virt(
+                     sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
+                ? sizeof(sh_ctxt->insn_buf) : 0;
+            sh_ctxt->insn_buf_eip = regs->eip;
+        }
+    }
 }
 
 /**************************************************************************/
diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Jun 01 14:32:11 2007 +0100
@@ -2871,6 +2871,20 @@ static int sh_page_fault(struct vcpu *v,
     if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
         goto not_a_shadow_fault;
 
+    /*
+     * We do not emulate user writes. Instead we use them as a hint that the
+     * page is no longer a page table. This behaviour differs from native, but
+     * it seems very unlikely that any OS grants user access to page tables.
+     */
+    if ( (regs->error_code & PFEC_user_mode) )
+    {
+        SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n", 
+                      mfn_x(gmfn));
+        perfc_incr(shadow_fault_emulate_failed);
+        sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+        goto done;
+    }
+
     if ( is_hvm_domain(d) )
     {
         /*
@@ -2897,14 +2911,7 @@ static int sh_page_fault(struct vcpu *v,
 
     emul_ops = shadow_init_emulation(&emul_ctxt, regs);
 
-    /*
-     * We do not emulate user writes. Instead we use them as a hint that the
-     * page is no longer a page table. This behaviour differs from native, but
-     * it seems very unlikely that any OS grants user access to page tables.
-     */
-    r = X86EMUL_UNHANDLEABLE;
-    if ( !(regs->error_code & PFEC_user_mode) )
-        r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+    r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
 
     /*
      * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
@@ -2921,6 +2928,35 @@ static int sh_page_fault(struct vcpu *v,
          * though, this is a hint that this page should not be shadowed. */
         sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
     }
+
+#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
+    if ( r == X86EMUL_OKAY ) {
+        int i;
+        /* Emulate up to four extra instructions in the hope of catching 
+         * the "second half" of a 64-bit pagetable write. */
+        for ( i = 0 ; i < 4 ; i++ )
+        {
+            shadow_continue_emulation(&emul_ctxt, regs);
+            v->arch.paging.last_write_was_pt = 0;
+            r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+            if ( r == X86EMUL_OKAY )
+            {
+                if ( v->arch.paging.last_write_was_pt )
+                {
+                    perfc_incr(shadow_em_ex_pt);
+                    break; /* Don't emulate past the other half of the write */
+                }
+                else 
+                    perfc_incr(shadow_em_ex_non_pt);
+            }
+            else
+            {
+                perfc_incr(shadow_em_ex_fail);
+                break; /* Don't emulate again if we failed! */
+            }
+        }
+    }
+#endif /* PAE guest */
 
     /* Emulator has changed the user registers: write back */
     if ( is_hvm_domain(d) )
@@ -3878,6 +3914,11 @@ static inline void * emulate_map_dest(st
     gfn_t gfn;
     mfn_t mfn;
 
+    /* We don't emulate user-mode writes to page tables */
+    if ( ring_3(sh_ctxt->ctxt.regs) ) 
+        return NULL;
+
+    /* Walk the guest pagetables */
     guest_walk_tables(v, vaddr, &gw, 1);
     flags = accumulate_guest_flags(v, &gw);
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
@@ -3885,27 +3926,24 @@ static inline void * emulate_map_dest(st
     sh_audit_gw(v, &gw);
     unmap_walk(v, &gw);
 
-    if ( !(flags & _PAGE_PRESENT) )
-    {
-        errcode = 0;
+    errcode = PFEC_write_access;
+    if ( !(flags & _PAGE_PRESENT) ) 
         goto page_fault;
-    }
-
-    if ( !(flags & _PAGE_RW) ||
-         (!(flags & _PAGE_USER) && ring_3(sh_ctxt->ctxt.regs)) )
-    {
-        errcode = PFEC_page_present;
+
+    errcode |= PFEC_page_present;
+    if ( !(flags & _PAGE_RW) ) 
         goto page_fault;
-    }
-
-    if ( !mfn_valid(mfn) )
+
+    if ( mfn_valid(mfn) )
+    {
+        *mfnp = mfn;
+        v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
+        return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+    }
+    else 
         return NULL;
 
-    *mfnp = mfn;
-    return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
-
  page_fault:
-    errcode |= PFEC_write_access;
     if ( is_hvm_vcpu(v) )
         hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
     else
diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Jun 01 14:32:11 2007 +0100
@@ -634,9 +634,10 @@ struct sh_emulate_ctxt {
 struct sh_emulate_ctxt {
     struct x86_emulate_ctxt ctxt;
 
-    /* [HVM] Cache of up to 15 bytes of instruction. */
-    uint8_t insn_buf[15];
+    /* [HVM] Cache of up to 31 bytes of instruction. */
+    uint8_t insn_buf[31];
     uint8_t insn_buf_bytes;
+    unsigned long insn_buf_eip;
 
     /* [HVM] Cache of segment registers already gathered for this emulation. */
     unsigned int valid_seg_regs;
@@ -644,6 +645,8 @@ struct sh_emulate_ctxt {
 };
 
 struct x86_emulate_ops *shadow_init_emulation(
+    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
+void shadow_continue_emulation(
     struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
 
 #endif /* _XEN_SHADOW_PRIVATE_H */
diff -r b182bd560e47 -r 13eca4bf2c69 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu May 31 16:09:11 2007 +0100
+++ b/xen/include/asm-x86/domain.h      Fri Jun 01 14:32:11 2007 +0100
@@ -171,6 +171,8 @@ struct paging_vcpu {
     struct paging_mode *mode;
     /* HVM guest: paging enabled (CR0.PG)?  */
     unsigned int translate_enabled:1;
+    /* HVM guest: last emulated write was to a pagetable */
+    unsigned int last_write_was_pt:1;
 
     /* paging support extension */
     struct shadow_vcpu shadow;
diff -r b182bd560e47 -r 13eca4bf2c69 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Thu May 31 16:09:11 2007 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Fri Jun 01 14:32:11 2007 +0100
@@ -90,5 +90,8 @@ PERFCOUNTER(shadow_invlpg,         "shad
 PERFCOUNTER(shadow_invlpg,         "shadow emulates invlpg")
 PERFCOUNTER(shadow_invlpg_fault,   "shadow invlpg faults")
 
+PERFCOUNTER(shadow_em_ex_pt,       "shadow extra pt write")
+PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
+PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
 
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.