
[Xen-changelog] [xen-unstable] Provide fast write emulation path to release shadow lock.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1203078791 0
# Node ID 03d13b696027a99fc7fcfe50515a7d4c4618825a
# Parent  38b532eea3bfc46dcc21c0b1bc91e533f9d1e462
Provide fast write emulation path to release shadow lock.

Shadow fault handling can be split into two parts: the first covers
preparatory work such as validating the guest page table and fixing
up the shadow tables; the second performs the write emulation itself.

However, there is one scenario where the first part can be skipped.
After a successful emulation on a virtual frame, the next shadow fault
on that same frame is very likely to end up in the write-emulation
logic again, and re-walking the first part, already covered by the
previous fault, is wasted work. In that case we can jump to the
emulation code early, without taking any lock until the final shadow
validation of the emulated write. Perfc counts on a 64-bit SMP HVM
guest show that 89% of all shadow write emulations hit this fast path
during a kernel build in the guest.
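
In outline, the fast path added to sh_page_fault() in multi.c below does
roughly the following (a condensed, illustrative rendering of the hunk,
not literal code from the patch):

    if ( v->arch.paging.last_write_emul_ok &&
         v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) &&
         regs->error_code == (PFEC_write_access | PFEC_page_present) )
    {
        /* Skip the guest walk and shadow fixup and go straight to the
         * emulator; the shadow lock is only taken later, when the
         * emulated write is validated against the shadow tables. */
        gmfn = v->arch.paging.shadow.last_emulated_mfn;
        goto early_emulation;
    }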

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/arch/x86/mm/shadow/common.c  |    7 ++
 xen/arch/x86/mm/shadow/multi.c   |  100 +++++++++++++++++++++++++++++++++------
 xen/arch/x86/mm/shadow/private.h |    5 +
 xen/include/asm-x86/domain.h     |   10 +++
 xen/include/asm-x86/perfc_defn.h |    3 +
 5 files changed, 107 insertions(+), 18 deletions(-)

diff -r 38b532eea3bf -r 03d13b696027 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Feb 15 09:54:28 2008 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Feb 15 12:33:11 2008 +0000
@@ -1039,13 +1039,18 @@ void shadow_free(struct domain *d, mfn_t
 
     for ( i = 0; i < 1<<order; i++ ) 
     {
-#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+#if SHADOW_OPTIMIZATIONS & (SHOPT_WRITABLE_HEURISTIC | SHOPT_FAST_EMULATION)
         struct vcpu *v;
         for_each_vcpu(d, v) 
         {
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
             /* No longer safe to look for a writeable mapping in this shadow */
             if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i )
                 v->arch.paging.shadow.last_writeable_pte_smfn = 0;
+#endif
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+            v->arch.paging.last_write_emul_ok = 0;
+#endif
         }
 #endif
         /* Strip out the type: this is now a free shadow page */
diff -r 38b532eea3bf -r 03d13b696027 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Feb 15 09:54:28 2008 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Feb 15 12:33:11 2008 +0000
@@ -2623,13 +2623,13 @@ static inline void check_for_early_unsha
 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
 {
 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) &&
-         sh_mfn_is_a_page_table(gmfn) )
+    if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn)
+         && sh_mfn_is_a_page_table(gmfn) )
     {
         perfc_incr(shadow_early_unshadow);
         sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
     }
-    v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn);
+    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
 #endif
 }
 
@@ -2637,7 +2637,7 @@ static inline void reset_early_unshadow(
 static inline void reset_early_unshadow(struct vcpu *v)
 {
 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN;
+    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = INVALID_MFN;
 #endif
 }
 
@@ -2744,12 +2744,39 @@ static int sh_page_fault(struct vcpu *v,
     int r;
     fetch_type_t ft = 0;
     p2m_type_t p2mt;
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+    int fast_emul = 0;
+#endif
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
                   v->domain->domain_id, v->vcpu_id, va, regs->error_code,
                   regs->rip);
 
     perfc_incr(shadow_fault);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+    /* If the faulting frame was successfully emulated on the last shadow
+     * fault, it is highly likely to need the same emulation action again.
+     * Try to emulate early to avoid lock acquisition.
+     */
+    if ( v->arch.paging.last_write_emul_ok
+         && v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) ) 
+    {
+        /* Check that the error code is exactly (write | present); otherwise
+         * fall back to the normal path in case some validation is required.
+         */
+        if ( regs->error_code == (PFEC_write_access | PFEC_page_present) )
+        {
+            fast_emul = 1;
+            gmfn = v->arch.paging.shadow.last_emulated_mfn;
+            perfc_incr(shadow_fault_fast_emulate);
+            goto early_emulation;
+        }
+        else
+            v->arch.paging.last_write_emul_ok = 0;
+    }
+#endif
+
     //
     // XXX: Need to think about eventually mapping superpages directly in the
     //      shadow (when possible), as opposed to splintering them into a
@@ -2960,6 +2987,17 @@ static int sh_page_fault(struct vcpu *v,
         goto done;
     }
 
+    /*
+     * We don't need to hold the lock for the whole emulation; we will
+     * take it again when we write to the pagetables.
+     */
+    sh_audit_gw(v, &gw);
+    shadow_audit_tables(v);
+    shadow_unlock(d);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+ early_emulation:
+#endif
     if ( is_hvm_domain(d) )
     {
         /*
@@ -2971,6 +3009,13 @@ static int sh_page_fault(struct vcpu *v,
          */
         if ( unlikely(hvm_event_pending(v)) )
         {
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+            if ( fast_emul )
+            {
+                perfc_incr(shadow_fault_fast_emulate_fail);
+                v->arch.paging.last_write_emul_ok = 0;
+            }
+#endif
             gdprintk(XENLOG_DEBUG, "write to pagetable during event "
                      "injection: cr2=%#lx, mfn=%#lx\n", 
                      va, mfn_x(gmfn));
@@ -2981,14 +3026,6 @@ static int sh_page_fault(struct vcpu *v,
 
     SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n", 
                   (unsigned long)regs->eip, (unsigned long)regs->esp);
-
-    /*
-     * We don't need to hold the lock for the whole emulation; we will
-     * take it again when we write to the pagetables.
-     */
-    sh_audit_gw(v, &gw);
-    shadow_audit_tables(v);
-    shadow_unlock(d);
 
     emul_ops = shadow_init_emulation(&emul_ctxt, regs);
 
@@ -3001,14 +3038,43 @@ static int sh_page_fault(struct vcpu *v,
      */
     if ( r == X86EMUL_UNHANDLEABLE )
     {
+        perfc_incr(shadow_fault_emulate_failed);
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+        if ( fast_emul )
+        {
+            perfc_incr(shadow_fault_fast_emulate_fail);
+            v->arch.paging.last_write_emul_ok = 0;
+        }
+#endif
         SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", 
                        mfn_x(gmfn));
-        perfc_incr(shadow_fault_emulate_failed);
         /* If this is actually a page table, then we have a bug, and need 
          * to support more operations in the emulator.  More likely, 
          * though, this is a hint that this page should not be shadowed. */
         shadow_remove_all_shadows(v, gmfn);
     }
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+    /* Record the successful emulation as a heuristic to accelerate the
+     * next fault on the same frame. But be careful to verify that the
+     * frame is still a page table: an unshadow triggered during write
+     * emulation normally requires a re-sync with the guest page table
+     * to recover r/w permissions. Recording the hint in that case would
+     * skip the propagation and cause unexpected additional shadow
+     * faults.
+     */
+    if ( (r == X86EMUL_OKAY) && sh_mfn_is_a_page_table(gmfn) )
+    {
+        if ( !fast_emul )
+        {
+            v->arch.paging.shadow.last_emulated_frame = va >> PAGE_SHIFT;
+            v->arch.paging.shadow.last_emulated_mfn = gmfn;
+            v->arch.paging.last_write_emul_ok = 1;
+        }
+    }
+    else if ( fast_emul )
+        v->arch.paging.last_write_emul_ok = 0;
+#endif
 
 #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
     if ( r == X86EMUL_OKAY ) {
@@ -3077,6 +3143,10 @@ sh_invlpg(struct vcpu *v, unsigned long 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
     /* No longer safe to use cached gva->gfn translations */
     vtlb_flush(v);
+#endif
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+    v->arch.paging.last_write_emul_ok = 0;
 #endif
 
     /* First check that we can safely read the shadow l2e.  SMP/PAE linux can
@@ -3815,6 +3885,10 @@ sh_update_cr3(struct vcpu *v, int do_loc
     vtlb_flush(v);
 #endif
 
+#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
+    v->arch.paging.last_write_emul_ok = 0;
+#endif
+
     /* Release the lock, if we took it (otherwise it's the caller's problem) */
     if ( do_locking ) shadow_unlock(v->domain);
 }
diff -r 38b532eea3bf -r 03d13b696027 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Feb 15 09:54:28 2008 +0000
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Feb 15 12:33:11 2008 +0000
@@ -62,8 +62,9 @@ extern int shadow_audit_enable;
 #define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
 #define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
 #define SHOPT_VIRTUAL_TLB         0x40  /* Cache guest v->p translations */
-
-#define SHADOW_OPTIMIZATIONS      0x7f
+#define SHOPT_FAST_EMULATION      0x80  /* Fast write emulation */
+
+#define SHADOW_OPTIMIZATIONS      0xff
 
 
 /******************************************************************************
diff -r 38b532eea3bf -r 03d13b696027 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Feb 15 09:54:28 2008 +0000
+++ b/xen/include/asm-x86/domain.h      Fri Feb 15 12:33:11 2008 +0000
@@ -108,10 +108,14 @@ struct shadow_vcpu {
 #endif
     /* Non-PAE guests: pointer to guest top-level pagetable */
     void *guest_vtable;
-    /* Last MFN that we emulated a write to. */
-    unsigned long last_emulated_mfn;
+    /* Last MFN that we emulated a write to, used as an unshadow heuristic. */
+    unsigned long last_emulated_mfn_for_unshadow;
     /* MFN of the last shadow that we shot a writeable mapping in */
     unsigned long last_writeable_pte_smfn;
+    /* Last frame number that we emulated a write to. */
+    unsigned long last_emulated_frame;
+    /* Last MFN that we successfully emulated a write to. */
+    unsigned long last_emulated_mfn;
 };
 
 /************************************************/
@@ -189,6 +193,8 @@ struct paging_vcpu {
     struct paging_mode *mode;
     /* HVM guest: last emulate was to a pagetable */
     unsigned int last_write_was_pt:1;
+    /* HVM guest: last write emulation succeeded */
+    unsigned int last_write_emul_ok:1;
     /* Translated guest: virtual TLB */
     struct shadow_vtlb *vtlb;
     spinlock_t          vtlb_lock;
diff -r 38b532eea3bf -r 03d13b696027 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Fri Feb 15 09:54:28 2008 +0000
+++ b/xen/include/asm-x86/perfc_defn.h  Fri Feb 15 12:33:11 2008 +0000
@@ -57,6 +57,9 @@ PERFCOUNTER(shadow_fault_emulate_failed,
 PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")
 PERFCOUNTER(shadow_fault_emulate_stack, "shadow_fault emulate stack write")
 PERFCOUNTER(shadow_fault_emulate_wp, "shadow_fault emulate for CR0.WP=0")
+PERFCOUNTER(shadow_fault_fast_emulate, "shadow_fault fast emulate")
+PERFCOUNTER(shadow_fault_fast_emulate_fail,
+                                   "shadow_fault fast emulate failed")
 PERFCOUNTER(shadow_fault_mmio,     "shadow_fault handled as mmio")
 PERFCOUNTER(shadow_fault_fixed,    "shadow_fault fixed fault")
 PERFCOUNTER(shadow_ptwr_emulate,   "shadow causes ptwr to emulate")
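
For reference, the invalidation discipline the patch follows can be
collected into one sketch (field names match the patch; the helper
function itself is illustrative only and does not exist in the tree):

    /* Illustrative only: every operation that can change the gva->gmfn
     * mapping or free the shadow must drop the fast-emulation hint.
     * The patch open-codes this in shadow_free(), sh_invlpg() and
     * sh_update_cr3(). */
    static inline void fast_emul_invalidate(struct vcpu *v)
    {
    #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
        v->arch.paging.last_write_emul_ok = 0;
    #endif
    }

The two new counters above should appear in the usual software
performance counter dump (e.g. via the xenperf tool from a standard
tools build, assuming Xen was built with perf counters enabled).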
